#!/usr/bin/env perl
#
# aliasBedToCt.pl - split a chromAlias.bed file into per-name-scheme test
# files.  For every name scheme column in the input header (columns 4 and
# beyond) this writes, into resultDir:
#   <scheme>.ct.txt     custom track, one bed line per sequence that has a
#                       name in this scheme; the bed name column lists the
#                       other names known for the sequence, comma separated
#   <scheme>.sizes.txt  chrom.sizes using the names of this scheme
#   <scheme>.bb         bigBed made by bedToBigBed from the two files above
#
# Provenance: commit 57a8f0f882e83e90b2eb0546bc84225530690689, hiram,
#   Thu Mar 24 12:40:59 2022 -0700
#   "and construct the test custom track files and bigBeds" refs #29111
#   src/hg/utils/automation/aliasBedToCt.pl (index ed7241b..4ca2b1c)

use strict;
use warnings;
use File::Temp qw/ tempfile tempdir /;

my $argc = scalar(@ARGV);

if ($argc != 2) {
  printf STDERR "usage: ./bedToCt.pl chromAlias.bed resultDir\n";
  printf STDERR "reads the chromAlias.bed file, writes several files\n";
  printf STDERR "into the resultDir one file for each name scheme\n";
  exit 255;
}

my $bedFile = shift;
my $resultDir = shift;

printf STDERR "# chromAlias input: %s\n", $bedFile;
printf STDERR "# results to: %s/\n", $resultDir;

my @sourceNames;	# the name label
my @ctPaths;		# custom track file name for each source name
my @sizePaths;		# chrom.sizes file name for each source name
my @outFiles;		# open file handle for each custom track file
my @sizeFiles;		# corresponding file handle for chrom.sizes

open (my $bedFh, '<', $bedFile) or die "can not read $bedFile: $!";

# the first line is the header, columns 4 and beyond name the schemes
my $headerLine = <$bedFh>;
die "no header line found in $bedFile" if (!defined($headerLine));
chomp $headerLine;
my @header = split(/\t/, $headerLine);

for (my $i = 3; $i < scalar(@header); ++$i) {
  my $source = $header[$i];
  my $outFile = sprintf("%s/%s.ct.txt", $resultDir, $source);
  my $sizeFile = sprintf("%s/%s.sizes.txt", $resultDir, $source);
  open (my $fh, '>', $outFile) or die "can not write to $outFile: $!";
  open (my $sz, '>', $sizeFile) or die "can not write to $sizeFile: $!";
  printf STDERR "# %s\t%s\n", $source, $outFile;
  push @sourceNames, $source;
  push @ctPaths, $outFile;
  push @sizePaths, $sizeFile;
  push @outFiles, $fh;
  push @sizeFiles, $sz;
  printf $fh "track name='%s chrNames' description='chrom alias test with \"%s\" name scheme' type=bed visibility=pack\n", $source, $source;
}

while (my $line = <$bedFh>) {
  chomp $line;
  # limit -1 keeps trailing empty columns so the column index stays aligned
  my @fields = split(/\t/, $line, -1);
  for (my $i = 0; $i < scalar(@sourceNames); ++$i) {
    my $name = $fields[3+$i];
    # a sequence without a name in this scheme is left out of this scheme
    next if (!defined($name) || !length($name));
    my $fh = $outFiles[$i];
    my $sz = $sizeFiles[$i];
    # names already present on this output line, never repeated in the list
    my %nameDone = ($name => 1, $fields[0] => 1);
    printf $fh "%s\t%d\t%d\t%s", $name, $fields[1], $fields[2], $fields[0];
    printf $sz "%s\t%d\n", $name, $fields[2];
    foreach my $alias (@fields[3 .. $#fields]) {
      next if (!length($alias));
      next if ($nameDone{$alias}++);
      printf $fh ",%s", $alias;
    }
    printf $fh "\n";
  }
}
close ($bedFh);

# buffered write errors only show up at close, so check every one
for (my $i = 0; $i < scalar(@outFiles); ++$i) {
  close ($outFiles[$i]) or die "error closing $ctPaths[$i]: $!";
  close ($sizeFiles[$i]) or die "error closing $sizePaths[$i]: $!";
}

# scratch files go to /dev/shm when it is usable, else the system temp dir
my @tmpWhere = (-d "/dev/shm" && -w "/dev/shm") ? (DIR => "/dev/shm")
						 : (TMPDIR => 1);
my $failCount = 0;

for (my $i = 0; $i < scalar(@sourceNames); ++$i) {
  my $source = $sourceNames[$i];
  my $ctFile = $ctPaths[$i];
  my $sizeFile = $sizePaths[$i];
  my $bbFile = sprintf("%s/%s.bb", $resultDir, $source);

  # the bed lines are everything in the custom track but the track line
  open (my $ct, '<', $ctFile) or die "can not read $ctFile: $!";
  my @bedLines = grep { $_ !~ m/^track/ } <$ct>;
  close ($ct);

  # order by name as plain bytes, then by numeric start, independent of
  # whatever locale the caller is running with
  my @sorted = map { $_->[2] }
	sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
	map { my ($chr, $start) = split(/\t/, $_, 3); [$chr, $start, $_] }
	@bedLines;

  my ($tmpFh, $tmpFile) = tempfile("chromAliasTest.XXXXXX",
	SUFFIX => ".bed", UNLINK => 1, @tmpWhere);
  print $tmpFh @sorted;
  close ($tmpFh) or die "error closing $tmpFile: $!";

  # list form of system: no shell, file names are passed through untouched
  my $status = system("bedToBigBed", $tmpFile, $sizeFile, $bbFile);
  if ($status == -1) {
    printf STDERR "ERROR: can not run bedToBigBed: %s\n", $!;
    ++$failCount;
  } elsif ($status != 0) {
    printf STDERR "ERROR: bedToBigBed failed for '%s', wait status %d\n",
	$source, $status;
    ++$failCount;
  }
  unlink($tmpFile);
}

exit ($failCount ? 1 : 0);

__END__

==> GCF_000001405.39/GCF_000001405.39.chromAlias.bed <==
#chrom chromStart chromEnd ucsc assembly genbank ncbi refseq
NW_011332701v1_alt 0 4998962 NW_011332701v1_alt HG2139_PATCH KN538374.1 NW_011332701.1
chr1 0 248956422 chr1 1 CM000663.2 1 NC_000001.11
chr10 0 133797422 chr10 10 CM000672.2 10 NC_000010.11
chr10_NT_187579v1_alt 0 181496 chr10_NT_187579v1_alt HSCHR10_1_CTG3 KI270824.1 NT_187579.1
chr10_NT_187580v1_alt 0 188315 chr10_NT_187580v1_alt HSCHR10_1_CTG4 KI270825.1 NT_187580.1
chr10_NW_003315934v1_alt 0 179254 chr10_NW_003315934v1_alt HSCHR10_1_CTG1 GL383545.1 NW_003315934.1
chr10_NW_003315935v1_alt 0 309802 chr10_NW_003315935v1_alt HSCHR10_1_CTG2 GL383546.1 NW_003315935.1
chr10_NW_009646202v1_alt 0 277797 chr10_NW_009646202v1_alt HG2191_PATCH KN196480.1 NW_009646202.1
chr10_NW_011332692v1_alt 0 14347 chr10_NW_011332692v1_alt HG2241_PATCH KN538365.1 NW_011332692.1

==> hg38/hg38.chromAlias.bed <==