#!/usr/bin/env perl

# Provenance: commit 1dea84ee820d50f05255371bdaaf6d7b98f220ca
#   hiram, Wed Mar 23 17:03:10 2022 -0700
#   "adding construction of bigBed file for chromAlias refs #29111"
#   new file (mode 100755): src/hg/utils/automation/aliasBedToCt.pl
#
# aliasBedToCt.pl - split a chromAlias.bed file into one track file per
#   name scheme.
#
# usage: aliasBedToCt.pl chromAlias.bed resultDir
#
# The first line of chromAlias.bed is a tab-separated header.  Columns
# 0, 1 and 2 are the sequence name, start and end; every column from
# index 3 onward is a name scheme.  For each name scheme a file
# resultDir/<scheme>.ct.txt is written: a 'track' definition line, then
# one bed line per input row that has a non-empty name in that scheme.
# The bed line uses the scheme's name as the chrom column, and its name
# column is a comma-separated list of the column 0 name followed by every
# other distinct, non-empty alias on that row.
#
# Progress messages go to STDERR.  Exit status is 255 on a usage error.

use strict;
use warnings;

my $argc = scalar(@ARGV);

if ($argc != 2) {
  printf STDERR "usage: ./aliasBedToCt.pl chromAlias.bed resultDir\n";
  printf STDERR "reads the chromAlias.bed file, writes several files\n";
  printf STDERR "into the resultDir one file for each name scheme\n";
  exit 255;
}

my $bedFile = shift;
my $resultDir = shift;
printf STDERR "# chromAlias input: %s\n", $bedFile;
printf STDERR "# results to: %s/\n", $resultDir;

my @sourceNames;	# the name label
my @outPaths;		# output file name for each source name
my @outFiles;		# reference to open file handle for each source name

# three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle is not a package global
open (my $bedFh, '<', $bedFile) or die "can not read $bedFile: $!";
my $headerLine = <$bedFh>;
die "no header line found in $bedFile" if (!defined($headerLine));
chomp $headerLine;
my @header = split('\t', $headerLine);

# one output file per name scheme column, each starting with a track line
for (my $i = 3; $i < scalar(@header); ++$i) {
  my $outFile = sprintf("%s/%s.ct.txt", $resultDir, $header[$i]);
  open (my $fh, '>', $outFile) or die "can not write to $outFile: $!";
  printf STDERR "# %s\t%s\n", $header[$i], $outFile;
  push @sourceNames, $header[$i];
  push @outPaths, $outFile;
  push @outFiles, $fh;
  printf $fh "track name='%s chrNames' description='chrom alias test with \"%s\" name scheme' type=bed visibility=pack\n", $header[$i], $header[$i];
}

while (my $line = <$bedFh>) {
  chomp $line;
  # limit of -1 keeps trailing empty fields, so a row whose last name
  # schemes are empty still has its columns in the expected positions
  my @a = split('\t', $line, -1);
  for (my $i = 0; $i < scalar(@sourceNames); ++$i) {
    my $schemeName = $a[3+$i];
    # this row has no name in this scheme (empty field or short row)
    next if (!defined($schemeName) || !length($schemeName));
    # names already used on this output line, to avoid listing any twice
    my %nameDone = ($schemeName => 1, $a[0] => 1);
    my @names = ($a[0]);
    for (my $j = 3; $j < scalar(@a); ++$j) {
      # post-increment marks the name as seen while testing it
      next if ($nameDone{$a[$j]}++);
      push @names, $a[$j] if (length($a[$j]));
    }
    my $fh = $outFiles[$i];
    printf $fh "%s\t%d\t%d\t%s\n", $schemeName, $a[1], $a[2], join(",", @names);
  }
}
close ($bedFh);

# buffered write errors (disk full, etc.) only show up at close time
for (my $i = 0; $i < scalar(@outFiles); ++$i) {
  close ($outFiles[$i]) or die "error closing $outPaths[$i]: $!";
}

__END__

Sample input lines (fields in the real files are tab-separated):

==> GCF_000001405.39/GCF_000001405.39.chromAlias.bed <==
#chrom chromStart chromEnd ucsc assembly genbank ncbi refseq
NW_011332701v1_alt 0 4998962 NW_011332701v1_alt HG2139_PATCH KN538374.1 NW_011332701.1
chr1 0 248956422 chr1 1 CM000663.2 1 NC_000001.11
chr10 0 133797422 chr10 10 CM000672.2 10 NC_000010.11
chr10_NT_187579v1_alt 0 181496 chr10_NT_187579v1_alt HSCHR10_1_CTG3 KI270824.1 NT_187579.1
chr10_NT_187580v1_alt 0 188315 chr10_NT_187580v1_alt HSCHR10_1_CTG4 KI270825.1 NT_187580.1
chr10_NW_003315934v1_alt 0 179254 chr10_NW_003315934v1_alt HSCHR10_1_CTG1 GL383545.1 NW_003315934.1
chr10_NW_003315935v1_alt 0 309802 chr10_NW_003315935v1_alt HSCHR10_1_CTG2 GL383546.1 NW_003315935.1
chr10_NW_009646202v1_alt 0 277797 chr10_NW_009646202v1_alt HG2191_PATCH KN196480.1 NW_009646202.1
chr10_NW_011332692v1_alt 0 14347 chr10_NW_011332692v1_alt HG2241_PATCH KN538365.1 NW_011332692.1

==> hg38/hg38.chromAlias.bed <==
#chrom chromStart chromEnd ucsc assembly ensembl genbank refseq
chr1 0 248956422 chr1 1 1 CM000663.2 NC_000001.11
chr10 0 133797422 chr10 10 10 CM000672.2 NC_000010.11
chr10_GL383545v1_alt 0 179254 chr10_GL383545v1_alt HSCHR10_1_CTG1 GL383545.1 NW_003315934.1
chr10_GL383546v1_alt 0 309802 chr10_GL383546v1_alt HSCHR10_1_CTG2 GL383546.1 NW_003315935.1
chr10_KI270824v1_alt 0 181496 chr10_KI270824v1_alt HSCHR10_1_CTG3 KI270824.1 NT_187579.1
chr10_KI270825v1_alt 0 188315 chr10_KI270825v1_alt HSCHR10_1_CTG4 KI270825.1 NT_187580.1
chr10_KN196480v1_fix 0 277797 chr10_KN196480v1_fix HG2191_PATCH KN196480.1 NW_009646202.1
chr10_KN538365v1_fix 0 14347 chr10_KN538365v1_fix HG2241_PATCH KN538365.1 NW_011332692.1
chr10_KN538366v1_fix 0 85284 chr10_KN538366v1_fix HG2242_HG2243_PATCH KN538366.1 NW_011332693.1