1dea84ee820d50f05255371bdaaf6d7b98f220ca hiram Wed Mar 23 17:03:10 2022 -0700 adding construction of bigBed file for chromAlias refs #29111 diff --git src/hg/utils/automation/aliasTextToBed.pl src/hg/utils/automation/aliasTextToBed.pl index 6383e70..f7fc41e 100755 --- src/hg/utils/automation/aliasTextToBed.pl +++ src/hg/utils/automation/aliasTextToBed.pl @@ -61,65 +61,75 @@ $chromSizes{$chrom} = $size; } close (SZ); my %nameLabels = ( "assembly" => "Assembly", "genbank" => "GenBank", "ncbi" => "NCBI", "refseq" => "RefSeq", "ucsc" => "UCSC", "ensembl" => "Ensembl" ); open (AS, ">$opt_aliasAs") or die "can not write to $opt_aliasAs"; +my $indexNames; + $titleLine =~ s/^#\s+//; my @legendNames = split('\s+', $titleLine); my $expectFieldCount = scalar(@legendNames); my $i = 0; # output the .as definition printf AS "table chromAlias\n"; printf AS " \"chromAlias bigBed index\"\n"; printf AS " (\n"; printf AS " string chrom;\t\"native sequence name\"\n"; printf AS " uint chromStart;\t\"always 0\"\n"; printf AS " uint chromEnd;\t\"chromosome size\"\n"; foreach my $title (@legendNames) { + if (length($indexNames)) { + $indexNames .= "," . $title; + } else { + $indexNames = $title; + } printf AS " string %s;\t\"%s name\"\n", $title, $nameLabels{$title}; } printf AS " )\n"; close (AS); +printf STDERR "# indexNames: '%s'\n", $indexNames; + open (BD, ">$opt_aliasBed") or die "can not write to $opt_aliasBed"; while (my $line = <FH>) { chomp $line; my @a = split('\t', $line, -1); # the -1 keeps all empty fields too if (scalar(@a) != $expectFieldCount) { printf STDERR "ERROR: expected field count %d =! %d on line %d\n", $expectFieldCount, scalar(@a), $.; exit 255; } my $nameIndex = 0; printf BD "%s\t0\t%d", $a[$nameIndex], $chromSizes{$a[$nameIndex]}; foreach my $name (@a) { printf BD "\t%s", $a[$nameIndex++]; } printf BD "\n"; } close (FH); close (BD); -print `bedToBigBed -tab -type=bed3+5 -as=$opt_aliasAs -extraIndex=ucsc,assembly,genbank,ncbi,refseq $opt_aliasBed $opt_chromSizes $opt_aliasBigBed`; +printf STDERR "bedToBigBed -tab -type=bed3+5 -as=$opt_aliasAs -extraIndex=$indexNames $opt_aliasBed $opt_chromSizes $opt_aliasBigBed\n"; +print `bedToBigBed -tab -type=bed3+5 -as=$opt_aliasAs -extraIndex=$indexNames $opt_aliasBed $opt_chromSizes $opt_aliasBigBed`; __END__ # ucsc assembly genbank ncbi refseq NW_011332701v1_alt HG2139_PATCH KN538374.1 NW_011332701.1 chr1 1 CM000663.2 1 NC_000001.11 chr10 10 CM000672.2 10 NC_000010.11 chr10_NT_187579v1_alt HSCHR10_1_CTG3 KI270824.1 NT_187579.1 chr10_NT_187580v1_alt HSCHR10_1_CTG4 KI270825.1 NT_187580.1 chr10_NW_003315934v1_alt HSCHR10_1_CTG1 GL383545.1 NW_003315934.1 chr10_NW_003315935v1_alt HSCHR10_1_CTG2 GL383546.1 NW_003315935.1 chr10_NW_009646202v1_alt HG2191_PATCH KN196480.1 NW_009646202.1 chr10_NW_011332692v1_alt HG2241_PATCH KN538365.1 NW_011332692.1