8859fbf8a8d7408df039c9ab29c8b3bdeb225516 hiram Wed May 14 12:09:07 2025 -0700
now writing out beta.hub.txt and public.hub.txt based on control from trackDb listings publicGenArk.txt and betaGenArk.txt refs #34917

diff --git src/hg/makeDb/doc/asmHubs/mkGenomes.pl src/hg/makeDb/doc/asmHubs/mkGenomes.pl
index 1dcc6ec36a9..de27d690977 100755
--- src/hg/makeDb/doc/asmHubs/mkGenomes.pl
+++ src/hg/makeDb/doc/asmHubs/mkGenomes.pl
@@ -11,43 +11,113 @@
 if ($argc != 3) {
   printf STDERR "mkGenomes.pl blatHost blatPort [two column name list] > .../hub/genomes.txt\n";
   printf STDERR "e.g.: mkGenomes.pl dynablat-01 4040 vgp.primary.assemblies.tsv > .../vgp/genomes.txt\n";
   printf STDERR "e.g.: mkGenomes.pl hgwdev 4040 vgp.primary.assemblies.tsv > .../vgp/download.genomes.txt\n";
   printf STDERR "the name list is found in \$HOME/kent/src/hg/makeDb/doc/asmHubs/\n";
   printf STDERR "\nthe two columns are 1: asmId (accessionId_assemblyName)\n";
   printf STDERR "column 2: common name for species, columns separated by tab\n";
   printf STDERR "result will write a local asmId.genomes.txt file for each hub\n";
   printf STDERR "and a local asmId.hub.txt file for each hub\n";
   printf STDERR "and a local asmId.groups.txt file for each hub\n";
   printf STDERR "and the output to stdout will be the overall genomes.txt\n";
   printf STDERR "index file for all genomes in the given list\n";
   exit 255;
 }
 
+my $home = $ENV{'HOME'};
+my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";
+my @stageRelease = qw( beta public );
+my %publicContrib; # key is contrib directory name, value is 1
+my %betaContrib; # key is contrib directory name, value is 1
+## instead of value 1, maybe should be a listing of assemblies with this
+## track. Will see if that is needed later.
+
+my $srcTrackDb = "$home/kent/src/hg/makeDb/trackDb";
+
+if ( -s "$srcTrackDb/betaGenArk.txt" ) {
+
+  open (my $fh, "<", "$srcTrackDb/betaGenArk.txt") or die "can not read $srcTrackDb/betaGenArk.txt";
+  while (my $contribName = <$fh>) {
+    next if ($contribName =~ m/^#/);
+    chomp $contribName;
+    $betaContrib{$contribName} = 1;
+  }
+  close ($fh);
+}
+if ( -s "$srcTrackDb/publicGenArk.txt" ) {
+
+  open (my $fh, "<", "$srcTrackDb/publicGenArk.txt") or die "can not read $srcTrackDb/publicGenArk.txt";
+  while (my $contribName = <$fh>) {
+    next if ($contribName =~ m/^#/);
+    chomp $contribName;
+    $publicContrib{$contribName} = 1;
+  }
+  close ($fh);
+}
+
 my $downloadHost = "hgwdev";
-my @blatHosts = qw( dynablat-01 dynablat-01 );
-my @blatPorts = qw( 4040 4040 );
+my @blatHosts = qw( dynablat-01 dynablat-01 dynablat-01 dynablat-01 );
+my @blatPorts = qw( 4040 4040 4040 4040 );
 my $blatHostDomain = ".soe.ucsc.edu";
 my $groupsTxt = `cat ~/kent/src/hg/makeDb/doc/asmHubs/groups.txt`;
 
-################### writing out hub.txt file, twice ##########################
-sub singleFileHub($$$$$$$$$$$$$$) {
+################### writing out hub.txt file, four times ##########################
+sub writeHubTxtFiles($$$$$$$$$$$$$$) {
   my ($fh1, $fh2, $accessionId, $orgName, $descr, $asmId, $asmDate, $defPos,
     $taxId, $trackDb, $accessionDir, $buildDir, $chromAuthority, $hugeGenome) = @_;
   my @fhN;
-  push @fhN, $fh1;
-  push @fhN, $fh2;
+  push @fhN, $fh1; # file 1
+  push @fhN, $fh2; # file 2
+  # the order of these file handles is important since different contents
+  # will be output to the 4th one (beta.hub.txt)
+  # the first three will have identical contents, representing the 'public'
+  # version of hub.txt. The fourth will be the beta.hub.txt which could have
+  # a different set of contrib tracks.
+  # check for contrib tracks that are to go public
+  my %publicTrackDb; # key is track name, value is trackDb.txt content
+  my $publicCount = 0;
+  foreach my $contribTrack (keys %publicContrib) {
+    my $contribDir = "$buildDir/contrib/$contribTrack";
+    if ( -d "${contribDir}" ) {
+      my $contribTdb = "$contribDir/${contribTrack}.trackDb.txt";
+      if ( -s "${contribTdb}" ) {
+        my $tdb = `cat "${contribTdb}"`;
+        chomp $tdb;
+        $publicTrackDb{$contribTrack} .= $tdb;
+        ++$publicCount;
+      }
+    }
+  }
+  # check for contrib tracks that are to go beta
+  my %betaTrackDb; # key is track name, value is trackDb.txt content
+  my $betaCount = 0;
+  foreach my $contribTrack (keys %betaContrib) {
+    my $contribDir = "$buildDir/contrib/$contribTrack";
+    if ( -d "${contribDir}" ) {
+      my $contribTdb = "$contribDir/${contribTrack}.trackDb.txt";
+      if ( -s "${contribTdb}" ) {
+        my $tdb = `cat "${contribTdb}"`;
+        chomp $tdb;
+        $betaTrackDb{$contribTrack} .= $tdb;
+        ++$betaCount;
+      }
+    }
+  }
+  open (my $ph, ">", "$buildDir/public.hub.txt") or die "can not write to $buildDir/public.hub.txt";
+  push @fhN, $ph; # file 3
+  open (my $bh, ">", "$buildDir/beta.hub.txt") or die "can not write to $buildDir/beta.hub.txt";
+  push @fhN, $bh; # file 4
   my %liftOverChain; # key is 'otherDb' name, value is bbi path
   my %liftOverGz; # key is 'otherDb' name, value is lift.over.gz file path
 
   my $hasChainNets = `ls -d $buildDir/trackData/lastz.* 2> /dev/null | wc -l`;
   chomp $hasChainNets;
   if ($hasChainNets) {
     printf STDERR "# hasChainNets: %d\t%s\n", $hasChainNets, $asmId;
     open (my $CN, ">>", "hasChainNets.txt") or die "can not write to hasChainNets.txt";
     printf $CN "%s\t%d\n", $asmId, $hasChainNets;
     open (CH, "ls -d $buildDir/trackData/lastz.*|") or die "can not ls -d $buildDir/trackData/lastz.*";
     while (my $line = <CH>) {
       chomp $line;
       my $otherDb = basename($line);
       $otherDb =~ s/lastz.//;
       my $OtherDb = ucfirst($otherDb);
@@ -102,37 +172,52 @@
     printf $fh "scientificName %s\n", $descr;
     printf $fh "htmlPath html/%s.description.html\n", $asmId;
     # until blat server host is ready for hgdownload, avoid these lines
     if ($blatHosts[$fileCount] ne $downloadHost) {
       printf $fh "blat %s%s %s dynamic $accessionDir/$accessionId\n", $blatHosts[$fileCount], $blatHostDomain, $blatPorts[$fileCount]+$hugeGenome;
       printf $fh "transBlat %s%s %s dynamic $accessionDir/$accessionId\n", $blatHosts[$fileCount], $blatHostDomain, $blatPorts[$fileCount]+$hugeGenome;
       printf $fh "isPcr %s%s %s dynamic $accessionDir/$accessionId\n", $blatHosts[$fileCount], $blatHostDomain, $blatPorts[$fileCount]+$hugeGenome;
     }
     foreach my $otherDb (sort keys %liftOverGz) {
       printf $fh "liftOver.%s %s\n", $otherDb, $liftOverGz{$otherDb};
     }
     printf $fh "\n";
     foreach my $tdbLine (@tdbLines) {
       printf $fh "%s\n", $tdbLine;
     }
+    if (3 == $fileCount) { # writing to beta.hub.txt
+      if (%betaTrackDb) {
+        foreach my $contribTrack (sort keys %betaTrackDb) {
+          printf $fh "%s\n", $betaTrackDb{$contribTrack};
+        }
+      }
+    } else { # the other 3 get the public tracks
+      if (%publicTrackDb) {
+        foreach my $contribTrack (sort keys %publicTrackDb) {
+          printf $fh "%s\n", $publicTrackDb{$contribTrack};
+        }
+      }
+    }
     ++$fileCount;
   }
-} # sub singleFileHub($$$$$$$$$$$$$$)
+  foreach my $fh (@fhN) {
+    close ($fh);
+  }
+  close ($bh);
+} # sub writeHubTxtFiles($$$$$$$$$$$$$$)
 
 ##############################################################################
-my $home = $ENV{'HOME'};
-my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";
 
 my $blatHost = shift;
 my $blatPort = shift;
 my $inputList = shift;
 my $orderList = $inputList;
 if ( ! -s "$orderList" ) {
   $orderList = $toolsDir/$inputList;
 }
 
 my @orderList; # asmId of the assemblies in order from the *.list files
 # the order to read the different .list files:
 my $assemblyCount = 0;
 
 open (FH, "<${orderList}") or die "can not read ${orderList}";
 while (my $line = <FH>) {
@@ -280,31 +365,31 @@
   open (HT, ">$localHubTxt") or die "can not write to $localHubTxt";
   printf HT "hub %s genome assembly\n", $accessionId;
   printf HT "shortLabel %s\n", $orgName;
   printf HT "longLabel %s/%s/%s genome assembly\n", $orgName, $descr, $asmId;
   printf HT "genomesFile genomes.txt\n";
   printf HT "email hclawson\@ucsc.edu\n";
   printf HT "descriptionUrl html/%s.description.html\n", $asmId;
   close (HT);
 
   # try creating single file hub.txt, one for hgwdev, one for hgdownload
   my $downloadHubTxt = "$buildDir/${asmId}.download.hub.txt";
   open (DL, ">$downloadHubTxt") or die "can not write to $downloadHubTxt";
 
   $localHubTxt = "$buildDir/${asmId}.singleFile.hub.txt";
   open (HT, ">$localHubTxt") or die "can not write to $localHubTxt";
-  singleFileHub(\*HT, \*DL, $accessionId, $orgName, $descr, $asmId, $asmDate,
+  writeHubTxtFiles(\*HT, \*DL, $accessionId, $orgName, $descr, $asmId, $asmDate,
     $defPos, $taxId, $trackDb, $accessionDir, $buildDir, $chromAuthority,
     $hugeGenome);
 
   my $localGenomesFile = "$buildDir/${asmId}.genomes.txt";
   open (GF, ">$localGenomesFile") or die "can not write to $localGenomesFile";
   printf GF "genome %s\n", $accessionId;
   printf GF "taxId %s\n", $taxId if (length($taxId) > 1);
   printf GF "trackDb trackDb.txt\n";
   printf GF "groups groups.txt\n";
   printf GF "description %s\n", $orgName;
   printf GF "twoBitPath %s.2bit\n", $accessionId;
   printf GF "twoBitBptUrl %s.2bit.bpt\n", $accessionId;
   printf GF "chromSizes %s.chrom.sizes.txt\n", $accessionId;
   if ( -s "${buildDir}/${asmId}.chromAlias.bb" ) {
     printf GF "chromAliasBb %s.chromAlias.bb\n", $accessionId;