dff254260c6b5cb3ab0f2257775fce47bbbfa3ea hiram Wed Mar 25 11:56:34 2020 -0700 now staging on hgdownload.soe.ucsc.edu refs #24748 diff --git src/hg/makeDb/doc/asmHubs/trackData.pl src/hg/makeDb/doc/asmHubs/trackData.pl index 326e842..937d6d0 100755 --- src/hg/makeDb/doc/asmHubs/trackData.pl +++ src/hg/makeDb/doc/asmHubs/trackData.pl @@ -1,385 +1,413 @@ #!/usr/bin/env perl use strict; use warnings; use File::stat; my $argc = scalar(@ARGV); if ($argc != 2) { printf STDERR "usage: trackData.pl Name asmHubName > trackData.html\n"; printf STDERR "e.g.: trackData.pl Mammals mammals > trackData.html\n"; exit 255; } my $Name = shift; my $asmHubName = shift; my $home = $ENV{'HOME'}; my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs"; my $commonNameList = "$asmHubName.asmId.commonName.tsv"; my $commonNameOrder = "$asmHubName.commonName.asmId.orderList.tsv"; my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my %betterName; # key is asmId, value is better common name than found in # assembly_report my $assemblyTotal = 0; # complete list of assemblies in this group my $asmCount = 0; # count of assemblies completed and in the table my $overallNucleotides = 0; my $overallSeqCount = 0; my $overallGapSize = 0; my $overallGapCount = 0; ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } -# ($itemCount, $percentCover) = oneTrackData($trackFile, $sizeNoGaps, $trackFb); +# ($itemCount, $percentCover) = oneTrackData($asmId, $track, $trackFile, $totalSize, $trackFb, $runDir); # might have a track feature bits file (trackFb), maybe not sub oneTrackData($$$$$$) { my ($asmId, $trackName, $file, $genomeSize, $trackFb, $runDir) = @_; # printf STDERR "# %s\n", $file; my $itemCount = 0; my $percentCover = 0; if (! -s "${file}") { if ($trackName eq "gapOverlap") { if (-s "${runDir}/$asmId.gapOverlap.bed.gz" ) { my $lineCount=`zcat "${runDir}/$asmId.gapOverlap.bed.gz" | head | wc -l`; chomp $lineCount; if (0 == $lineCount) { return("0", "0 %"); } else { return("n/a", "n/a"); } } + } elsif ($trackName eq "gap") { + return("0", "0 %"); } else { return("n/a", "n/a"); } } if ($file =~ m/.bw$/) { my $bigWigInfo = `bigWigInfo "$file" | egrep "basesCovered:|mean:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`; chomp $bigWigInfo; my ($bases, $mean) = split('\s+', $bigWigInfo); $percentCover = sprintf("%.2f %%", 100.0 * $bases / $genomeSize); $itemCount = sprintf ("%.2f", $mean); # printf STDERR "# bigWigInfo %s %s %s\n", $itemCount, $percentCover, $file; } else { my $bigBedInfo = `bigBedInfo "$file" | egrep "itemCount:|basesCovered:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`; chomp $bigBedInfo; my ($items, $bases) = split('\s', $bigBedInfo); $itemCount = commify($items); $percentCover = sprintf("%.2f %%", 100.0 * $bases / $genomeSize); # 56992654 bases of 2616369673 (2.178%) in intersection if ( -s "${trackFb}" ) { -printf STDERR "# $trackFb\n"; my ($itemBases, undef, undef, $noGapSize, undef) = split('\s+', `cat $trackFb`, 5); $percentCover = sprintf("%.2f %%", 100.0 * $itemBases / $noGapSize); } # printf STDERR "# bigBedInfo %s %s %s\n", $itemCount, $percentCover, $file; } return ($itemCount, $percentCover); } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; my $subSetMessage = "subset of $asmHubName only"; if ($asmHubName eq "vertebrate") { $subSetMessage = "subset of other ${asmHubName}s only"; } print <<"END" - + -
Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage.
-count | common name link to genome browser |
gc5 base | AGP gap |
all gaps |
assembly sequences |
rmsk | TRF simpleRepeat |
window Masker |
gap Overlap |
tandem Dups |
cpg unmasked |
cpg island |
genes ncbi |
ncbiRefSeq | xenoRefGene | -augustus | +augustus genes |
+ Ensembl genes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TOTALS: | total assembly count ${assemblyTotal} |
\nOther assembly hubs available:
\n
Primates | \n" - if ($asmHubName ne "primates"); - printf "Mammals | \n" - if ($asmHubName ne "mammals"); - printf "Birds | \n" - if ($asmHubName ne "birds"); - printf "Fish | \n" - if ($asmHubName ne "fish"); - printf "other vertebrates | \n" - if ($asmHubName ne "vertebrate"); + printf "
---|
Assembly hubs index pages: | \n"; + printf "Primates | \n"; + printf "Mammals | \n"; + printf "Birds | \n"; + printf "Fish | \n"; + printf "other vertebrates | \n"; + + printf "
---|---|---|---|---|---|
Hubs assembly statistics: | \n"; + printf "Primates | \n"; + printf "Mammals | \n"; + printf "Birds | \n"; + printf "Fish | \n"; + printf "other vertebrates | \n"; + + printf "
Hubs track statistics: | \n"; + printf "Primates | \n"; + printf "Mammals | \n"; + printf "Birds | \n"; + printf "Fish | \n"; + printf "other vertebrates | \n"; printf "