68e60a3ccfe0c3aaa48f2fa85f86ca700d681e81 hiram Tue Mar 31 12:08:22 2020 -0700 correct hub.txt not testHub.txt refs #23891 diff --git src/hg/makeDb/doc/asmHubs/trackData.pl src/hg/makeDb/doc/asmHubs/trackData.pl index 937d6d0..04c6c21 100755 --- src/hg/makeDb/doc/asmHubs/trackData.pl +++ src/hg/makeDb/doc/asmHubs/trackData.pl @@ -1,413 +1,413 @@
#!/usr/bin/env perl

use strict;
use warnings;
use File::stat;

my $argc = scalar(@ARGV);
if ($argc != 2) {
  printf STDERR "usage: trackData.pl Name asmHubName > trackData.html\n";
  printf STDERR "e.g.: trackData.pl Mammals mammals > trackData.html\n";
  exit 255;
}
my $Name = shift;
my $asmHubName = shift;

my $home = $ENV{'HOME'};
my $toolsDir = "$home/kent/src/hg/makeDb/doc/asmHubs";

my $commonNameList = "$asmHubName.asmId.commonName.tsv";
my $commonNameOrder = "$asmHubName.commonName.asmId.orderList.tsv";
my @orderList;	# asmId of the assemblies in order from the *.list files
# the order to read the different .list files:
my %betterName;	# key is asmId, value is better common name than found in
		# assembly_report
my $assemblyTotal = 0;	# complete list of assemblies in this group
my $asmCount = 0;	# count of assemblies completed and in the table
my $overallNucleotides = 0;
my $overallSeqCount = 0;
my $overallGapSize = 0;
my $overallGapCount = 0;

##############################################################################
# from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters:
##############################################################################
sub commify($) {
  # work on the reversed string so the digit groups are counted from the
  # right-hand end of the number
  my $text = reverse $_[0];
  # the (?!\d*\.) lookahead skips digit groups that, in the reversed text,
  # are followed by a decimal point
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
}

# format 100 * covered / total as "N.NN %"
# returns "n/a" when either value is missing or the total is zero, so an
# empty tool result can not cause a division by zero
sub _percentCover {
  my ($covered, $total) = @_;
  if (!defined($covered) || !defined($total)
	|| $covered eq "" || $total eq "" || 0 == $total) {
    return "n/a";
  }
  return sprintf("%.2f %%", 100.0 * $covered / $total);
}

# ($itemCount, $percentCover) = oneTrackData($asmId, $track, $trackFile, $totalSize, $trackFb, $runDir);
# might have a track feature bits file (trackFb), maybe not
#
#   asmId      - assembly identifier, used to name the gapOverlap bed file
#   trackName  - track name, "gapOverlap" and "gap" have special handling
#                when the track file is missing or empty
#   file       - track data file, names ending in .bw are measured with
#                bigWigInfo, everything else with bigBedInfo
#   genomeSize - denominator for the percent coverage calculation
#   trackFb    - feature bits result file, when present and not empty its
#                numbers replace the bigBedInfo percent coverage
#   runDir     - directory where $asmId.gapOverlap.bed.gz is expected
#
# returns a two element list of display strings: for bigWig files the mean
#   value to two decimals, for bigBed files the comma delimited item count,
#   followed by the percent coverage.  "n/a" is returned for values that
#   can not be determined
sub oneTrackData($$$$$$) {
  my ($asmId, $trackName, $file, $genomeSize, $trackFb, $runDir) = @_;
# printf STDERR "# %s\n", $file;
  my $itemCount = 0;
  my $percentCover = 0;
  if (! -s "${file}") {
    if ($trackName eq "gapOverlap") {
      my $gapOverlapBed = "${runDir}/$asmId.gapOverlap.bed.gz";
      if (-s $gapOverlapBed) {
        # a compressed bed file with no lines means the track was computed
        # and legitimately has no items
        my $lineCount=`zcat "$gapOverlapBed" | head | wc -l`;
        chomp $lineCount;
        return("0", "0 %") if (0 == $lineCount);
      }
      # no track file, and either no bed file at all or a bed file with
      # items: nothing to measure, do not fall through to bigBedInfo
      return("n/a", "n/a");
    } elsif ($trackName eq "gap") {
      return("0", "0 %");
    }
    return("n/a", "n/a");
  }
  if ($file =~ m/\.bw$/) {
    my $bigWigInfo = `bigWigInfo "$file" | egrep "basesCovered:|mean:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`;
    chomp $bigWigInfo;
    my ($bases, $mean) = split('\s+', $bigWigInfo);
    $percentCover = _percentCover($bases, $genomeSize);
    $itemCount = defined($mean) ? sprintf ("%.2f", $mean) : "n/a";
#    printf STDERR "# bigWigInfo %s %s %s\n", $itemCount, $percentCover, $file;
  } else {
    my $bigBedInfo = `bigBedInfo "$file" | egrep "itemCount:|basesCovered:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`;
    chomp $bigBedInfo;
    my ($items, $bases) = split('\s+', $bigBedInfo);
    $itemCount = defined($items) ? commify($items) : "n/a";
    $percentCover = _percentCover($bases, $genomeSize);
    # 56992654 bases of 2616369673 (2.178%) in intersection
    if ( -s "${trackFb}" ) {
      # read the feature bits file directly instead of via a shell 'cat'
      if (open (my $fb, '<', $trackFb)) {
        my $fbText = do { local $/; <$fb> };
        close ($fb);
        my ($itemBases, undef, undef, $noGapSize, undef) = split('\s+', defined($fbText) ? $fbText : "", 5);
        my $fbCover = _percentCover($itemBases, $noGapSize);
        # keep the bigBedInfo based value when the file can not be used
        $percentCover = $fbCover if ($fbCover ne "n/a");
      }
    }
#    printf STDERR "# bigBedInfo %s %s %s\n", $itemCount, $percentCover, $file;
  }
  return ($itemCount, $percentCover);
}

##############################################################################
### start the HTML output
##############################################################################
sub startHtml() {

my $timeStamp = `date "+%F"`;
chomp $timeStamp;

my $subSetMessage = "subset of $asmHubName only";

if ($asmHubName eq "vertebrate") {
   $subSetMessage = "subset of other ${asmHubName}s only";
}

print <<"END"
Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage.
-count | common name link to genome browser |
gc5 base | AGP gap |
all gaps |
assembly sequences |
rmsk | TRF simpleRepeat |
window Masker |
gap Overlap |
tandem Dups |
cpg unmasked |
cpg island |
genes ncbi |
ncbiRefSeq | xenoRefGene | augustus genes |
Ensembl genes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TOTALS: | total assembly count ${assemblyTotal} |
\n
Assembly hubs index pages: | \n"; printf "Primates | \n"; printf "Mammals | \n"; printf "Birds | \n"; printf "Fish | \n"; printf "other vertebrates | \n"; printf "|||||
---|---|---|---|---|---|---|---|---|---|---|
Hubs assembly statistics: | \n"; - printf "Primates | \n"; - printf "Mammals | \n"; - printf "Birds | \n"; - printf "Fish | \n"; - printf "other vertebrates | \n"; + printf "Primates | \n"; + printf "Mammals | \n"; + printf "Birds | \n"; + printf "Fish | \n"; + printf "other vertebrates | \n"; printf "
Hubs track statistics: | \n"; printf "Primates | \n"; printf "Mammals | \n"; printf "Birds | \n"; printf "Fish | \n"; printf "other vertebrates | \n"; printf "