32bd9b7bea2ca6154383f124a803fdd6ad04c77b
hiram
Sat Feb 29 16:41:43 2020 -0800
add allGaps column and use featureBits result when present and zero when track is empty refs #23891
diff --git src/hg/makeDb/doc/asmHubs/trackData.pl src/hg/makeDb/doc/asmHubs/trackData.pl
index 074214a..326e842 100755
--- src/hg/makeDb/doc/asmHubs/trackData.pl
+++ src/hg/makeDb/doc/asmHubs/trackData.pl
@@ -27,51 +27,71 @@
my $asmCount = 0; # count of assemblies completed and in the table
my $overallNucleotides = 0;
my $overallSeqCount = 0;
my $overallGapSize = 0;
my $overallGapCount = 0;
##############################################################################
# from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters:
##############################################################################
sub commify($) {
  # Insert comma thousands-separators into every run of digits in the
  # argument and return the resulting string.
  # The string is processed reversed so that groups of three digits are
  # counted from the right; the (?!\d*\.) look-ahead keeps commas out of
  # the digits that follow a decimal point.
  my ($number) = @_;
  my $backwards = reverse $number;
  $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  my $withCommas = reverse $backwards;
  return $withCommas;
}
-sub oneTrackData($$) {
- my ($file, $genomeSize) = @_;
+# ($itemCount, $percentCover) = oneTrackData($asmId, $trackName, $trackFile, $sizeNoGaps, $trackFb, $runDir);
+# might have a track feature bits file (trackFb), maybe not
# Returns a two element list: (item count, percent coverage string).
#   - track file missing or empty: ("n/a", "n/a"), except for the
#     "gapOverlap" track, see the notes in the body below
#   - file name ending in "bw": item count is the bigWigInfo 'mean:' value
#     formatted "%.2f", coverage is 'basesCovered:' / $genomeSize
#   - anything else is read with bigBedInfo: item count is the commified
#     'itemCount:' value, coverage is 'basesCovered:' / $genomeSize, replaced
#     by the numbers from the $trackFb file when that file is not empty
# NOTE(review): $genomeSize (and $noGapSize below) are used as divisors
# without a check for zero - confirm callers never pass an empty assembly.
+sub oneTrackData($$$$$$) {
+ my ($asmId, $trackName, $file, $genomeSize, $trackFb, $runDir) = @_;
# printf STDERR "# %s\n", $file;
my $itemCount = 0;
my $percentCover = 0;
# track file is missing or has zero size
if (! -s "${file}") {
# gapOverlap special case: when the bed.gz file in runDir exists and is not
# empty, count its (first ten, via head) lines: zero lines is reported as
# a real zero result, any lines present with no track file is "n/a"
+ if ($trackName eq "gapOverlap") {
+ if (-s "${runDir}/$asmId.gapOverlap.bed.gz" ) {
+ my $lineCount=`zcat "${runDir}/$asmId.gapOverlap.bed.gz" | head | wc -l`;
+ chomp $lineCount;
+ if (0 == $lineCount) {
+ return("0", "0 %");
+ } else {
return("n/a", "n/a");
}
+ }
# NOTE(review): when the bed.gz file is missing or zero size there is no
# return on this path, execution continues to the bigBedInfo call below
# with a missing track file - confirm this fall through is intended.
+ } else {
+ return("n/a", "n/a");
+ }
+ }
# NOTE(review): the '.' in this pattern is not escaped, it matches any
# character before "bw", not only a literal dot.
if ($file =~ m/.bw$/) {
my $bigWigInfo = `bigWigInfo "$file" | egrep "basesCovered:|mean:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`;
chomp $bigWigInfo;
my ($bases, $mean) = split('\s+', $bigWigInfo);
$percentCover = sprintf("%.2f %%", 100.0 * $bases / $genomeSize);
$itemCount = sprintf ("%.2f", $mean);
# printf STDERR "# bigWigInfo %s %s %s\n", $itemCount, $percentCover, $file;
} else {
my $bigBedInfo = `bigBedInfo "$file" | egrep "itemCount:|basesCovered:" | awk '{print \$NF}' | xargs echo | sed -e 's/,//g;'`;
chomp $bigBedInfo;
my ($items, $bases) = split('\s', $bigBedInfo);
$itemCount = commify($items);
$percentCover = sprintf("%.2f %%", 100.0 * $bases / $genomeSize);
# when a non-empty trackFb file is present, its first and fourth white space
# separated fields replace the bigBedInfo coverage; example file content:
+# 56992654 bases of 2616369673 (2.178%) in intersection
+ if ( -s "${trackFb}" ) {
+printf STDERR "# $trackFb\n";
+ my ($itemBases, undef, undef, $noGapSize, undef) = split('\s+', `cat $trackFb`, 5);
+ $percentCover = sprintf("%.2f %%", 100.0 * $itemBases / $noGapSize);
+ }
# printf STDERR "# bigBedInfo %s %s %s\n", $itemCount, $percentCover, $file;
}
return ($itemCount, $percentCover);
}
##############################################################################
### start the HTML output
##############################################################################
sub startHtml() {
my $timeStamp = `date "+%F"`;
chomp $timeStamp;
my $subSetMessage = "subset of $asmHubName only";
if ($asmHubName eq "vertebrate") {
@@ -98,40 +118,41 @@
the genome browser.
The numbers are: item count (percent coverage)
Except for the gc5Base column which is: overall GC % average (percent coverage)
END
}
##############################################################################
### start the table output
##############################################################################
sub startTable() {
print <<"END"
count | common name link to genome browser |
gc5 base | -gap | -assembly | +AGP gap |
+ all gaps |
+ assembly sequences |
rmsk | TRF simpleRepeat |
- windowMasker | -gapOverlap | -tandemDups | +window Masker |
+ gap Overlap |
+ tandem Dups |
cpg unmasked |
cpg island |
- ncbiGene | +genes ncbi |
ncbiRefSeq | xenoRefGene | augustus |
---|