67d50873eca0ae6f595367077c9735b7d348e5c2
hiram
  Fri May 1 12:31:59 2020 -0700
adding functions to build VGP index pages refs #23734

diff --git src/hg/makeDb/doc/asmHubs/mkAsmStats.pl src/hg/makeDb/doc/asmHubs/mkAsmStats.pl
index 5d1f5b0..c6154f9 100755
--- src/hg/makeDb/doc/asmHubs/mkAsmStats.pl
+++ src/hg/makeDb/doc/asmHubs/mkAsmStats.pl
@@ -40,42 +40,66 @@
 }
 
 ##############################################################################
 ### start the HTML output
 ##############################################################################
 sub startHtml() {
 
 my $timeStamp = `date "+%F"`;
 chomp $timeStamp;
 
 my $subSetMessage = "subset of $asmHubName only";
 if ($asmHubName eq "vertebrate") {
    $subSetMessage = "subset of other ${asmHubName}s only";
 }
 
+if ($Name =~ m/vgp/i) {  # VGP hubs (case-insensitive match on $Name): fixed title, logo and project link
+  print <<"END"
+<!DOCTYPE HTML 4.01 Transitional>
+<!--#set var="TITLE" value="VGP - Vertebrate Genomes Project assembly hubs, assembly statistics" -->
+<!--#set var="ROOT" value="../.." -->
+
+<!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" -->
+
+<h1>VGP - Vertebrate Genomes Project assembly hubs, assembly statistics</h1>
+<p>
+<a href='https://vertebrategenomesproject.org/' target=_blank>
+<img src='VGPlogo.png' width=280 alt='VGP logo'></a></p>
+<p>
+This assembly hub contains assemblies released
+by the <a href='https://vertebrategenomesproject.org/' target=_blank>
+Vertebrate Genomes Project.</a>
+</p>
+
+END
+} else {  # any other hub: title built from $Name, followed by the $subSetMessage note
   print <<"END"
 <!DOCTYPE HTML 4.01 Transitional>
 <!--#set var="TITLE" value="$Name genomes assembly hubs, assembly statistics" -->
 <!--#set var="ROOT" value="../.." -->
 
 <!--#include virtual="\$ROOT/inc/gbPageStartHardcoded.html" -->
 
 <h1>$Name Genomes assembly hubs, assembly statistics</h1>
 <p>
 Assemblies from NCBI/Genbank/Refseq sources, $subSetMessage.
 </p>
 
+END
+}
+  # printed for both variants: "see also" links and the notes on using the table
+  print <<"END"
 <h3>See also: <a href='index.html'>hub access</a>,&nbsp;<a href='trackData.html'>track statistics</a></h3><br>
 
 <h3>Data resource links</h3>
 NOTE: <em>Click on the column headers to sort the table by that column</em><br>
 The <em>link to genome browser</em> will attach only that single assembly to
 the genome browser.
 END
 }
 
 ##############################################################################
 ### start the table output
 ##############################################################################
 sub startTable() {
 print <<"END"
 <table class="sortable" border="1">
@@ -205,41 +229,46 @@
 }
 
 ##############################################################################
 ### tableContents()
 ##############################################################################
 sub tableContents() {
 
   foreach my $asmId (reverse(@orderList)) {
     my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3);
     my $accessionId = sprintf("%s_%s", $gcPrefix, $asmAcc);
     my $accessionDir = substr($asmId, 0 ,3);
     $accessionDir .= "/" . substr($asmId, 4 ,3);
     $accessionDir .= "/" . substr($asmId, 7 ,3);
     $accessionDir .= "/" . substr($asmId, 10 ,3);
     my $buildDir = "/hive/data/genomes/asmHubs/refseqBuild/$accessionDir/$asmId";
+    if ($gcPrefix eq "GCA") {
+     $buildDir = "/hive/data/genomes/asmHubs/genbankBuild/$accessionDir/$asmId";
+    }
     my $asmReport="$buildDir/download/${asmId}_assembly_report.txt";
     if (! -s "$asmReport") {
       printf STDERR "# no assembly report:\n# %s\n", $asmReport;
       next;
     }
     my $chromSizes = "${buildDir}/${asmId}.chrom.sizes";
     my $twoBit = "${buildDir}/trackData/addMask/${asmId}.masked.2bit";
     if (! -s "$twoBit") {
       printf STDERR "# no 2bit file:\n# %s\n", $twoBit;
       next;
     }
+    my $trackDb="$buildDir/${asmId}.trackDb.txt";
+    next if (! -s "$trackDb");	# assembly build not complete
     my $faSizeTxt = "${buildDir}/${asmId}.faSize.txt";
     if ( ! -s "$faSizeTxt" ) {
        printf STDERR "twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt\n";
        print `twoBitToFa $twoBit stdout | faSize stdin > $faSizeTxt`;
     }
     my ($gapSize, $maskPerCent) = maskStats($faSizeTxt);
     $overallGapSize += $gapSize;
     my ($seqCount, $totalSize) = asmCounts($chromSizes);
     $overallSeqCount += $seqCount;
 #    my $totalSize=`ave -col=2 $chromSizes | grep "^total" | awk '{printf "%d", \$NF}'`;
     $overallNucleotides += $totalSize;
     my $gapCount = gapStats($buildDir, $asmId);
     $overallGapCount += $gapCount;
     my $sciName = "notFound";
     my $commonName = "notFound";