6ba4f09fcc1fd9831ff2da23ddeef5740837d90c
hiram
  Tue Feb 16 08:58:29 2021 -0800
add mm39 to the default chainNet table checks, refs #26682

diff --git src/hg/utils/automation/verifyBrowser.pl src/hg/utils/automation/verifyBrowser.pl
index 21cbcd3..6c0b5e2 100755
--- src/hg/utils/automation/verifyBrowser.pl
+++ src/hg/utils/automation/verifyBrowser.pl
@@ -1,316 +1,331 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my $argc = scalar(@ARGV);
 
 if ($argc != 1) {
   printf STDERR "usage: verifyBrowser.pl <db>\n";
   printf STDERR "\twill check the <db> for the set of tables required\n";
   printf STDERR "\tfor a completed genome browser build.\n";
   exit 255;
 }
 
 my %optionalCheckList = ( 'ensGene' => "Ensembl genes",
 'ensGtp' => "Ensembl genes",
 'ensPep' => "Ensembl genes",
 'ensemblSource' => "Ensembl genes",
 'ensemblToGeneName' => "Ensembl genes",
 'extNcbiRefSeq' => "NCBI RefSeq genes",
 'ncbiRefSeq' => "NCBI RefSeq genes",
 'ncbiRefSeqCds' => "NCBI RefSeq genes",
 'ncbiRefSeqCurated' => "NCBI RefSeq genes",
 'ncbiRefSeqLink' => "NCBI RefSeq genes",
 'ncbiRefSeqOther' => "NCBI RefSeq genes",
 'ncbiRefSeqPepTable' => "NCBI RefSeq genes",
 'ncbiRefSeqPredicted' => "NCBI RefSeq genes",
 'ncbiRefSeqPsl' => "NCBI RefSeq genes",
 'seqNcbiRefSeq' => "NCBI RefSeq genes",
 'chainRBestHg38' => "chainNetRBestHg38",
 'chainRBestHg38Link' => "chainNetRBestHg38",
 'chainRBestMm10' => "chainNetRBestMm10",
 'chainRBestMm10Link' => "chainNetRBestMm10",
+'chainRBestMm39' => "chainNetRBestMm39",
+'chainRBestMm39Link' => "chainNetRBestMm39",
 'chainSynHg38' => "chainNetSynHg38",
 'chainSynHg38Link' => "chainNetSynHg38",
 'chainSynMm10' => "chainNetSynMm10",
 'chainSynMm10Link' => "chainNetSynMm10",
+'chainSynMm39' => "chainNetSynMm39",
+'chainSynMm39Link' => "chainNetSynMm39",
 'netRBestHg38' => "chainNetRBestHg38",
 'netRBestMm10' => "chainNetRBestMm10",
+'netRBestMm39' => "chainNetRBestMm39",
 'netSynHg38' => "chainNetSynHg38",
 'netSynMm10' => "chainNetSynMm10",
+'netSynMm39' => "chainNetSynMm39",
 'tandemDups' => "tandemDups",
 'gapOverlap' => "gapOverlap"
 );
 
 my %tableCheckList = ( 'augustusGene' => 1,
 'chainHg38' => 1,
 'chainHg38Link' => 1,
 'chainMm10' => 1,
 'chainMm10Link' => 1,
+'chainMm39' => 1,
+'chainMm39Link' => 1,
 'chromAlias' => 1,
 'chromInfo' => 1,
 'cpgIslandExt' => 1,
 'cpgIslandExtUnmasked' => 1,
 'cytoBandIdeo' => 1,
 'gap' => 1,
 'gc5BaseBw' => 1,
 'genscan' => 1,
 'genscanSubopt' => 1,
 'gold' => 1,
 'grp' => 1,
 'hgFindSpec' => 1,
 'history' => 1,
 'microsat' => 1,
 'nestedRepeats' => 1,
 'netHg38' => 1,
 'netMm10' => 1,
+'netMm39' => 1,
 'rmsk' => 1,
 'simpleRepeat' => 1,
 'tableDescriptions' => 1,
 'trackDb' => 1,
 'ucscToINSDC' => 1,
 'ucscToRefSeq' => 1,
 'windowmaskerSdust' => 1
 );
 
 ## from /cluster/data/genbank/etc/gbPerAssemblyTables.txt
 ## genbank tables
 ## some of these should be present, do not need to be all
 my %gbCheckList = ( 'gbLoaded' => 1,
 'all_mrna' => 1,
 'xenoMrna' => 1,
 'mrnaOrientInfo' => 1,
 'all_est' => 1,
 'intronEst' => 1,
 'gbStatus' => 1,
 'xenoEst' => 1,
 'estOrientInfo' => 1,
 'refGene' => 1,
 'refSeqAli' => 1,
 'refFlat' => 1,
 'xenoRefGene' => 1,
 'xenoRefSeqAli' => 1,
 'xenoRefFlat' => 1,
 'mgcFullStatus' => 1,
 'mgcStatus' => 1,
 'mgcFullMrna' => 1,
 'mgcGenes' => 1,
 'mgcFailedEst' => 1,
 'mgcIncompleteMrna' => 1,
 'mgcPickedEst' => 1,
 'mgcUnpickedEst' => 1,
 'orfeomeMrna' => 1,
 'orfeomeGenes' => 1,
 'ccdsGene' => 1,
 'ccdsInfo' => 1,
 'ccdsNotes' => 1,
 'ccdsKgMap' => 1
 );	# my %gbCheckList
 
 sub checkTableExists($$) {
   my ($db, $table) = @_;
   my $lineCount = `hgsql -N -e 'desc $table;' $db 2> /dev/null | wc -l`;
   chomp $lineCount;
   if ($lineCount > 0) {
     return 1;
   } else {
     return 0;
   }
 }
 
 #############################################################################
 ## main() starts here
 
 my $db = shift;
 my $Db = ucfirst($db);
 my $dbVersion = $db;
 $dbVersion =~ s/^[a-z]+//i;
 my $dbPrefix = $db;
 $dbPrefix =~ s/[0-9]+$//;
 
 my $dbDbNames = `hgsql -N -e 'select organism,scientificName from dbDb where name="$db";' hgcentraltest`;
 chomp $dbDbNames;
 $dbDbNames =~ s/\t/, /;
 
 my $tableCount = 0;
 my %tableList;	# key is table name, value is 1
 open (FH, "hgsql -N -e 'show tables;' $db|") or die "can not hgsql -N -e 'show tables $db'";
 while (my $table = <FH>) {
    chomp $table;
    if ($table !~ m/trackDb_|hgFindSpec_/ ) {
      $tableList{$table} = 1;
      ++$tableCount;
    }
 }
 close (FH);
 
 printf STDERR "# %d tables in database %s - %s\n", $tableCount, $db, $dbDbNames;
 
 my %extraTables;
 my $extraTableCount = 0;
 my $tablesFound = 0;
 my $optionalCount = 0;
 my %optionsFound;	# key is category, value is count of tables
 
 foreach my $table (sort keys %tableList) {
   if (defined($tableCheckList{$table}) || defined($gbCheckList{$table}) || defined($optionalCheckList{$table}) ) {
     ++$tablesFound;
     if (defined($optionalCheckList{$table})) {
        $optionalCount += 1;
        $optionsFound{$optionalCheckList{$table}} += 1;
     }
   } else {
     $extraTables{$table} = 1;
     ++$extraTableCount;
   }
 }
 
 printf STDERR "# verified %d tables in database $db, %d extra tables, %d optional tables\n", $tablesFound, $extraTableCount, $optionalCount;
 if ($optionalCount > 0) {
    foreach my $category (sort keys %optionsFound) {
      printf "# %s\t%d optional tables\n", $category, $optionsFound{$category};
    }
 }
 
 my $shownTables = 0;
 foreach my $table (sort keys %extraTables) {
   ++$shownTables;
   if ($extraTableCount > 10) {
     if ( ($shownTables < 5) || ($shownTables > ($extraTableCount - 4)) ) {
        printf STDERR "# %d\t%s\t- extra table\n", $shownTables, $table;
     } elsif ($shownTables == 5) {
        printf STDERR "# . . . etc . . .\n";
     }
   } else {
     printf STDERR "# %d\t%s\t- extra table\n", $shownTables, $table;
   }
 }
 
 my $gbTableCount = 0;
 foreach my $table (sort keys %tableList) {
   $gbTableCount += 1 if (defined($gbCheckList{$table}));
 }
 
 if ($gbTableCount < 1) {
   printf STDERR "# ERROR: no genbank tables found\n";
 } else {
   printf STDERR "# %d genbank tables found\n", $gbTableCount;
 }
 
 my %missingTables;
 my $missingTableCount = 0;
 $tablesFound = 0;
 
 my $chainSelf = "chain.*" . $Db . "*";
 my $netSelf = "net.*" . $Db . "*";
 
 foreach my $table (sort keys %tableCheckList) {
   if (defined($tableList{$table})) {
     ++$tablesFound;
   } else {
     next if ($table =~ m/$chainSelf|$netSelf/);
     if ($table !~ m/^ccds|^mgc/) {
       $missingTables{$table} = 1;
       ++$missingTableCount;
     } elsif ( ($table =~ m/^ccds/) && ($db =~ m/^hg|^mm/) ) {
       $missingTables{$table} = 1;
       ++$missingTableCount;
     } elsif ( ($table =~ m/^mgc/) &&
                  ($db =~ m/^bosTau|^danRer|^hg|^mm|^rn|^xenTro/) ) {
       $missingTables{$table} = 1;
       ++$missingTableCount;
     }
   }
 }
 
 printf STDERR "# verified %d required tables, %d missing tables\n", $tablesFound, $missingTableCount;
 
 my $missedOut = 0;
 foreach my $table (sort keys %missingTables) {
   ++$missedOut;
   printf STDERR "# %d\t%s\t- missing table\n", $missedOut, $table;
 }
 
 my %optionalChainNet;
 my %expectedChainNet;
 my @chainTypes = ("", "RBest", "Syn");
-my @otherDbs = ("hg38", "mm10");
+my @otherDbs = ("hg38", "mm10", "mm39");
 for (my $i = 0; $i < scalar(@chainTypes); ++$i) {
    my $chainTable = "chain" . $chainTypes[$i] .  $Db;
    my $chainLinkTable = "chain" . $chainTypes[$i] .  $Db . "Link";
    my $netTable = "net" . $chainTypes[$i] . $Db;
    for (my $j = 0; $j < scalar(@otherDbs); ++$j) {
       next if ($db eq $otherDbs[$j]);
-      # mm10 Syntenics do not exist (yet)
-      next if ($otherDbs[$j] eq "mm10" && $chainTypes[$i] eq "Syn");
       if (length($chainTypes[$i]) > 0) { # RBest and Syn are optional
          $optionalChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $chainTable));
          $optionalChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $chainLinkTable));
          $optionalChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $netTable));
       } else {
       printf STDERR "# missing $otherDbs[$j].$chainTable\n" if (! checkTableExists($otherDbs[$j], $chainTable));
       printf STDERR "# missing $otherDbs[$j].$chainLinkTable\n" if (! checkTableExists($otherDbs[$j], $chainLinkTable));
       printf STDERR "# missing $otherDbs[$j].$netTable\n" if (! checkTableExists($otherDbs[$j], $netTable));
       $expectedChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $chainTable));
       $expectedChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $chainLinkTable));
       $expectedChainNet{$otherDbs[$j]} += 1 if (checkTableExists($otherDbs[$j], $netTable));
       }
    }
 }
 
 foreach my $expected (sort keys %expectedChainNet) {
   printf STDERR "# %s chainNet to %s found %d required tables\n", $expected, $db, $expectedChainNet{$expected};
 }
 foreach my $optional (sort keys %optionalChainNet) {
   printf STDERR "# %s chainNet RBest and syntenic to %s found %d optional tables\n", $optional, $db, $optionalChainNet{$optional};
 }
 
 if ($dbVersion > 1) {
   my $toOthers = `hgsql -N -e 'select fromDb,toDb from liftOverChain where fromDb = "$db" AND toDb like "${dbPrefix}%";' hgcentraltest | wc -l`;
   chomp $toOthers;
   my $fromOthers = `hgsql -N -e 'select fromDb,toDb from liftOverChain where fromDb like "${dbPrefix}%" AND toDb = "${db}";' hgcentraltest | wc -l`;
   chomp $fromOthers;
   if (($toOthers > 0) && ($fromOthers > 0)) {
     printf STDERR "# liftOver to previous versions: $toOthers, from previous versions: $fromOthers\n";
   } else {
     printf STDERR "# ERROR: liftOvers to/from previous versions not complete\n";
   }
 }
 
 my $blatServers=`hgsql -N -e 'select * from blatServers where db="$db";' hgcentraltest | wc -l`;
 if ($blatServers != 2) {
   printf STDERR "# ERROR: blat server not found in hgcentraltest.blatServers ?\n";
 } else {
   printf "# blatServers: %s", `hgsql -N -e 'select * from blatServers where db="$db";' hgcentraltest | xargs echo`;
 }
 
 my $chainNet = 0;
 if ( $db ne "hg38" ) {
   $chainNet = `hgsql -e 'select * from trackDb;' hg38 | egrep "^chain$Db|^net$Db" | wc -l`;
   chomp $chainNet;
   if ($chainNet != 2) {
    printf STDERR "# ERROR: missing hg38.chainNet trackDb definitions for $db (found: $chainNet instead of 2)\n";
   }
 }
 
 if ( $db ne "mm10" ) {
   $chainNet = `hgsql -e 'select * from trackDb;' mm10 | egrep "^chain$Db|^net$Db" | wc -l`;
   chomp $chainNet;
   if ($chainNet != 2) {
    printf STDERR "# ERROR: missing mm10.chainNet trackDb definitions for $db (found: $chainNet instead of 2)\n";
   }
 }
 
+if ( $db ne "mm39" ) {
+  $chainNet = `hgsql -e 'select * from trackDb;' mm39 | egrep "^chain$Db|^net$Db" | wc -l`;
+  chomp $chainNet;
+  if ($chainNet != 2) {
+   printf STDERR "# ERROR: missing mm39.chainNet trackDb definitions for $db (found: $chainNet instead of 2)\n";
+  }
+}
+
 my $goldSearch = `hgsql -N -e 'select termRegex from hgFindSpec where searchTable="gold" AND searchName="gold";' $db | grep "abuz" | wc -l`;
 chomp $goldSearch;
 
 if ($goldSearch == 1) {
   printf STDERR "# ERROR: missing specific hgFindSpec rule for gold table\n";
 }
 
 my $allJoiner = `grep $db /usr/local/apache/cgi-bin/all.joiner | head -1 | wc -l`;
 chomp $allJoiner;
 if ($allJoiner != 1) {
   printf STDERR "# ERROR missing definitions in hg/makeDb/schema/all.joiner\n";
 }