7a14294a4cf3e478e7ed6f353af8c482ee6b8934
braney
  Wed Jan 13 11:36:28 2021 -0800
search only hg/lib for AS files to build the tableDescriptions table.

diff --git src/test/buildTableDescriptions.pl src/test/buildTableDescriptions.pl
index 3da3d2f..b9b00ad 100755
--- src/test/buildTableDescriptions.pl
+++ src/test/buildTableDescriptions.pl
@@ -1,459 +1,467 @@
 #!/usr/bin/env perl
 #
 # buildTableDescriptions.pl - read .as files in a directory tree and use them to
 #       create the tableDescriptions table used by hg CGI's 
 # See usage for a description of parameters.
 #
 
 use warnings;
 # Figure out path of executable so we can add perllib to the path.
 use FindBin qw($Bin);
 use lib "$Bin/perllib";
 use TrackDb;
 use HgConf;
 
 use Getopt::Long;
 use DBI;
 use Carp;
 use strict;
 
 #
 # Defaults that command line options may override:
 #
 my $kentSrc   = "/cluster/home/galt/kentclean/src";
 # Note: gbdDescriptions.html currently does not get checked-in to git.
 my $gbdDPath  = "/cluster/home/galt/kentclean/src/hg/htdocs/goldenPath/gbdDescriptions.html";
 my ($noLoad, $verbose) = (0, 0);
+my $subDir   = "";
 
 # Fixed settings (no command line option changes these):
 my $debug         = 0;
 # .as files, named by their path below kent/src, that are skipped to avoid
 # errors about duplicate table/object definitions.  Only the keys matter;
 # every value is the empty string.
 my %autoSqlIgnore = map { $_ => "" } qw(
     hg/autoSql/tests/input/polyTest.as
     hg/autoSql/tests/input/newTest.as
     hg/autoSql/tests/input/doc.as
     hg/autoSql/tests/input/doc2.as
     hg/autoSql/doc.as
     hg/autoSql/doc2.as
     hg/instinct/instinctMember/members.as
     hg/instinct/bioIntegrator/bioIntDb.as
     hg/lib/bed.as
     hg/lib/ggDbRep.as
     hg/lib/genotype.as
     hg/lib/ispyTables.as
     hg/lib/gsid/vax003AaCons.as
     hg/lib/gsid/vax003AaMaf.as
     hg/lib/gsid/vax003Cons.as
     hg/lib/gsid/vax003Maf.as
     hg/lib/gsid/vax004AaCons.as
     hg/lib/gsid/vax004AaMaf.as
     hg/lib/gsid/vax004Cons.as
     hg/lib/gsid/vax004Maf.as
     hg/makeDb/schema/joinerGraph/all.as
     hg/makeDb/schema/joinerGraph/swissProt.as
     hg/protein/spToDb/spDbTables.as
     hg/ultras/ultraDb/as/udHistory.as
     hg/bioImage/bioImage.as
     hg/bioImage/loadMahoney/mahoney.as
     hg/genePix/genePix.as
     hg/genePix/loadMahoney/mahoney.as
     hg/visiGene/visiGene.as
     hg/visiGene/vgLoadMahoney/mahoney.as
     lib/pslWScore.as
     utils/bedToBigBed/tests/input/itemRgb.as
 );
 
 # Name of this script with any leading directories removed.
 (my $basename = $0) =~ s@.*/@@;
 #
 # usage: Print help message to STDERR and exit, happy or unhappy.
 #   Argument: the exit status (0 = happy, nonzero = unhappy).  Defaults to 0
 #   when omitted.
 #
 sub usage {
     # exit() evaluates its operand in scalar context, so exit(@_) exits with
     # the number of arguments (1) instead of the requested status.  Pull the
     # status out of @_ explicitly.
     my $status = shift;
     $status = 0 if (! defined $status);
     print STDERR "Usage:
 $basename  [-kentSrc dir]  [-gbdDPath f]  [-db db]  [-hgConf file]  [-subDir dir]  [-noLoad]  [-help]
     -kentSrc dir:	Use dir as the kent/src checkout.
 			Default: $kentSrc.
     -gbdDPath f:	Use f as the gbdDescriptions.html.
 			Default: $gbdDPath.
     -db db:             Work only on db, not on all active dbs.
     -hgConf file:       Use file instead of ~/.hg.conf.
     -noLoad:		Don't load the database, just create .sql files.
+    -subDir dir:	Grab AS files from this sub directory of kentSrc rather than kentSrc.
     -help:		Print this message.
 ";
     exit($status);
 } # end usage
 
 
 #
 # getActiveDbs: connect to central db, get list of active dbs
 #   Argument: $hgConf -- object whose lookup() method supplies the
 #             central.db, central.host, central.user and central.password
 #             settings.
 #   Returns:  list of the name column of dbDb rows that have active = 1.
 #   Dies when the connection or the query fails.
 #
 sub getActiveDbs {
   my $hgConf = shift;
   confess "Too many arguments" if (defined shift);
   my $centdb = $hgConf->lookup('central.db');
   my $host = $hgConf->lookup('central.host');
   my $username = $hgConf->lookup('central.user');
   my $password = $hgConf->lookup('central.password');
   # Without this check a failed connect leaves $dbh undefined and the script
   # dies on the next line with a method-call-on-undef error that does not
   # say which database was unreachable.
   my $dbh = DBI->connect("DBI:mysql:database=$centdb;host=$host",
                          $username, $password)
     || die "Can't connect to central db \"$centdb\" on host \"$host\": " .
            $DBI::errstr;
   my $results =
       $dbh->selectcol_arrayref("select name from dbDb where active = 1;");
   # Grab the error text before disconnecting, in case the query failed.
   my $queryErr = $dbh->errstr;
   $dbh->disconnect();
   if (! defined $results) {
     $queryErr = "unknown error" if (! defined $queryErr);
     die "Can't get active dbs from dbDb in \"$centdb\": $queryErr";
   }
   return @{$results};
 }
 
 #
 # simplifyFields: normalize a comma-terminated field list by removing a
 # leading "bin," and turning a trailing ",crc," into ",", when present.
 #   Argument: the field list string.
 #   Returns:  the trimmed copy.
 #
 sub simplifyFields {
   my ($spec, $extra) = @_;
   confess "Too few arguments"  unless (defined $spec);
   confess "Too many arguments" if (defined $extra);
   # $_ is aliased to our private copy, so both substitutions edit $spec.
   for ($spec) {
     s/^bin,//;
     s/,crc,$/,/;
   }
   return $spec;
 }
 
 #
 # getTableFields: connect to db, get tables and fields
 #   Arguments:
 #     $hgConf -- object whose lookup() method supplies db.host, db.user and
 #                db.password.
 #     $db     -- name of the database to inspect.
 #   Returns: hash of table name => comma-terminated field list (as trimmed
 #            by simplifyFields).  Names of split tables are folded into one
 #            chrN_ entry.
 #   Dies when the connection fails.
 #
 sub getTableFields {
   my $hgConf = shift;
   my $db = shift;
   confess "Too few arguments"  if (! defined $db);
   confess "Too many arguments" if (defined shift);
   my $host = $hgConf->lookup('db.host');
   # Name the host in the DSN only when one is configured and it is not
   # localhost.
   if ($host && $host ne 'localhost') {
     $host = ";host=$host";
   } else {
     $host = "";
   }
   my $username = $hgConf->lookup('db.user');
   my $password = $hgConf->lookup('db.password');
   # Without this check a failed connect leaves $dbh undefined and the script
   # dies on the next method call without saying which db was unreachable.
   my $dbh = DBI->connect("DBI:mysql:database=$db$host", $username, $password)
     || die "Can't connect to db \"$db\": " . $DBI::errstr;
   my %tableFields = ();
   # Lower-cased table names seen so far, to catch names that differ by case
   # only.
   my %tableNamesInsens = ();
   my $tables = $dbh->selectcol_arrayref("show tables;");
   foreach my $t (@{$tables}) {
     # selectcol_arrayref yields the first column of each "desc" row.
     my $desc = $dbh->selectcol_arrayref("desc $t;");
     my $fields = "";
     foreach my $f (@{$desc}) {
       $fields .= "$f,";
     }
     # Fold the per-chromosome names of a split table into one chrN_ name.
     $t =~ s/^chr\w+_(\w+_)?/chrN_/;
     $fields = &simplifyFields($fields);
     if (defined $tableFields{$t} &&
         $tableFields{$t} ne $fields) {
       warn "table fields differ for split table $db.$t:\n$tableFields{$t} != $fields";
     }
     my $tableInsens = $t;
     $tableInsens =~ tr/A-Z/a-z/;
     # A name not stored yet whose lower-cased form has been seen differs
     # from an earlier table by case only; keep the earlier one.
     if (! defined $tableFields{$t} &&
         defined $tableNamesInsens{$tableInsens}) {
       warn "Case-insensitive duplicate for $db.$t... dropping.";
     } else {
       $tableFields{$t} = $fields;
       $tableNamesInsens{$tableInsens} = 1;
     }
   }
   $dbh->disconnect();
   return %tableFields;
 }
 
 #
 # slurpAutoSql: find all .as files under rootDir/subDir, grab contents.
 #   Arguments:
 #     $rootDir -- top of the tree; it is stripped from each found path
 #                 before the path is looked up in %autoSqlIgnore.
 #     $subDir  -- optional sub directory of $rootDir to search.  When it is
 #                 omitted or empty, all of $rootDir is searched.
 #   Returns: hash of table name => { fields, autoSql, tableName, filename },
 #            with bedN entries derived from the "bed" definition added.
 #   Dies when find can't be started or a file can't be opened or parsed.
 #
 sub slurpAutoSql {
   my $rootDir = shift;
+  my $subDir = shift;
   confess "Too few arguments"  if (! defined $rootDir);
   confess "Too many arguments" if (defined shift);
   # Callers that pass only rootDir search the whole tree, without an
   # uninitialized-value warning.
   $subDir = "" if (! defined $subDir);
   # List-form pipe open hands the directory to find as a single argument;
   # no shell gets to split or expand it.
-  open(P, "find $rootDir -name '*.as' -print |") || die "Can't open pipe";
+  open(my $findPipe, "-|", "find", "$rootDir/$subDir", "-name", "*.as", "-print") || die "Can't open pipe: $!";
   my %tableAS = ();
   my %objectAS = ();
   my $gotLeftParen = 0;
   while (my $filename = <$findPipe>) {
     chomp $filename;
     # Strip the root so the path can be looked up in %autoSqlIgnore;
     # \Q..\E keeps regex metacharacters in rootDir literal.
     my $filetail = $filename;  $filetail =~ s/^\Q$rootDir\E\/+//;
     next if (defined $autoSqlIgnore{$filetail});
     open(my $asFh, "<", $filename) || die "Can't open $filename: $!";
     my $as = "";
     my $table = "";
     my $object = "";
     my $fields = "";
     while (<$asFh>) {
       $as .= $_;
       if (/^\s*\(/) {
         $gotLeftParen = 1;
       }
       if (/^\s*table\s+(\S+)[^\;]*$/i) {
         # "table <name>" with no semicolon on the line: start of a table def.
         $table = $1;
         $object = "";
       } elsif (/^\s*(object|simple)\s+(\S+)/ && !$gotLeftParen) {
         # Start of an object/simple def (only before the opening paren).
         $object = $2;
         $table = "";
       } elsif (/^\s*enum\s*\([^\)]+\)\s*(\S+)\s*;/) {
         $fields .= "$1,";
       } elsif (/^[^\"]+\s+(\S+)\s*;/) {
         # Any other field line: the word before the semicolon is the name.
         $fields .= "$1,";
       } elsif (/^\s*\)/) {
         # Closing paren: the definition is complete, so store it.
         if (($table eq "" && $object eq "") || $fields eq "") {
           die "Trouble parsing autoSql file $filename:\n$as";
         }
         if ($table ne "") {
           if (defined $tableAS{$table}) {
             warn "Duplicate autoSql def for table $table (" .
               $tableAS{$table}->{filename} . " vs. $filename)";
           }
           $tableAS{$table} = { fields => &simplifyFields($fields),
                                autoSql => $as,
                                tableName => $table,
                                filename => $filename, };
         } elsif ($object ne "") {
           # $table is empty in this branch, so the duplicate check has to
           # be keyed by the object name.
           if (defined $objectAS{$object}) {
             warn "Duplicate autoSql def for object $object (" .
               $objectAS{$object}->{filename} . " vs. $filename)";
           }
           $objectAS{$object} = { autoSql => $as,
                                  objectName => $object,
                                  filename => $filename, };
         }
         $as = $table = $object = $fields = "";
         $gotLeftParen = 0;
       }
     } # each line of autoSql file
     close($asFh);
   } # each autoSql file found in $rootDir/$subDir
   # close() on a pipe is false when the command did not exit cleanly, for
   # example when the directory does not exist.
   close($findPipe)
     || warn "find in $rootDir/$subDir did not exit cleanly (status $?)";
   # Add bedN (for when we get the fields from the trackDb.type).
   if (defined $tableAS{"bed"}) {
     my @bedFields = split(",", $tableAS{"bed"}->{fields});
     my @autoSql   = split("\n", $tableAS{"bed"}->{autoSql});
     my $bedFilename = $tableAS{"bed"}->{filename};
     # Store the def for the current number of fields, then drop the last
     # field (and its autoSql line) and rename the table to make the next
     # smaller bedN.
     for (my $n=scalar(@bedFields);  $n >= 3;  $n--) {
       $tableAS{"bed$n"} = { fields => join(",", @bedFields) . ",",
                             autoSql => join("\n", @autoSql) . "\n",
                             tableName => "bed$n",
                             filename => $bedFilename, };
       my $lastField = pop(@bedFields);
       my @newAS = ();
       my $nm1 = $n - 1;
       foreach my $as (@autoSql) {
         $as =~ s/table\s+\S+/table bed$nm1/;
         if ($as !~ /\s+$lastField\s*;/) {
           push @newAS, $as;
         }
       }
       @autoSql = @newAS;
     }
   } else {
     # Looking inside a missing entry would create an empty "bed" def that
     # later code would take for a real one.
     warn "No autoSql def for table bed found in $rootDir/$subDir; " .
       "not adding bedN defs.";
   }
   # When table definitions rely on objects, add the object definitions to
   # the stored table autoSql (so the autoSql parser doesn't die in hgTables).
   foreach my $table (keys %tableAS) {
     if ($tableAS{$table}->{autoSql} =~ /\n\s+table (\w+)/) {
       my $object = $1;
       if (defined $objectAS{$object}) {
         $tableAS{$table}->{autoSql} .= ("\n" . $objectAS{$object}->{autoSql});
       } elsif (defined $tableAS{$object}) {
         $tableAS{$table}->{autoSql} .= ("\n" . $tableAS{$object}->{autoSql});
       } else {
         warn "Incomplete AutoSql? table $table refers to object $object " .
           "but I can't find the definition of $object.";
       }
     }
   }
   return %tableAS;
 }
 
 #
 # indexAutoSqlByFields: build a second view of the autoSql defs, keyed by
 # each def's field list instead of by table name.
 #   Argument: reference to the by-table hash.
 #   Returns:  hash of field list => the same def reference.  When several
 #             tables share a field list, only one of them is kept.
 #
 sub indexAutoSqlByFields {
   my ($tASRef, $extra) = @_;
   confess "Too few arguments"  unless (defined $tASRef);
   confess "Too many arguments" if (defined $extra);
   my %byFields;
   $byFields{ $tASRef->{$_}->{fields} } = $tASRef->{$_} for (keys %{$tASRef});
   return %byFields;
 }
 
 #
 # matchAutoSqlByFields: see if there's an autoSql def for given fields.
 #   Arguments: the field list, a reference to the by-table hash and a
 #              reference to the by-fields hash.
 #   The standard types are compared first, in a fixed order, to save time
 #   (and avoid dupl's for std types); bed12 down to bed3 follow, and the
 #   by-fields hash is consulted last.
 #   Returns: the matching def, or undef when there is none.
 #
 sub matchAutoSqlByFields {
   my ($fields, $tASRef, $fASRef, $extra) = @_;
   confess "Too few arguments"  unless (defined $fASRef);
   confess "Too many arguments" if (defined $extra);
   my @preferred = (qw(psl genePredExt genePred lfs genericNameValue
                       openChromCombinedPeaks),
                    map("bed$_", reverse(3 .. 12)));
   foreach my $name (@preferred) {
     return $tASRef->{$name} if ($fields eq $tASRef->{$name}->{fields});
   }
   return $fASRef->{$fields};
 }
 
 
 #
 # parseGbdDescriptions: parse anchors and .as out of gbdDescriptions.html
 #   Argument: path of the html file.
 #   Returns:  hash of table name => name of the most recent anchor seen
 #             before that table's "<PRE>table ..." line ("" when no anchor
 #             came first).
 #   Dies when the file can't be opened.
 #
 sub parseGbdDescriptions {
   my $filename = shift;
   confess "Too few arguments"  if (! defined $filename);
   confess "Too many arguments" if (defined shift);
   # Three-argument open: the name comes from the command line and must be
   # taken as a plain path, never as an open mode or a command.
   open(my $gbdFh, "<", $filename) || die "Can't open $filename: $!";
   my %tableAnchors = ();
   my $anchor = "";
   while (my $line = <$gbdFh>) {
     if ($line =~ m/<a name=\"?(\w+)\"?/i) {
       # Remember the anchor; it applies to table defs on later lines.
       $anchor = $1;
     } elsif ($line =~ /<PRE>\s*table\s+([\w_]+)/) {
       $tableAnchors{$1} = $anchor;
     }
   }
   close($gbdFh);
   return %tableAnchors;
 }
 
 
 ###########################################################################
 #
 # Parse & process command line args
 #
 # GetOptions will put command line args here (one $opt_<name> variable per
 # option spec passed to GetOptions below; later code reads $opt_db,
 # $opt_hgConf and $opt_help by these names):
 use vars qw/
     $opt_kentSrc
     $opt_gbdDPath
     $opt_noLoad
     $opt_db
     $opt_hgConf
     $opt_help
+    $opt_subDir
     $opt_verbose
     /;
 
 # Specs ending in "=s" take a string value; the others are plain flags.
 # $ok is false when GetOptions found a problem with the command line.
 my $ok = GetOptions("kentSrc=s",
 		    "gbdDPath=s",
 		    "noLoad",
 		    "db=s",
 		    "hgConf=s",
 		    "help",
+		    "subDir=s",
 		    "verbose");
 # Bad command line: print help and exit unhappy.  -help: print help and
 # exit happy.
 &usage(1) if (! $ok);
 &usage(0) if ($opt_help);
 # Replace the defaults with whatever was given on the command line.  The
 # string options are tested for truth, so an empty value or "0" leaves the
 # default in place.
 $kentSrc  = $opt_kentSrc if ($opt_kentSrc);
 $gbdDPath = $opt_gbdDPath if ($opt_gbdDPath);
 $noLoad   = 1 if (defined $opt_noLoad);
 $verbose  = $opt_verbose if (defined $opt_verbose);
 # The hard-coded debug setting forces verbose on.
 $verbose  = 1 if ($debug);
+$subDir = $opt_subDir if ($opt_subDir);
 
 # If -hgConf is given, set HGDB_CONF environment variable so our call to
 # hgsql uses the correct file.
 if ($opt_hgConf) {
   if (! -e $opt_hgConf) {
     die "Error: -hgConf file \"$opt_hgConf\" does not exist.\n";
   }
   $ENV{HGDB_CONF} = $opt_hgConf;
 }
 
 ############################################################################
 # MAIN
 #
 # For each db: write <db>.tableDescriptions.sql (drop + create + one INSERT
 # per table) and, unless -noLoad was given, load it with hgsql and remove it.
 
-my %tableAutoSql = slurpAutoSql($kentSrc);
+my %tableAutoSql = slurpAutoSql($kentSrc, $subDir);
 my %fieldsAutoSql = indexAutoSqlByFields(\%tableAutoSql);
 my %tableAnchors = parseGbdDescriptions($gbdDPath);
 my $hgConf = HgConf->new($opt_hgConf);
 my @auxDbs = ('hgFixed', 'proteome');
 # -db takes a comma-separated list; without it, all active dbs plus the
 # auxiliary dbs are processed.
 my @dbs = (defined $opt_db) ? split(',', $opt_db) :
                               (&getActiveDbs($hgConf), @auxDbs);
 foreach my $db (@dbs) {
   my $sqlFile = "$db.tableDescriptions.sql";
   open(SQL, ">", $sqlFile) || die "Can't open $sqlFile for writing: $!";
   print SQL "use $db;\n";
   print SQL "drop table if exists tableDescriptions;";
   # Copy the create statement for tableDescriptions into the .sql file.
   open(F, "<", "$kentSrc/hg/lib/tableDescriptions.sql")
     || die "Can't open $kentSrc/hg/lib/tableDescriptions.sql: $!";
   while (<F>) {
     print SQL;
   }
   close (F);
   my $trackDb = TrackDb->new($db);
   my %tableTypes = $trackDb->getTrackNamesTypes();
   my %tableFields = &getTableFields($hgConf, $db);
   foreach my $table (sort keys %tableFields) {
     next if ($table =~ /^(trackDb|hgFindSpec|metaDb)_/);
     next if $tableFields{$table} eq "fileName,";
     # No autoSql under this table's own name, or one whose fields differ:
     # look for a def with exactly these fields instead.  %tableAutoSql is
     # shared by all dbs, so an entry stored here is also seen by later dbs.
     if ((! defined $tableAutoSql{$table}) ||
         ($tableFields{$table} ne $tableAutoSql{$table}->{fields})) {
       my $as =
         &matchAutoSqlByFields($tableFields{$table}, \%tableAutoSql,
                               \%fieldsAutoSql);
       if (defined $as) {
         $tableAutoSql{$table} = $as;
       }
     }
     # No type from trackDb: use the matched autoSql's table name, with bedN
     # reduced to plain bed.
     if (! defined $tableTypes{$table} &&
        defined $tableAutoSql{$table}) {
       $tableTypes{$table} = $tableAutoSql{$table}->{tableName};
       $tableTypes{$table} =~ s/bed\d+/bed/;
     }
     my $type   = $tableTypes{$table};
     if (defined $type && $table ne "mrna") {
       # $typeN keeps the field count of a "bed N ..." type (bedN); $type is
       # cut down to its first word.
       my $typeN = $type;
       $typeN =~ s/^bed (\d+).*/bed$1/;
       $typeN =~ s/^(\w+).*/$1/;
       $type =~ s/^(\w+).*/$1/;
       if (! defined $tableAutoSql{$table} &&
           defined $tableAutoSql{$typeN}) {
         $tableAutoSql{$table} = $tableAutoSql{$typeN};
       }
       if (! defined $tableAnchors{$table} &&
           defined $tableAnchors{$type}) {
         $tableAnchors{$table} = $tableAnchors{$type};
       }
     }
     my $as     = $tableAutoSql{$table};
     if (defined $as) {
       if ($tableFields{$table} ne $as->{fields}) {
         print "$db.$table FIELD MISMATCH:\n";
         print "$db.$table table fields:   $tableFields{$table}\n";
         print "$db.$table autoSql fields: $as->{fields} [$as->{tableName}]\n";
       }
     } else {
       print "$db.$table: No AutoSql.\n";
     }
     my $anchor = $tableAnchors{$table} || "";
     #*** should suggest addition to gbdD of table&.as if not already in there;
     #*** should complain about gbdD tables not in any active db.
     my $asd = (defined $as) ? $as->{autoSql} : "";
+    # my $asFileName = (defined $as) ? $as->{filename} : "noAs";
     # Escape backslashes as well as single quotes: an unescaped backslash in
     # the autoSql text would be taken as the start of an escape sequence
     # inside the quoted SQL string.
     $asd =~ s/([\\'])/\\$1/g;
+    # print "fileName  $asFileName $table\n";
     print SQL "INSERT INTO tableDescriptions (tableName, autoSqlDef, gbdAnchor)"
       . " values ('$table', '$asd', '$anchor');\n";
   }
   # Thanks Jorge for finding that this fixes a problem with myisamchk complaining
   # that the file was not closed properly:
   print SQL "FLUSH TABLES tableDescriptions;\n";
   # Buffered write errors show up at close; don't go on to load a file that
   # was not written completely.
   close(SQL) || die "Error closing $sqlFile: $!";
   if (! $noLoad) {
     (! system("/cluster/bin/x86_64/hgsql $db < $sqlFile")) || die "hgsql error for $sqlFile";
     print "Loaded $db.tableDescriptions.\n";
     unlink($sqlFile);
   }
 }