24c443b10b97e5b1e81df9ee5cb393131e7b873a
hiram
  Wed May 5 11:40:43 2021 -0700
better handling of assembly hub chain net tracks refs #26988

diff --git src/hg/utils/automation/HgAutomate.pm src/hg/utils/automation/HgAutomate.pm
index 4a1b1a5..3bba2a2 100755
--- src/hg/utils/automation/HgAutomate.pm
+++ src/hg/utils/automation/HgAutomate.pm
@@ -21,31 +21,32 @@
 # treated as constants) exported by this module:
 @EXPORT_OK = (
     # Support for common command line options:
     qw( getCommonOptionHelp processCommonOptions
 	@commonOptionVars @commonOptionSpec
       ),
     # Some basic smarts about our compute infrastructure:
     qw( choosePermanentStorage
 	chooseWorkhorse chooseFileServer
 	chooseClusterByBandwidth chooseSmallClusterByBandwidth
 	chooseFilesystemsForCluster checkClusterPath
       ),
     # General-purpose utility routines:
     qw( checkCleanSlate checkExistsUnlessDebug closeStdin
 	getAssemblyInfo getSpecies gensub2 machineHasFile databaseExists
-	makeGsub mustMkdir mustOpen nfsNoodge paraRun run verbose
+	makeGsub mustMkdir asmHubBuildDir asmHubDownloadDir mustOpen
+	nfsNoodge paraRun run verbose
       ),
     # Hardcoded paths/commands/constants:
     qw( $centralDbSql $git
 	$clusterData $trackBuild $goldenPath $images $gbdb
 	$splitThreshold $runSSH $setMachtype
       ),
 );
 
 #########################################################################
 # A simple model of our local compute environment with some subroutines
 # for checking the validity of path+machine combos and for suggesting
 # appropriate storage and machines.
 
 use vars qw( %cluster %clusterFilesystem $defaultDbHost );
 
@@ -616,41 +617,104 @@
     # allow PATH to find the gensub2 command
     $answer = "gensub2";
   }
  return $answer;
 }
 
 sub closeStdin {
   # If we don't do this, the script can hang ("Suspended (tty input)")
   # when it is run backgrounded (&) and then something is typed into the
   # terminal... or something like that.  Anyway, doesn't hurt.  It does not
   # prevent hanging on ssh prompts, however.
   close(STDIN);
   open(STDIN, '/dev/null');
 }
 
+sub asmHubDownloadDir {
+  # return path to assembly hub download directory: $goldenPath/GCx/ddd/ddd/ddd
+  my ($asmId) = @_;
+  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
+  confess "must supply GC[AF]_... assembly ID" if ($asmId !~ m/^GC/);
+  my $gcX = substr($asmId,0,3);
+  my $d0 = substr($asmId,4,3);
+  my $d1 = substr($asmId,7,3);
+  my $d2 = substr($asmId,10,3);
+  my $downloadDir = $goldenPath . "/$gcX/$d0/$d1/$d2";
+  return $downloadDir;
+}
+
+sub asmHubBuildDir {
+  # return path to assembly hub build directory
+  my ($asmId) = @_;
+  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
+  confess "must supply GC[AF]_... assembly ID" if ($asmId !~ m/^GC/);
+  my $gcX = substr($asmId,0,3);
+  my $d0 = substr($asmId,4,3);
+  my $d1 = substr($asmId,7,3);
+  my $d2 = substr($asmId,10,3);
+  my $buildDir = "/hive/data/genomes/asmHubs/allBuild/$gcX/$d0/$d1/$d2/$asmId";
+  return $buildDir;
+}
+
+sub asmHubSubmitter {
+  # submitter is the text after 'Submitter:' on the first matching assembly_report line
+  my ($asmReport) = @_;
+  my $submitter = `grep -i "submitter" $asmReport | head -1 | tr -d '\r'`;
+  chomp $submitter;
+  $submitter =~ s/.*ubmitter:\s+//i;
+  return $submitter;
+}
+
+sub asmHubDate {
+  # date is the text after 'Date:' on the first matching assembly_report line
+  my ($asmReport) = @_;
+  my $date = `grep -i "date:" $asmReport | head -1 | tr -d '\r'`;
+  chomp $date;
+  $date =~ s/.*ate:\s+//i;
+  return $date;
+}
+
+sub asmHubCommonName {
+  # common name is in (parens) in the assembly_report 'Organism name:' line
+  my ($asmReport) = @_;
+  my $names = `grep -i "organism name:" $asmReport | head -1 | tr -d '\r'`;
+  chomp $names;
+  $names =~ s/.*\(//;
+  $names =~ s/\).*//;
+  return $names;
+}
+
 sub getAssemblyInfo {
   # Do a quick dbDb lookup to get assembly descriptive info for README.txt.
   my ($dbHost, $db) = @_;
   confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
+  if ($db =~ m/^GC/) {
+    my $asmReport = asmHubBuildDir($db) . "/download/${db}_assembly_report.txt";
+    confess "Can not find $asmReport" if ( ! -s "${asmReport}" );
+    my $genome = asmHubCommonName($asmReport);
+    my $date = asmHubDate($asmReport);
+    my $source = asmHubSubmitter($asmReport);
+    return ($genome, $date, $source);
+  } else {
     my $query = "select genome,description,sourceName from dbDb " .
               "where name = \"$db\";";
     my $line = `echo '$query' | $HgAutomate::runSSH $dbHost $centralDbSql`;
     chomp $line;
     my ($genome, $date, $source) = split("\t", $line);
     return ($genome, $date, $source);
   }
+}
 
 sub getSpecies {
   # fetch scientificName from dbDb
   my ($dbHost, $db) = @_;
   confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
   my $query = "select scientificName from dbDb " .
               "where name = \"$db\";";
   my $line = `echo '$query' | $HgAutomate::runSSH $dbHost $centralDbSql`;
   chomp $line;
   my ($scientificName) = split("\t", $line);
   if (length($scientificName) < 1) {
      if ( -s "$HgAutomate::clusterData/$db/species.name.txt" ) {
         $scientificName = `cat $HgAutomate::clusterData/$db/species.name.txt`;
         chomp $scientificName;
      } else {