efcb7334402bd922fa514a2b967ffa3843dd02db
hiram
  Wed Dec 22 09:17:04 2021 -0800
adding fungi and specifics for augustus species and rmskSpecies no redmine

diff --git src/hg/makeDb/doc/asmHubs/runBuild src/hg/makeDb/doc/asmHubs/runBuild
old mode 100644
new mode 100755
index 62e1df3..8f07ccf
--- src/hg/makeDb/doc/asmHubs/runBuild
+++ src/hg/makeDb/doc/asmHubs/runBuild
@@ -1,153 +1,166 @@
 #!/bin/bash
 
 set -beEu -o pipefail
 
 export gcxName=$1
 export asmId=$2
 export clade=$3
 export sciName=$4
 ### !!! the 'clade' is obsolete and is unused anywhere.  It is passed around
 ### because it used to be a requirement for the gateway page script, but it
 ### it isn't used there either
 
 export asmHubName=$clade
 
+export rmskSpecies="${sciName}"
+
+### This ucscNames decision needs to be an automatic process since
+### some browsers have been built with ucscNames and others have not.
+### This is important for track updates, such as ncbiRefSeq.
 ### export ucscNames="-ucscNames"
 export ucscNames=""
 export augustusSpecies="-augustusSpecies=human"
 
 export ncbiRmsk="-ncbiRmsk"
 export noRmsk=""
 export subGroup="vertebrate_other"
 
 if [ "${clade}" = "primates" ]; then
   subGroup="vertebrate_mammalian"
 elif [ "${clade}" = "mammals" ]; then
   subGroup="vertebrate_mammalian"
 elif [ "${clade}" = "fishes" ]; then
   subGroup="vertebrate_other"
   augustusSpecies="-augustusSpecies=zebrafish"
 elif [ "${clade}" = "fish" ]; then
   subGroup="vertebrate_other"
   augustusSpecies="-augustusSpecies=zebrafish"
 elif [ "${clade}" = "birds" ]; then
   subGroup="vertebrate_other"
   augustusSpecies="-augustusSpecies=chicken"
 elif [ "${clade}" = "vertebrate" ]; then
   subGroup="vertebrate_other"
 elif [ "${clade}" = "invertebrate" ]; then
   subGroup="invertebrate"
+elif [ "${clade}" = "invertebrates" ]; then
+  subGroup="invertebrate"
+elif [ "${clade}" = "fungi" ]; then
+  subGroup="fungi"
+  augustusSpecies="-augustusSpecies=saccharomyces"
 elif [ "${clade}" = "plants" ]; then
   subGroup="plants"
+  augustusSpecies="-augustusSpecies=arabidopsis"
 elif [ "${clade}" = "vertebrate_mammalian" ]; then
   subGroup="vertebrate_mammalian"
 elif [ "${clade}" = "vertebrate_other" ]; then
   subGroup="vertebrate_other"
 elif [ "${clade}" = "nematode" ]; then
   subGroup="invertebrate"
   asmHubName="invertebrate"
   augustusSpecies="-augustusSpecies=caenorhabditis"
 elif [ "${clade}" = "drosophila" ]; then
   subGroup="invertebrate"
   asmHubName="invertebrate"
   augustusSpecies="-augustusSpecies=fly"
 elif [ "${clade}" = "Amellifera" ]; then
   subGroup="invertebrate"
   asmHubName="invertebrate"
   augustusSpecies="-augustusSpecies=honeybee1"
 elif [ "${clade}" = "Agambiae" ]; then
   subGroup="invertebrate"
   asmHubName="invertebrate"
   augustusSpecies="-augustusSpecies=culex"
 elif [ "${clade}" = "Scerevisiae" ]; then
   subGroup="fungi"
   asmHubName="fungi"
   augustusSpecies="-augustusSpecies=saccharomyces"
 elif [ "${clade}" = "viral" ]; then
   subGroup="viral"
+  rmskSpecies="viruses"
+  augustusSpecies="-noAugustus -noXenoRefSeq"
 elif [ "${clade}" = "bacteria" ]; then
   subGroup="bacteria"
   noRmsk="-noRmsk"
   augustusSpecies="-noAugustus -noXenoRefSeq"
 else
   printf "ERROR: unrecognized clade: '%s'\n" "${clade}" 1>&2
   exit 255
 fi
 
 export stepStart="download"
 export stepEnd="trackDb"
-### export stepStart="ncbiRefSeq"
-### export stepEnd="ncbiRefSeq"
-### export stepStart="trackDb"
-### export stepEnd="trackDb"
 
 # download, sequence, assemblyGap, gatewayPage, cytoBand, gc5Base,
 # repeatMasker, simpleRepeat, allGaps, idKeys, windowMasker, addMask,
 # gapOverlap, tandemDups, cpgIslands, ncbiGene, ncbiRefSeq, xenoRefGene,
 # augustus, trackDb, cleanup
 
 export linkTop="/hive/data/genomes/asmHubs"
 export TOP0="/hive/data/genomes/asmHubs"
 export TOP="/hive/data/genomes/asmHubs/allBuild"
 cd $TOP
 
 export gcPrefix=`echo $gcxName | cut -c1-3`
 export topBuild="genbankBuild"
 export genbankRefseq="genbank"
 if [ "${gcPrefix}" = "GCF" ]; then
   topBuild="refseqBuild"
   genbankRefseq="refseq"
 fi
 
 export gc0=`echo $gcxName | cut -c5-7`
 export gc1=`echo $gcxName | cut -c8-10`
 export gc2=`echo $gcxName | cut -c11-13`
 
 export buildDir=`printf "%s/%s/%s/%s/%s/%s" "${TOP0}/${topBuild}" "${gcPrefix}" "${gc0}" "${gc1}" "${gc2}" "${asmId}"`
 
 export linkDir=`printf "%s/%s/%s/%s" "${linkTop}" "${genbankRefseq}" "${subGroup}" "${sciName}"`
 
 if [ -d "${buildDir}" ]; then
-  printf "# Already done ? $gcxName\n" 1>&2
-  exit 255
 #  printf "# removing: %s\n" "${buildDir}" 1>&2
 #  rm -fr "${buildDir}"
+  if [ -s "${buildDir}/${asmId}.trackDb.txt" ]; then  # -s: file exists and is non-empty
+     printf "# Already done %s\n" "${gcxName}"
+     printf "# Already done %s\n" "${gcxName}" 1>&2
+     exit 0
+  fi
+  printf "# partially done %s\n" "${gcxName}"  # value passed as %s argument, never as format text
+  printf "# partially done %s\n" "${gcxName}" 1>&2
 fi
-
 
 mkdir -p ${TOP0}/${topBuild}/buildLogs/${subGroup}
 export logFile="${TOP0}/${topBuild}/buildLogs/${subGroup}/${asmId}.log"
 
 if [ ! -d "${buildDir}" ]; then
   mkdir -p "${buildDir}"
 fi
 if [ ! -d "${linkDir}" ]; then
   mkdir -p "${linkDir}"
 fi
 if [ ! -L "${linkDir}/${asmId}" ]; then
   ln -s "${buildDir}" "${linkDir}"
 fi
 ls -d "${buildDir}" "${linkDir}/${asmId}"
 
 echo "========================= "`date "+%F %T"` >> "${logFile}"
 
 echo "/hive/data/genomes/asmHubs/${genbankRefseq}Build/doIdKeys \"${asmId}\" &" >> "${logFile}"
 
 /hive/data/genomes/asmHubs/${genbankRefseq}Build/doIdKeys "${asmId}" >> "${logFile}" 2>&1 &
 
 echo "### \$HOME/kent/src/hg/utils/automation/doAssemblyHub.pl \
   -continue=\"${stepStart}\" -stop=\"${stepEnd}\" \
-    -rmskSpecies=\"$sciName\" -bigClusterHub=ku -buildDir=\`pwd\` \
+    -rmskSpecies=\"${rmskSpecies}\" -bigClusterHub=ku -buildDir=\`pwd\` \
       -asmHubName=$asmHubName -fileServer=hgwdev -smallClusterHub=hgwdev \
         ${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies} -workhorse=hgwdev \"${asmId}\"" >> "${logFile}"
 
 cd "${buildDir}"
 if [ ! -s "${buildDir}/build.log" ]; then
   ln -s "${logFile}" "${buildDir}/build.log"
 fi
 
 time ($HOME/kent/src/hg/utils/automation/doAssemblyHub.pl \
   -continue="${stepStart}" -stop="${stepEnd}" \
-    -rmskSpecies="$sciName" -bigClusterHub=ku -buildDir=`pwd` \
+    -rmskSpecies="${rmskSpecies}" -bigClusterHub=ku -buildDir=`pwd` \
       -asmHubName=$asmHubName -fileServer=hgwdev -smallClusterHub=hgwdev \
         ${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies} -workhorse=hgwdev "${asmId}") >> "${logFile}" 2>&1