efcb7334402bd922fa514a2b967ffa3843dd02db hiram Wed Dec 22 09:17:04 2021 -0800 adding fungi and specifics for augustus species and rmskSpecies no redmine diff --git src/hg/makeDb/doc/asmHubs/runBuild src/hg/makeDb/doc/asmHubs/runBuild old mode 100644 new mode 100755 index 62e1df3..8f07ccf --- src/hg/makeDb/doc/asmHubs/runBuild +++ src/hg/makeDb/doc/asmHubs/runBuild @@ -1,153 +1,166 @@ #!/bin/bash set -beEu -o pipefail export gcxName=$1 export asmId=$2 export clade=$3 export sciName=$4 ### !!! the 'clade' is obsolete and is unused anywhere. It is passed around ### because it used to be a requirement for the gateway page script, but it ### it isn't used there either export asmHubName=$clade +export rmskSpecies="${sciName}" + +### This ucscNames decision needs to be an automatic process since +### some browsers have been built with ucscNames and other have not. +### This is important for track updates, such as ncbiRefSeq ### export ucscNames="-ucscNames" export ucscNames="" export augustusSpecies="-augustusSpecies=human" export ncbiRmsk="-ncbiRmsk" export noRmsk="" export subGroup="vertebrate_other" if [ "${clade}" = "primates" ]; then subGroup="vertebrate_mammalian" elif [ "${clade}" = "mammals" ]; then subGroup="vertebrate_mammalian" elif [ "${clade}" = "fishes" ]; then subGroup="vertebrate_other" augustusSpecies="-augustusSpecies=zebrafish" elif [ "${clade}" = "fish" ]; then subGroup="vertebrate_other" augustusSpecies="-augustusSpecies=zebrafish" elif [ "${clade}" = "birds" ]; then subGroup="vertebrate_other" augustusSpecies="-augustusSpecies=chicken" elif [ "${clade}" = "vertebrate" ]; then subGroup="vertebrate_other" elif [ "${clade}" = "invertebrate" ]; then subGroup="invertebrate" +elif [ "${clade}" = "invertebrates" ]; then + subGroup="invertebrate" +elif [ "${clade}" = "fungi" ]; then + subGroup="fungi" + augustusSpecies="-augustusSpecies=saccharomyces" elif [ "${clade}" = "plants" ]; then subGroup="plants" + augustusSpecies="-augustusSpecies=arabidopsis" elif [ "${clade}" = "vertebrate_mammalian" ]; then subGroup="vertebrate_mammalian" elif [ "${clade}" = "vertebrate_other" ]; then subGroup="vertebrate_other" elif [ "${clade}" = "nematode" ]; then subGroup="invertebrate" asmHubName="invertebrate" augustusSpecies="-augustusSpecies=caenorhabditis" elif [ "${clade}" = "drosophila" ]; then subGroup="invertebrate" asmHubName="invertebrate" augustusSpecies="-augustusSpecies=fly" elif [ "${clade}" = "Amellifera" ]; then subGroup="invertebrate" asmHubName="invertebrate" augustusSpecies="-augustusSpecies=honeybee1" elif [ "${clade}" = "Agambiae" ]; then subGroup="invertebrate" asmHubName="invertebrate" augustusSpecies="-augustusSpecies=culex" elif [ "${clade}" = "Scerevisiae" ]; then subGroup="fungi" asmHubName="fungi" augustusSpecies="-augustusSpecies=saccharomyces" elif [ "${clade}" = "viral" ]; then subGroup="viral" + rmskSpecies="viruses" + augustusSpecies="-noAugustus -noXenoRefSeq" elif [ "${clade}" = "bacteria" ]; then subGroup="bacteria" noRmsk="-noRmsk" augustusSpecies="-noAugustus -noXenoRefSeq" else printf "ERROR: unrecognized clade: '%s'\n" "${clade}" 1>&2 exit 255 fi export stepStart="download" export stepEnd="trackDb" -### export stepStart="ncbiRefSeq" -### export stepEnd="ncbiRefSeq" -### export stepStart="trackDb" -### export stepEnd="trackDb" # download, sequence, assemblyGap, gatewayPage, cytoBand, gc5Base, # repeatMasker, simpleRepeat, allGaps, idKeys, windowMasker, addMask, # gapOverlap, tandemDups, cpgIslands, ncbiGene, ncbiRefSeq, xenoRefGene, # augustus, trackDb, cleanup export linkTop="/hive/data/genomes/asmHubs" export TOP0="/hive/data/genomes/asmHubs" export TOP="/hive/data/genomes/asmHubs/allBuild" cd $TOP export gcPrefix=`echo $gcxName | cut -c1-3` export topBuild="genbankBuild" export genbankRefseq="genbank" if [ "${gcPrefix}" = "GCF" ]; then topBuild="refseqBuild" genbankRefseq="refseq" fi export gc0=`echo $gcxName | cut -c5-7` export gc1=`echo $gcxName | cut -c8-10` export gc2=`echo $gcxName | cut -c11-13` export buildDir=`printf "%s/%s/%s/%s/%s/%s" "${TOP0}/${topBuild}" "${gcPrefix}" "${gc0}" "${gc1}" "${gc2}" "${asmId}"` export linkDir=`printf "%s/%s/%s/%s" "${linkTop}" "${genbankRefseq}" "${subGroup}" "${sciName}"` if [ -d "${buildDir}" ]; then - printf "# Already done ? $gcxName\n" 1>&2 - exit 255 # printf "# removing: %s\n" "${buildDir}" 1>&2 # rm -fr "${buildDir}" + if [ -s "${buildDir}/${asmId}.trackDb.txt" ]; then + printf "# Already done $gcxName\n" + printf "# Already done $gcxName\n" 1>&2 + exit 0 + fi + printf "# partially done $gcxName\n" + printf "# partially done $gcxName\n" 1>&2 fi - mkdir -p ${TOP0}/${topBuild}/buildLogs/${subGroup} export logFile="${TOP0}/${topBuild}/buildLogs/${subGroup}/${asmId}.log" if [ ! -d "${buildDir}" ]; then mkdir -p "${buildDir}" fi if [ ! -d "${linkDir}" ]; then mkdir -p "${linkDir}" fi if [ ! -L "${linkDir}/${asmId}" ]; then ln -s "${buildDir}" "${linkDir}" fi ls -d "${buildDir}" "${linkDir}/${asmId}" echo "========================= "`date "+%F %T"` >> "${logFile}" echo "/hive/data/genomes/asmHubs/${genbankRefseq}Build/doIdKeys \"${asmId}\" &" >> "${logFile}" /hive/data/genomes/asmHubs/${genbankRefseq}Build/doIdKeys "${asmId}" >> "${logFile}" 2>&1 & echo "### \$HOME/kent/src/hg/utils/automation/doAssemblyHub.pl \ -continue=\"${stepStart}\" -stop=\"${stepEnd}\" \ - -rmskSpecies=\"$sciName\" -bigClusterHub=ku -buildDir=\`pwd\` \ + -rmskSpecies=\"${rmskSpecies}\" -bigClusterHub=ku -buildDir=\`pwd\` \ -asmHubName=$asmHubName -fileServer=hgwdev -smallClusterHub=hgwdev \ ${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies} -workhorse=hgwdev \"${asmId}\"" >> "${logFile}" cd "${buildDir}" if [ ! -s "${buildDir}/build.log" ]; then ln -s "${logFile}" "${buildDir}/build.log" fi time ($HOME/kent/src/hg/utils/automation/doAssemblyHub.pl \ -continue="${stepStart}" -stop="${stepEnd}" \ - -rmskSpecies="$sciName" -bigClusterHub=ku -buildDir=`pwd` \ + -rmskSpecies="${rmskSpecies}" -bigClusterHub=ku -buildDir=`pwd` \ -asmHubName=$asmHubName -fileServer=hgwdev -smallClusterHub=hgwdev \ ${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies} -workhorse=hgwdev "${asmId}") >> "${logFile}" 2>&1