#!/bin/bash
#
# oneAndDoneBrowser.sh - drive doAssemblyHub.pl over a range of build steps
# for one NCBI assembly, then write a dbDb INSERT statement for hgcentral.
#
# Provenance of this revision:
#   commit 7f813719b7ad2dde53149de49c2ec989b9630ea7
#   hiram, Tue Sep 13 12:09:58 2022 -0700
#   "now working through augustus step refs #29811"
#   src/hg/utils/automation/oneAndDoneBrowser.sh (index 52f9387..4c6755b)
#   The commit moved the step range from chromAlias..chromAlias to
#   ncbiGene..augustus, added the list of possible steps, and added the
#   bed -> trackData symlink at the end.
#
# Arguments (all five required):
#   $1 asmId           full assembly ID, e.g. GCF_000857045.1_ViralProj15142
#   $2 dbName          database name; the build goes to /hive/data/genomes/dbName
#   $3 clade           selects the augustus / RepeatMasker options below
#   $4 hgCentralClade  passed through to prepConfig.pl
#   $5 trackDbDir      passed through to prepConfig.pl
#
# Optional environment:
#   stepStart, stepEnd  first and last doAssemblyHub.pl step to run
#                       (defaults: ncbiGene, augustus)
#
# Exit status: 255 on usage or input errors; otherwise any command failure
# aborts the script because of 'set -e'.

set -beEu -o pipefail

if [ $# -ne 5 ]; then
  cat 1>&2 <<'_EOF_'
usage: oneAndDoneBrowser.sh asmId dbName clade hgCentralClade trackDbDir

Build an assembly browser within the standard UCSC browser hierarchies
of /hive/data/genomes/dbName/ and with a dbDb hgcentral insert statement.

arguments:
asmId is a full assembly ID such as: GCF_000857045.1_ViralProj15142
dbName is the name and directory to build into /hive/data/genomes/dbName
clade is one of: primate mammal fish bird vertebrate invertebrate fungi
    plant nematode drosophila virus archaea bacteria
hgCentralClade is one of: ancestor bacteria ciliate deuterostome haplotypes
    insect mammal other protista simulation vertebrate virus worm
trackDbDir is one of the directories under makeDb/trackDb//
    where this genome trackDb//dbName/trackDb.ra will exist
_EOF_
  exit 255
fi

#### default build parameters, will be adjusted below depending upon clade
export augustusSpecies="-augustusSpecies=human"
export ncbiRmsk="-ncbiRmsk"
export noRmsk=""
export ucscNames="-ucscNames"
####

export asmId="${1}"
export dbName="${2}"
export clade="${3}"
export hgCentralClade="${4}"
export trackDbDir="${5}"

# The source directory is addressed by slices of the assembly ID:
# characters 0-2, then three 3-character groups starting at offsets 4, 7, 10.
export gcX="${asmId:0:3}"
export d0="${asmId:4:3}"
export d1="${asmId:7:3}"
export d2="${asmId:10:3}"
export srcDir="/hive/data/outside/ncbi/genomes/${gcX}/${d0}/${d1}/${d2}/${asmId}"

if [ ! -d "${srcDir}" ]; then
  printf "ERROR: can not find source directory:\n%s\n" "${srcDir}" 1>&2
  exit 255
fi

export asmReport="${srcDir}/${asmId}_assembly_report.txt"
if [ ! -s "${asmReport}" ]; then
  printf "ERROR: can not find the assembly report %s_assembly_report.txt\n" "${asmId}" 1>&2
  printf "in the source directory\n%s\n" "${srcDir}" 1>&2
  exit 255
fi

# First 'organism name:' line of the report with carriage returns removed.
# The '|| true' keeps a report without that line from aborting the script;
# both names are then simply empty.
orgLine="$(grep -i 'organism name:' "${asmReport}" | head -1 | tr -d "\r")" || true
# sciName: text after the label, up to the first parenthesis
export sciName="$(printf '%s\n' "${orgLine}" | sed -e 's/.*organism name: *//i; s/ *(.*//;')"
# organism: text inside the parentheses on that line
export organism="$(printf '%s\n' "${orgLine}" | sed -e 's/.*organism name: *.*(//i; s/).*//;')"
export rmskSpecies="${sciName}"

export buildDir="/hive/data/genomes/${dbName}"
if [ ! -d "${buildDir}" ]; then
  mkdir "${buildDir}"
fi

# Adjust the augustus and RepeatMasker options for the clade.
case "${clade}" in
  primate | mammal | vertebrate | invertebrate)
    ;;   # defaults apply
  fish)
    augustusSpecies="-augustusSpecies=zebrafish"
    ;;
  bird)
    augustusSpecies="-augustusSpecies=chicken"
    ;;
  fungi)
    augustusSpecies="-augustusSpecies=saccharomyces"
    ;;
  plant)
    augustusSpecies="-augustusSpecies=arabidopsis"
    ;;
  nematode)
    augustusSpecies="-augustusSpecies=caenorhabditis"
    ;;
  drosophila)
    augustusSpecies="-augustusSpecies=fly"
    ;;
  virus)
    rmskSpecies="viruses"
    augustusSpecies="-noAugustus -noXenoRefSeq"
    ;;
  archaea | bacteria)
    noRmsk="-noRmsk"
    augustusSpecies="-noAugustus -noXenoRefSeq"
    ;;
  *)
    printf "ERROR: unrecognized clade: '%s'\n" "${clade}" 1>&2
    printf "must be one of:\n" 1>&2
    printf "  primate mammal fish bird vertebrate invertebrate fungi\n  plant nematode drosophila virus archaea bacteria\n" 1>&2
    exit 255
    ;;
esac

printf "# ==== %s ====\n" "$(date '+%F %T %s')" 1>&2
printf "# working in %s\n" "${buildDir}" 1>&2
printf "# building %s - %s\n" "${organism}" "${sciName}" 1>&2
printf "# dbName: %s\n" "${dbName}" 1>&2
printf "# ucscNames: %s\n" "${ucscNames}" 1>&2
printf "# rmskSpecies: %s\n" "${rmskSpecies}" 1>&2
printf "# augustusSpecies: %s\n" "${augustusSpecies}" 1>&2
printf "# ncbiRmsk: %s\n" "${ncbiRmsk}" 1>&2
if [ -n "${noRmsk}" ]; then
  printf "# noRmsk: '%s'\n" "${noRmsk}" 1>&2
fi
printf "\n" 1>&2

# possible steps in order:

# download sequence assemblyGap chromAlias gatewayPage cytoBand gc5Base
# repeatMasker simpleRepeat allGaps idKeys windowMasker addMask gapOverlap
# tandemDups cpgIslands ncbiGene ncbiRefSeq xenoRefGene augustus trackDb cleanup

# The step range can be set from the environment, e.g.
#   stepStart=download stepEnd=sequence oneAndDoneBrowser.sh ...
# Without an override the range is ncbiGene through augustus.
export stepStart="${stepStart:-ncbiGene}"
export stepEnd="${stepEnd:-augustus}"

printf 'cd "%s"\n' "${buildDir}" 1>&2
cd "${buildDir}"

# Log the command about to be run. A here-document is used so that the
# variable values are printed literally and never parsed as a printf format.
cat 1>&2 <<_EOF_
\$HOME/kent/src/hg/utils/automation/doAssemblyHub.pl \\
  -continue="${stepStart}" -stop="${stepEnd}" -dbName="${dbName}" \\
  -rmskSpecies="${rmskSpecies}" -bigClusterHub=ku -buildDir=\`pwd\` \\
  -fileServer=hgwdev -smallClusterHub=hgwdev \\
  ${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies} \\
  -workhorse=hgwdev "${asmId}" >> build.log 2>&1
_EOF_

# The option strings may be empty (noRmsk) or hold several words
# (augustusSpecies for virus/archaea/bacteria), so split them on whitespace
# into an array instead of relying on unquoted expansion.
read -r -a optionArgs <<< "${noRmsk} ${ncbiRmsk} ${ucscNames} ${augustusSpecies}"

"${HOME}/kent/src/hg/utils/automation/doAssemblyHub.pl" \
  -continue="${stepStart}" -stop="${stepEnd}" -dbName="${dbName}" \
  -rmskSpecies="${rmskSpecies}" -bigClusterHub=ku -buildDir="$(pwd)" \
  -fileServer=hgwdev -smallClusterHub=hgwdev \
  "${optionArgs[@]}" \
  -workhorse=hgwdev "${asmId}" >> build.log 2>&1

cd "${buildDir}"

configRa="${dbName}.config.ra"
downloadedReport="download/${asmId}_assembly_report.txt"

# Print the value of one "key value" line of the config.ra file:
# the line starting with the key, with the key and following blanks removed.
#   $1 - key name
configValue() {
  grep "^${1}" "${configRa}" | sed -e "s/${1} \+//"
}

# Regenerate the config and the dbDb SQL only when the downloaded assembly
# report is newer than the existing config.ra.
if [ "${downloadedReport}" -nt "${configRa}" ]; then
  "${HOME}/kent/src/hg/utils/automation/prepConfig.pl" "${dbName}" \
    "${hgCentralClade}" "${trackDbDir}" "${downloadedReport}" \
    > "${configRa}"

  # NOTE: 'export name=$(...)' reports the status of export, not of the
  # pipeline, so a key missing from config.ra gives an empty value here
  # rather than aborting the script.
  export taxId="$(grep "^taxId" "${configRa}" | awk '{print $NF}')"
  export asmDate="$(configValue assemblyDate)"
  export asmName="$(configValue ncbiAssemblyName)"
  export comName="$(configValue commonName)"
  export sciName="$(configValue scientificName)"
  export orderKey="$(configValue orderKey)"
  export accessionID="$(configValue genBankAccessionID)"

  # Default position: from the midpoint of the first sequence in
  # chrom.sizes to midpoint+9999, clipped to the sequence length.
  export defaultPos="$(head -1 "${dbName}.chrom.sizes" | awk '{end=int($2/2)+9999; if (end > $2){end = $2}; printf "%s:%d-%d", $1, int($2/2), end}')"

  {
    printf "DELETE from dbDb where name = \"%s\";\n" "${dbName}"
    printf "INSERT INTO dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, htmlPath, hgNearOk, hgPbOk, sourceName, taxId) VALUES\n"
    printf "(\"%s\", \"%s (%s/%s)\", \"/gbdb/%s\", \"%s\", \"%s\", 1, %d, \"%s\", \"%s\", \"/gbdb/%s/html/description.html\", 0, 1, \"%s\", %d);\n" \
      "${dbName}" "${asmDate}" "${asmName}" "${dbName}" "${dbName}" \
      "${comName}" "${defaultPos}" "${orderKey}" "${comName}" "${sciName}" \
      "${dbName}" "${accessionID}" "${taxId}"
  } > dbDbInsert.sql
fi

# dbDbInsert.sql is only written inside the branch above, so it may not
# exist on this run; do not let a missing file abort the remaining steps.
if [ -s "dbDbInsert.sql" ]; then
  printf "# dbDbInsert.sql statement is completed:\n" 1>&2
  cat dbDbInsert.sql 1>&2
else
  printf "# no dbDbInsert.sql present in %s\n" "${buildDir}" 1>&2
fi

# Convenience symlinks. The -L tests keep a rerun from failing on a link
# that already exists but whose target has not been created yet.
if [ ! -L "chrom.sizes" ] && [ ! -s "chrom.sizes" ]; then
  ln -s "${dbName}.chrom.sizes" chrom.sizes
fi
if [ ! -L "bed" ] && [ ! -d "bed" ]; then
  ln -s trackData bed
fi