032903d6adc6d0c49cbedf17c51f4319052cb8ef hiram Thu Apr 16 12:43:59 2026 -0700 improved variable naming fixup problems process in work refs #31811 diff --git src/hg/utils/automation/kegAlignLastz.sh src/hg/utils/automation/kegAlignLastz.sh index 41e88cf8206..2553faabe46 100755 --- src/hg/utils/automation/kegAlignLastz.sh +++ src/hg/utils/automation/kegAlignLastz.sh @@ -25,33 +25,33 @@ Or, in the assembly hub build directory: /hive/data/genomes/asmHubs/allBuild/GCA/002/844/635/GCA_002844635.1_USMARCv1.0/trackData/lastzQuery.yyyy-mm-dd Will set up a DEF file there, and a run.sh script to run all steps and output makeDoc text to document what happened. AND MORE, it will run the swap operation into the corresponding blastz.target.swap directory in the query genome work space. Email will be sent to: '$userName' upon completion. e.g.: kegAlignLastz.sh rn7 papAnu4 mammal primate\n" 1>&2 exit 255 fi -# asmId - if assembly hub, determine GCx_012345678.9 name +# accId - if assembly hub, determine GCx_012345678.9 name # if not, return the asmName (== UCSC database name) -function asmId() { +function accId() { export asmName=$1 export id="${asmName}" case $asmName in GC[AF]_*) id=`echo $asmName | cut -d'_' -f1-2` ;; *) ;; esac printf "%s" "${id}" } # gcPath - if assembly hub, determine GCx/012/345/678 path # if not return empty string "" (== UCSC database name) function gcPath() { @@ -66,79 +66,79 @@ GCxPath="${gcX}/${d0}/${d1}/${d2}" ;; *) ;; esac printf "%s" "${GCxPath}" } # asmSize - determine size of genome function asmSize() { export asmName=$1 export sizes="/hive/data/genomes/${asmName}/chrom.sizes" case $asmName in GC[AF]_*) gcxPath=$(gcPath $asmName) - id=$(asmId $asmName) + id=$(accId $asmName) size=`awk '{sum+=$2}END{print sum}' /hive/data/genomes/asmHubs/${gcxPath}/${id}/${id}.chrom.sizes.txt` ;; *) size=`awk '{sum+=$2}END{print sum}' ${sizes}` ;; esac printf "%s" "${size}" } # seqCount - determine the sequence count in given genome target function seqCount() { export asmName=$1 export sizes="/hive/data/genomes/${asmName}/chrom.sizes" case $asmName in GC[AF]_*) gcxPath=$(gcPath $asmName) - id=$(asmId $asmName) + id=$(accId $asmName) count=`wc -l /hive/data/genomes/asmHubs/${gcxPath}/${id}/${id}.chrom.sizes.txt | cut -d' ' -f1` ;; *) count=`wc -l ${sizes} | cut -d' ' -f1` ;; esac printf "%s" "${count}" } function orgName() { export asmName=$1 case $asmName in GC[AF]_*) gcxPath=$(gcPath $asmName) asmDir="/hive/data/outside/ncbi/genomes/${gcxPath}/${asmName}" asmRpt="${asmDir}/${asmName}_assembly_report.txt" oName=`egrep -m 1 -i "^# organism name:" ${asmRpt} | tr -d '\r' | sed -e 's/.*(//; s/).*//'` ;; *) oName=`hgsql -N -e "select organism from dbDb where name=\"${asmName}\";" hgcentraltest` ;; esac printf "%s" "${oName}" } function faGzUrl() { export asmName=$1 case $asmName in GC[AF]_*) gcxPath=$(gcPath $asmName) - id=$(asmId $asmName) + id=$(accId $asmName) printf "https://hgdownload.soe.ucsc.edu/hubs/%s/%s/%s.fa.gz" "${gcxPath}" "${id}" "${id}" ;; *) printf "https://hgdownload.soe.ucsc.edu/goldenPath/%s/bigZips/%s.fa.gz" "${asmName}" "${asmName}" ;; esac } function orgDate() { export asmName=$1 case $asmName in GC[AF]_*) gcxPath=$(gcPath $asmName) asmDir="/hive/data/outside/ncbi/genomes/${gcxPath}/${asmName}" asmRpt="${asmDir}/${asmName}_assembly_report.txt" @@ -157,91 +157,91 @@ export db=$1 } ############################################################################## ############################################################################## ### start seconds export startT=`date "+%s"` export target="$1" export query="$2" export tClade="$3" export qClade="$4" export tGcPath=$(gcPath $target) export qGcPath=$(gcPath $query) -export tAsmId=$(asmId $target) -export qAsmId=$(asmId $query) +export tAccId=$(accId $target) +export qAccId=$(accId $query) printf "# tq: '${target}' '${query}' '${tClade}' '${qClade}'\n" 1>&2 printf "# tq gcPath: '${tGcPath}' '${qGcPath}'\n" 1>&2 -printf "# tq asmId: '${tAsmId}' '${qAsmId}'\n" 1>&2 +printf "# tq accId: '${tAccId}' '${qAccId}'\n" 1>&2 # upper case first character -export Target="${tAsmId^}" -export Query="${qAsmId^}" +export Target="${tAccId^}" +export Query="${qAccId^}" export DS=`date "+%F"` # assume UCSC db build export buildDir="/hive/data/genomes/${target}/bed/lastz${Query}.${DS}" export targetExists="/hive/data/genomes/${target}/bed" -export symLink="/hive/data/genomes/${target}/bed/lastz.${qAsmId}" -export swapDir="/hive/data/genomes/${query}/bed/blastz.${tAsmId}.swap" +export symLink="/hive/data/genomes/${target}/bed/lastz.${qAccId}" +export swapDir="/hive/data/genomes/${query}/bed/blastz.${tAccId}.swap" export queryExists="/hive/data/genomes/${query}/bed" -export swapLink="/hive/data/genomes/${query}/bed/lastz.${tAsmId}" +export swapLink="/hive/data/genomes/${query}/bed/lastz.${tAccId}" export targetSizes="/hive/data/genomes/${target}/chrom.sizes" export querySizes="/hive/data/genomes/${query}/chrom.sizes" export target2bit="/hive/data/genomes/${target}/${target}.2bit" export query2bit="/hive/data/genomes/${query}/${query}.2bit" export trackHub="" export rBestTrackHub="" export tRbestArgs="" export qRbestArgs="" export tSwapRbestArgs="" export qSwapRbestArgs="" export tFullName="" export qFullName="" export tTdb="xxx" export qTdb="xxx" # override those specifications if assembly hub case $target in GC[AF]_*) trackHub="-trackHub -noDbNameCheck" tFullName="-tAsmId $target" rBestTrackHub="-trackHub" buildDir="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz${Query}.${DS}" - symLink="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz.${qAsmId}" + symLink="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz.${qAccId}" targetExists="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData" - targetSizes="/hive/data/genomes/asmHubs/${tGcPath}/${tAsmId}/${tAsmId}.chrom.sizes.txt" - target2bit="/hive/data/genomes/asmHubs/${tGcPath}/${tAsmId}/${tAsmId}.2bit" + targetSizes="/hive/data/genomes/asmHubs/${tGcPath}/${tAccId}/${tAccId}.chrom.sizes.txt" + target2bit="/hive/data/genomes/asmHubs/${tGcPath}/${tAccId}/${tAccId}.2bit" tTdb="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/doTrackDb.bash" tRbestArgs="-target2Bit=\"${target2bit}\" \\ -targetSizes=\"${targetSizes}\"" tSwapRbestArgs="-query2bit=\"${target2bit}\" \\ -querySizes=\"${targetSizes}\"" ;; esac case $query in GC[AF]_*) trackHub="-trackHub -noDbNameCheck" qFullName="-qAsmId $query" rBestTrackHub="-trackHub" - swapDir="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/blastz.${tAsmId}.swap" - swapLink="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/lastz.${tAsmId}" + swapDir="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/blastz.${tAccId}.swap" + swapLink="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/lastz.${tAccId}" queryExists="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData" - querySizes="/hive/data/genomes/asmHubs/${qGcPath}/${qAsmId}/${qAsmId}.chrom.sizes.txt" - query2bit="/hive/data/genomes/asmHubs/${qGcPath}/${qAsmId}/${qAsmId}.2bit" + querySizes="/hive/data/genomes/asmHubs/${qGcPath}/${qAccId}/${qAccId}.chrom.sizes.txt" + query2bit="/hive/data/genomes/asmHubs/${qGcPath}/${qAccId}/${qAccId}.2bit" qTdb="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/doTrackDb.bash" qRbestArgs="-query2Bit=\"${query2bit}\" \\ -querySizes=\"${querySizes}\"" qSwapRbestArgs="-target2bit=\"${query2bit}\" \\ -targetSizes=\"${querySizes}\"" ;; esac if [ ! -d "${targetExists}" ]; then printf "ERROR: can not find ${targetExists}\n" 1>&2 exit 255 fi if [ ! -d "${queryExists}" ]; then @@ -367,31 +367,31 @@ "mammal") ;; "other") minScore="5000" linearGap="loose" ;; esac ;; "other") minScore="5000" linearGap="loose" esac if [ "$tClade" == "primate" -a "$qClade" == "primate" ]; then -export yamlString="# ${target}.${query}.yaml +export yamlString="# ${tAccId}.${qAccId}.yaml TARGET_Sequence: class: File path: ${tFaGzUrl} QUERY_Sequence: class: File path: ${qFaGzUrl} # axtChain options axtChainMinScore: ${minScore} linear_gap_options.linear_gap: ${linearGap} # lastz options xdropX: 910 ydropY: 15000 stepZ: 1 noTransitionT: false strand_selectorB: both @@ -475,52 +475,52 @@ BASE=${buildDir} TMPDIR=/dev/shm " fi ### skip primary alignment if it is already done ### primaryDone == 0 means NOT done yet if [ $primaryDone -eq 0 ]; then mkdir "${buildDir}" ### setup the DEF file printf "%s" "${defString}" > ${buildDir}/DEF ### and the yaml file -printf "%s" "${yamlString}" > ${buildDir}/${tAsmId}.${qAsmId}.yaml +printf "%s" "${yamlString}" > ${buildDir}/${tAccId}.${qAccId}.yaml ### setup the buildDir/run.sh script printf "#!/bin/bash set -beEu -o pipefail export buildDir=\"${buildDir}\" export swapDir=\"${swapDir}\" export PM=\"/hive/users/hiram/galaxy/venv3.12/bin/planemo\" -export targetDb=\"${tAsmId}\" -export queryDb=\"${qAsmId}\" +export targetDb=\"${tAccId}\" +export queryDb=\"${qAccId}\" export QueryDb=\"${Query}\" cd \${buildDir} mkdir -p log -export DS=\`date \"+\%%F_\%%T_\%%s\"\` +export DS=\`date \"+%%F_%%T_%%s\"\` export logDir=\"\${buildDir}/log\" export logFile=\"\${logDir}/\${DS}.log\" time (\${PM} run \\ - \"~/kent/src/hg/utils/automation/kegAlign.json.gz\" \\ + ~/kent/src/hg/utils/automation/kegAlign.json.ga \\ \"\${targetDb}.\${queryDb}.yaml\" --profile vgp \\ --history_name \"\${targetDb}.\${queryDb}.kegAlign\" \\ --test_output_json \"\${logDir}/runOutput.\${DS}.json\") >> \"\${logFile}\" 2>&1 export invocationId=\`jq '.tests[0].data.invocation_details.details.invocation_id' \"\${logDir}/runOutput.\${DS}.json\" | tr -d '\"'\` printf \"invocation ID: %%s\\n\" \"\${invocationId}\" 1>&2 mkdir -p result/\${DS} \${PM} invocation_download \"\${invocationId}\" --profile vgp \\ --output_directory result/\${DS} ### install allChain into buildDir/axtChain/ mkdir -p \${buildDir}/axtChain export allChainFile=\`ls result/\${DS}/allChain__*.chain\` gzip -c \"\${allChainFile}\" > \${buildDir}/axtChain/\${targetDb}.\${queryDb}.all.chain.gz @@ -641,68 +641,68 @@ mkdir $buildDir cd $buildDir printf '${yamlString} ' > DEF time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub} -verbose=2 \`pwd\`/DEF -syntenicNet \\ ${tFullName} ${qFullName} -workhorse=${workHorse} -smallClusterHub=${smallClusterHub} -fileServer=${fileServer} -bigClusterHub=${bigHub} \\ -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > do.log 2>&1 grep -w real do.log | sed -e 's/^/ # /;' " > ${buildDir}/makeDoc.txt (grep -w real $buildDir/do.log || true) | sed -e 's/^/ # /;' | head -1 >> ${buildDir}/makeDoc.txt -printf "\n sed -e 's/^/ # /;' fb.${tAsmId}.chain${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt -sed -e 's/^/ # /;' $buildDir/fb.${tAsmId}.chain${Query}Link.txt >> ${buildDir}/makeDoc.txt +printf "\n sed -e 's/^/ # /;' fb.${tAccId}.chain${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt +sed -e 's/^/ # /;' $buildDir/fb.${tAccId}.chain${Query}Link.txt >> ${buildDir}/makeDoc.txt -printf " sed -e 's/^/ # /;' fb.${tAsmId}.chainSyn${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt -sed -e 's/^/ # /;' $buildDir/fb.${tAsmId}.chainSyn${Query}Link.txt >> ${buildDir}/makeDoc.txt +printf " sed -e 's/^/ # /;' fb.${tAccId}.chainSyn${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt +sed -e 's/^/ # /;' $buildDir/fb.${tAccId}.chainSyn${Query}Link.txt >> ${buildDir}/makeDoc.txt printf "\n time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=${workHorse} -buildDir=\`pwd\` \\ ${tRbestArgs} \\ ${qRbestArgs} \\ - ${tAsmId} ${qAsmId}) > rbest.log 2>&1 + ${tAccId} ${qAccId}) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;'\n" >> ${buildDir}/makeDoc.txt (grep -w real $buildDir/rbest.log || true) | sed -e 's/^/ # /;' >> ${buildDir}/makeDoc.txt -printf "\n sed -e 's/^/ # /;' fb.${tAsmId}.chainRBest.${Query}.txt\n" >> ${buildDir}/makeDoc.txt -(sed -e 's/^/ # /;' ${buildDir}/fb.${tAsmId}.chainRBest.${Query}.txt || true) >> ${buildDir}/makeDoc.txt +printf "\n sed -e 's/^/ # /;' fb.${tAccId}.chainRBest.${Query}.txt\n" >> ${buildDir}/makeDoc.txt +(sed -e 's/^/ # /;' ${buildDir}/fb.${tAccId}.chainRBest.${Query}.txt || true) >> ${buildDir}/makeDoc.txt printf "\n ### and for the swap\n" >> ${buildDir}/makeDoc.txt cat ${buildDir}/makeDoc.txt printf "# swap into: ${swapDir}\n" 1>&2 if [ "$swapDone" -eq 0 ]; then mkdir ${swapDir} ln -s ${buildDir}/DEF ${swapDir}/DEF printf "#!/bin/bash set -beEu -o pipefail cd $swapDir -export targetDb=\"${tAsmId}\" +export targetDb=\"${tAccId}\" export Target=\"${Target}\" -export queryDb=\"${qAsmId}\" +export queryDb=\"${qAccId}\" time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub} -swap -verbose=2 \\ ${tFullName} ${qFullName} ${buildDir}/DEF -swapDir=\`pwd\` \\ -syntenicNet -workhorse=${workHorse} -smallClusterHub=${smallClusterHub} -fileServer=${fileServer} -bigClusterHub=${bigHub} \\ -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > swap.log 2>&1 grep -w real swap.log | sed -e 's/^/ # /;' sed -e 's/^/ # /;' fb.\${queryDb}.chain\${Target}Link.txt sed -e 's/^/ # /;' fb.\${queryDb}.chainSyn\${Target}Link.txt time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=${workHorse} -buildDir=\`pwd\` \\ ${tSwapRbestArgs} \\ ${qSwapRbestArgs} \\ \${queryDb} \${targetDb}) > rbest.log 2>&1 @@ -728,45 +728,45 @@ ### continue the make doc printf "\n cd ${swapDir}\n" >> ${buildDir}/makeDoc.txt printf "\n time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub} -swap -verbose=2 \\ ${tFullName} ${qFullName} ${buildDir}/DEF -swapDir=\`pwd\` \\ -syntenicNet -workhorse=${workHorse} -smallClusterHub=${smallClusterHub} -fileServer=${fileServer} -bigClusterHub=${bigHub} \\ -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > swap.log 2>&1 grep -w real swap.log | sed -e 's/^/ # /;' " >> ${buildDir}/makeDoc.txt (grep -w real ${swapDir}/swap.log || true) | sed -e 's/^/ # /;' >> ${buildDir}/makeDoc.txt -printf "\n sed -e 's/^/ # /;' fb.${qAsmId}.chain${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt -sed -e 's/^/ # /;' ${swapDir}/fb.${qAsmId}.chain${Target}Link.txt >> ${buildDir}/makeDoc.txt +printf "\n sed -e 's/^/ # /;' fb.${qAccId}.chain${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt +sed -e 's/^/ # /;' ${swapDir}/fb.${qAccId}.chain${Target}Link.txt >> ${buildDir}/makeDoc.txt -printf " sed -e 's/^/ # /;' fb.${qAsmId}.chainSyn${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt -sed -e 's/^/ # /;' ${swapDir}/fb.${qAsmId}.chainSyn${Target}Link.txt >> ${buildDir}/makeDoc.txt +printf " sed -e 's/^/ # /;' fb.${qAccId}.chainSyn${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt +sed -e 's/^/ # /;' ${swapDir}/fb.${qAccId}.chainSyn${Target}Link.txt >> ${buildDir}/makeDoc.txt printf "\ time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=${workHorse} -buildDir=\`pwd\` \\ ${tSwapRbestArgs} \\ ${qSwapRbestArgs} \\ - ${qAsmId} ${tAsmId}) > rbest.log 2>&1 + ${qAccId} ${tAccId}) > rbest.log 2>&1 grep -w real rbest.log | sed -e 's/^/ # /;'\n" >> ${buildDir}/makeDoc.txt (grep -w real ${swapDir}/rbest.log || true) | sed -e 's/^/ # /;' >> ${buildDir}/makeDoc.txt -printf "\n sed -e 's/^/ # /;' fb.${qAsmId}.chainRBest.${Target}.txt\n" >> ${buildDir}/makeDoc.txt -(sed -e 's/^/ # /;' ${swapDir}/fb.${qAsmId}.chainRBest.${Target}.txt || true) >> ${buildDir}/makeDoc.txt +printf "\n sed -e 's/^/ # /;' fb.${qAccId}.chainRBest.${Target}.txt\n" >> ${buildDir}/makeDoc.txt +(sed -e 's/^/ # /;' ${swapDir}/fb.${qAccId}.chainRBest.${Target}.txt || true) >> ${buildDir}/makeDoc.txt printf "\n##############################################################################\n" >> ${buildDir}/makeDoc.txt ### show completed makeDoc.txt #### cat ${buildDir}/makeDoc.txt ### end seconds export endT=`date "+%s"` export toAddress="$userName" export fromAddress="$userName" export subject="kegAlign lastz DONE $target $query" printf "To: $toAddress From: $fromAddress Subject: $subject