2ce16411b0f909e7e1dfd27a01af34316c260f2f
hiram
  Mon Jul 5 11:58:27 2021 -0700
catching up the edge cases for pair wise alignment to assembly hub genome no redmine

diff --git src/hg/utils/automation/pairLastz.sh src/hg/utils/automation/pairLastz.sh
index fada1d1..47795e8 100755
--- src/hg/utils/automation/pairLastz.sh
+++ src/hg/utils/automation/pairLastz.sh
@@ -90,74 +90,79 @@
        oName=`hgsql -N -e "select organism from dbDb where name=\"${asmName}\";" hgcentraltest`
        ;;
   esac
   printf "%s" "${oName}"
 }
 
 export target="$1"
 export query="$2"
 export tClade="$3"
 export qClade="$4"
 
 export tGcPath=$(gcPath $target)
 export qGcPath=$(gcPath $query)
 export tAsmId=$(asmId $target)
 export qAsmId=$(asmId $query)
+printf "# tq: '${target}' '${query}' '${tClade}' '${qClade}'\n" 1>&2
 printf "# tq gcPath: '${tGcPath}' '${qGcPath}'\n" 1>&2
 printf "# tq asmId: '${tAsmId}' '${qAsmId}'\n" 1>&2
 
 # upper case first character
 export Target="${tAsmId^}"
 export Query="${qAsmId^}"
 
 export DS=`date "+%F"`
 
 # assume UCSC db build
 export buildDir="/hive/data/genomes/${target}/bed/lastz${Query}.${DS}"
 export targetExists="/hive/data/genomes/${target}/bed"
 export symLink="/hive/data/genomes/${target}/bed/lastz.${qAsmId}"
 export swapDir="/hive/data/genomes/${query}/bed/blastz.${tAsmId}.swap"
 export queryExists="/hive/data/genomes/${query}/bed"
 export swapLink="/hive/data/genomes/${query}/bed/lastz.${tAsmId}"
 export targetSizes="/hive/data/genomes/${target}/chrom.sizes"
 export querySizes="/hive/data/genomes/${query}/chrom.sizes"
 export target2bit="/hive/data/genomes/${target}/${target}.2bit"
 export query2bit="/hive/data/genomes/${query}/${query}.2bit"
 export trackHub=""
 export rBestTrackHub=""
 export tRbestArgs=""
 export qRbestArgs=""
 export swapRbestArgs=""
+export tFullName=""
+export qFullName=""
 
 #  override those specifications if assembly hub
 case $target in
      GC[AF]_*) trackHub="-trackHub -noDbNameCheck"
+       tFullName="-tAsmId $target"
        rBestTrackHub="-trackHub"
        buildDir="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz${Query}.${DS}"
-       symLink="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz${qAsmId}.${DS}"
+       symLink="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData/lastz.${qAsmId}"
        targetExists="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${target}/trackData"
        targetSizes="/hive/data/genomes/asmHubs/${tGcPath}/${tAsmId}/${tAsmId}.chrom.sizes.txt"
        target2bit="/hive/data/genomes/asmHubs/${tGcPath}/${tAsmId}/${tAsmId}.2bit"
        tRbestArgs="-target2Bit=\"${target2bit}\" \\
 -targetSizes=\"${targetSizes}\""
        swapRbestArgs="-query2bit=\"${target2bit}\" \\
 -querySizes=\"${targetSizes}\""
        ;;
 esac
 
 case $query in
      GC[AF]_*) trackHub="-trackHub -noDbNameCheck"
+       qFullName="-qAsmId $query"
        rBestTrackHub="-trackHub"
        swapDir="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/blastz.${tAsmId}.swap"
        swapLink="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData/lastz.${tAsmId}"
        queryExists="/hive/data/genomes/asmHubs/allBuild/${qGcPath}/${query}/trackData"
        querySizes="/hive/data/genomes/asmHubs/${qGcPath}/${qAsmId}/${qAsmId}.chrom.sizes.txt"
        query2bit="/hive/data/genomes/asmHubs/${qGcPath}/${qAsmId}/${qAsmId}.2bit"
        tRbestArgs="-query2Bit=\"${query2bit}\" \\
 -querySizes=\"${querySizes}\""
        swapRbestArgs="-target2bit=\"${query2bit}\" \\
 -targetSizes=\"${querySizes}\""
        ;;
 esac
 
 
 if [ ! -d "${targetExists}" ]; then
@@ -180,65 +185,76 @@
   exit 255
 fi
 
 
 if [ ! -s "${targetSizes}" ]; then
   printf "ERROR: can not find ${targetSizes}\n" 1>&2
   exit 255
 fi
 
 if [ ! -s "${querySizes}" ]; then
   printf "ERROR: can not find ${querySizes}\n" 1>&2
   exit 255
 fi
 
 export doneCount=0
+export primaryDone=0
+export swapDone=0
 
 if [ -L "${symLink}" ]; then
   printf "# ${query} -> ${target} already done\n" 1>&2
   doneCount=`echo $doneCount | awk '{printf "%d", $1+1}'`
+  primaryDone=1
+else
+  printf "# no symLink: $symLink\n" 1>&2
 fi
 
 if [ -L "${swapLink}" ]; then
   printf "# swap ${query} -> ${target} already done\n" 1>&2
   doneCount=`echo $doneCount | awk '{printf "%d", $1+1}'`
+  swapDone=1
 fi
 
 export working=`ls -d ${targetExists}/lastz${Query}.* 2> /dev/null | wc -l`
 if [ "${working}" -gt 0 ]; then
+  if [ "${primaryDone}" -eq 0 ]; then
     printf "# in progress, ${query} -> ${target}:\n" 1>&2
     printf "# " 1>&2
     ls -ogd ${targetExists}/lastz${Query}.* 1>&2
-  if [ "${doneCount}" -ne 2 ]; then
-    exit 0
   fi
+  buildDir=`ls -d ${targetExists}/lastz${Query}.*`
+  printf "# continuing: %s\n" "${buildDir}" 1>&2
+fi
+export primaryPartsDone=`ls $buildDir/fb.* 2> /dev/null | wc -l`
+if [ "$primaryPartsDone" -gt 0 ]; then
+  primaryDone="$primaryPartsDone"
 fi
 
 if [ -d "${swapDir}" ]; then
   printf "# swap in progress ${query} -> ${target}\n" 1>&2
   printf "# " 1>&2
   ls -ogd "${swapDir}" 1>&2
   if [ "${doneCount}" -ne 2 ]; then
     exit 0
   fi
 fi
 
 if [ "${doneCount}" -eq 2 ]; then
     printf "# all done\n" 1>&2
-    exit 0
 fi
 
+
 export tOrgName="$(orgName $target)"
 export qOrgName="$(orgName $query)"
 export tSequenceCount="$(seqCount $target)"
 export qSequenceCount="$(seqCount $query)"
 printf "# working: %s\n" "${buildDir}" 1>&2
 printf "# target: $target - $tOrgName - $tClade - $tSequenceCount sequences\n" 1>&2
 printf "#  query: $query - $qOrgName - $qClade - $qSequenceCount sequences\n" 1>&2
 
 export seq1Limit="40"
 if [ "${tSequenceCount}" -gt 50000 ]; then
   seq1Limit="100"
 fi
 
 export seq2Limit="100"
 if [ "${qSequenceCount}" -gt 50000 ]; then
@@ -267,174 +283,186 @@
         "primate")
            ;;
         "mammal")
            ;;
         "other")
            minScore="5000"
            linearGap="loose"
            ;;
      esac
      ;;
    "other")
       minScore="5000"
       linearGap="loose"
 esac
 
-mkdir "${buildDir}"
-
 export defString="# ${qOrgName} ${Query} vs. ${tOrgName} ${Target}
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
 
 # TARGET: ${tOrgName} ${Target}
 SEQ1_DIR=${target2bit}
 SEQ1_LEN=${targetSizes}
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=${seq1Limit}
 
 # QUERY: ${qOrgName} ${Query}
 SEQ2_DIR=${query2bit}
 SEQ2_LEN=${querySizes}
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=${seq2Limit}
 
 BASE=${buildDir}
 TMPDIR=/dev/shm
 "
 
+### skip primary alignment if it is already done
+###  primaryDone == 0 means NOT done yet
+if [ $primaryDone -eq 0 ]; then
+  mkdir "${buildDir}"
+
 ### setup the DEF file
 printf "%s" "${defString}" > ${buildDir}/DEF
 
 ### setup the buildDir/run.sh script
 printf "#!/bin/bash
 
 set -beEu -o pipefail
 
 export targetDb=\"${tAsmId}\"
 export queryDb=\"${qAsmId}\"
 export QueryDb=\"${Query}\"
 
 cd ${buildDir}
 time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub} -verbose=2 \`pwd\`/DEF -syntenicNet \\
-  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
+  $tFullName $qFullName -workhorse=hgwdev -smallClusterHub=hgwdev \\
+    -bigClusterHub=ku \\
     -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > do.log 2>&1
 
 grep -w real do.log | sed -e 's/^/    # /;'
 
 sed -e 's/^/    # /;' fb.\${targetDb}.chain\${QueryDb}Link.txt
 sed -e 's/^/    # /;' fb.\${targetDb}.chainSyn\${QueryDb}Link.txt
 
 time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=hgwdev -buildDir=\`pwd\` \\
    ${tRbestArgs} \\
    \${targetDb} \${queryDb}) > rbest.log 2>&1
 
 grep -w real rbest.log | sed -e 's/^/    # /;'
 
 sed -e 's/^/    #/;' fb.\${targetDb}.chainRBest.\${QueryDb}.txt
 " > ${buildDir}/run.sh
 chmod +x ${buildDir}/run.sh
 
 ### run the primary alignment
 time (${buildDir}/run.sh) >> ${buildDir}/do.log 2>&1
 
+fi
+#### primaryDone > 0 ready for swap
+
 #### print out the makeDoc.txt to this point into buildDir/makeDoc.txt
 
 printf "##############################################################################
 # LASTZ ${tOrgName} ${Target} vs. $qOrgName ${Query} (DONE - $DS - Hiram)
     mkdir $buildDir
     cd $buildDir
 
     printf '${defString}
 ' > DEF
 
     time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub} -verbose=2 \`pwd\`/DEF -syntenicNet \\
-  -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
+      ${tFullName} ${qFullName} -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
         -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > do.log 2>&1
     grep -w real do.log | sed -e 's/^/    # /;'
 " > ${buildDir}/makeDoc.txt
-grep -w real $buildDir/do.log | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
+(grep -w real $buildDir/do.log || true) | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
 
 printf "\n    sed -e 's/^/    # /;' fb.${tAsmId}.chain${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt
 sed -e 's/^/    # /;' $buildDir/fb.${tAsmId}.chain${Query}Link.txt >> ${buildDir}/makeDoc.txt
 
 printf "    sed -e 's/^/    # /;' fb.${tAsmId}.chainSyn${Query}Link.txt\n" >> ${buildDir}/makeDoc.txt
 sed -e 's/^/    # /;' $buildDir/fb.${tAsmId}.chainSyn${Query}Link.txt >> ${buildDir}/makeDoc.txt
 
 printf "\n    time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=hgwdev -buildDir=\`pwd\` \\
       ${tRbestArgs} \\
         ${tAsmId} ${qAsmId}) > rbest.log 2>&1
 
     grep -w real rbest.log | sed -e 's/^/    # /;'\n" >> ${buildDir}/makeDoc.txt
-grep -w real $buildDir/rbest.log | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
+(grep -w real $buildDir/rbest.log || true) | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
 printf "\n    sed -e 's/^/    # /;' fb.${tAsmId}.chainRBest.${Query}.txt\n" >> ${buildDir}/makeDoc.txt
-sed -e 's/^/    #/;' ${buildDir}/fb.${tAsmId}.chainRBest.${Query}.txt >> ${buildDir}/makeDoc.txt
+(sed -e 's/^/    # /;' ${buildDir}/fb.${tAsmId}.chainRBest.${Query}.txt || true) >> ${buildDir}/makeDoc.txt
 
 printf "\n### and for the swap\n" >> ${buildDir}/makeDoc.txt
 
 cat ${buildDir}/makeDoc.txt
 
 printf "# swap into: ${swapDir}\n" 1>&2
 
+if [ "$swapDone" -eq 0 ]; then
 mkdir ${swapDir}
 
 printf "#!/bin/bash
 
 set -beEu -o pipefail
 
 export targetDb=\"${tAsmId}\"
 export Target=\"${Target}\"
+export Qarget=\"${Query}\"
 export queryDb=\"${qAsmId}\"
 
-cd ${swapDir}
 time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub}  -swap -verbose=2 \\
-  ${buildDir}/DEF -swapDir=\`pwd\` \\
+  ${tFullName} ${qFullName} ${buildDir}/DEF -swapDir=\`pwd\` \\
   -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
     -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > swap.log 2>&1
 
 grep -w real swap.log | sed -e 's/^/    # /;'
 
 sed -e 's/^/    # /;' fb.\${queryDb}.chain\${Target}Link.txt
 sed -e 's/^/    # /;' fb.\${queryDb}.chainSyn\${Target}Link.txt
 time (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=hgwdev -buildDir=\`pwd\` \\
    ${swapRbestArgs} \\
    \${queryDb} \${targetDb}) > rbest.log 2>&1
 
 grep -w real rbest.log | sed -e 's/^/    # /;'
 
 sed -e 's/^/    # /;' fb.\${queryDb}.chainRBest.\${Target}.txt
 " > ${swapDir}/runSwap.sh
 
 chmod +x  ${swapDir}/runSwap.sh
 
+printf "# running ${swapDir}/runSwap.sh\n" 1>&2
+
 time (${swapDir}/runSwap.sh) >> ${swapDir}/swap.log 2>&1
+fi
 
 ### continue the make doc
+
 printf "\ncd ${swapDir}\n" >> ${buildDir}/makeDoc.txt
 
 printf "\ntime (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub}  -swap -verbose=2 \\
-  ${buildDir}/DEF -swapDir=\`pwd\` \\
+  ${tFullName} ${qFullName} ${buildDir}/DEF -swapDir=\`pwd\` \\
   -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
     -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > swap.log 2>&1
 grep -w real swap.log | sed -e 's/^/    # /;'
 " >> ${buildDir}/makeDoc.txt
-grep -w real ${swapDir}/swap.log | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
+(grep -w real ${swapDir}/swap.log || true) | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
 
 printf "\nsed -e 's/^/    # /;' fb.${qAsmId}.chain${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt
-sed -e 's/^/    # /;' ${swapDir}/fb.${tAsmId}.chain${Target}Link.txt >> ${buildDir}/makeDoc.txt
+sed -e 's/^/    # /;' ${swapDir}/fb.${qAsmId}.chain${Target}Link.txt >> ${buildDir}/makeDoc.txt
 
 printf "sed -e 's/^/    # /;' fb.${qAsmId}.chainSyn${Target}Link.txt\n" >> ${buildDir}/makeDoc.txt
 sed -e 's/^/    # /;' ${swapDir}/fb.${qAsmId}.chainSyn${Target}Link.txt >> ${buildDir}/makeDoc.txt
 
 printf "\ntime (~/kent/src/hg/utils/automation/doRecipBest.pl ${rBestTrackHub} -load -workhorse=hgwdev -buildDir=\`pwd\` \\
    ${swapRbestArgs} \\
    ${qAsmId} ${tAsmId}) > rbest.log 2>&1
 
 grep -w real rbest.log | sed -e 's/^/    # /;'\n" >> ${buildDir}/makeDoc.txt
-grep -w real ${swapDir}/rbest.log | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
+(grep -w real ${swapDir}/rbest.log || true) | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
 printf "\nsed -e 's/^/    # /;' fb.${qAsmId}.chainRBest.${Target}.txt\n" >> ${buildDir}/makeDoc.txt
-sed -e 's/^/    #/;' ${swapDir}/fb.${qAsmId}.chainRBest.${Target}.txt >> ${buildDir}/makeDoc.txt
+(sed -e 's/^/    # /;' ${swapDir}/fb.${qAsmId}.chainRBest.${Target}.txt || true) >> ${buildDir}/makeDoc.txt
 
 printf "\n##############################################################################\n" >> ${buildDir}/makeDoc.txt
 
 ### XXX ####
 cat ${buildDir}/makeDoc.txt