8d2de5d51f2b37ece3f733c5a575b3c74bb2c8e6
hiram
  Tue Apr 5 11:18:00 2022 -0700
more meta data on the query and target for later use in doc construction refs #29207

diff --git src/hg/utils/automation/pairLastz.sh src/hg/utils/automation/pairLastz.sh
index 578f530..a446dc5 100755
--- src/hg/utils/automation/pairLastz.sh
+++ src/hg/utils/automation/pairLastz.sh
@@ -93,39 +93,55 @@
        ;;
      *)
        count=`wc -l ${sizes} | cut -d' ' -f1`
        ;;
   esac
   printf "%s" "${count}"
 }
 
 function orgName() {
   export asmName=$1
   case $asmName in
      GC[AF]_*)
        gcxPath=$(gcPath $asmName)
        asmDir="/hive/data/outside/ncbi/genomes/${gcxPath}/${asmName}"
        asmRpt="${asmDir}/${asmName}_assembly_report.txt"
-       oName=`egrep -i "^# organism name:" ${asmRpt} | tr -d '\r' | sed -e 's/.*(//; s/).*//'`
+       oName=`egrep -m 1 -i "^# organism name:" ${asmRpt} | tr -d '\r' | sed -e 's/.*(//; s/).*//'`
        ;;
      *)
        oName=`hgsql -N -e "select organism from dbDb where name=\"${asmName}\";" hgcentraltest`
        ;;
   esac
   printf "%s" "${oName}"
 }
 
+function orgDate() {
+  export asmName=$1
+  case $asmName in
+     GC[AF]_*)
+       gcxPath=$(gcPath $asmName)
+       asmDir="/hive/data/outside/ncbi/genomes/${gcxPath}/${asmName}"
+       asmRpt="${asmDir}/${asmName}_assembly_report.txt"
+       oDate=`egrep -m 1 -i "^#[[:space:]]*Date:" ${asmRpt} | tr -d '\r' | sed -e 's/.*ate: \+//;'`
+       ;;
+     *)
+       oDate=""
+       ;;
+  esac
+  printf "%s" "${oDate}"
+}
+
 export target="$1"
 export query="$2"
 export tClade="$3"
 export qClade="$4"
 
 export tGcPath=$(gcPath $target)
 export qGcPath=$(gcPath $query)
 export tAsmId=$(asmId $target)
 export qAsmId=$(asmId $query)
 printf "# tq: '${target}' '${query}' '${tClade}' '${qClade}'\n" 1>&2
 printf "# tq gcPath: '${tGcPath}' '${qGcPath}'\n" 1>&2
 printf "# tq asmId: '${tAsmId}' '${qAsmId}'\n" 1>&2
 
 # upper case first character
 export Target="${tAsmId^}"
@@ -255,37 +271,39 @@
   printf "# swap in progress ${query} -> ${target}\n" 1>&2
   printf "# " 1>&2
   ls -ogd "${swapDir}" 1>&2
   if [ "${doneCount}" -ne 2 ]; then
     exit 0
   fi
 fi
 
 if [ "${doneCount}" -eq 2 ]; then
     printf "# all done\n" 1>&2
 fi
 
 
 export tOrgName="$(orgName $target)"
 export qOrgName="$(orgName $query)"
+export tOrgDate="$(orgDate $target)"
+export qOrgDate="$(orgDate $query)"
 export tAsmSize="$(asmSize $target)"
 export qAsmSize="$(asmSize $query)"
 export tSequenceCount="$(seqCount $target)"
 export qSequenceCount="$(seqCount $query)"
 printf "# working: %s\n" "${buildDir}" 1>&2
-printf "# target: $target - $tOrgName - $tClade - $tSequenceCount sequences\n" 1>&2
-printf "#  query: $query - $qOrgName - $qClade - $qSequenceCount sequences\n" 1>&2
+printf "# target: $target - $tOrgName - $tOrgDate - $tClade - $tSequenceCount sequences\n" 1>&2
+printf "#  query: $query - $qOrgName - $qOrgDate - $qClade - $qSequenceCount sequences\n" 1>&2
 LC_NUMERIC=en_US printf "#  sizes: target: %'d - query: %'d\n" "${tAsmSize}" "${qAsmSize}" 1>&2
 
 export seq1Limit="40"
 if [ "${tSequenceCount}" -gt 50000 ]; then
   seq1Limit="100"
 fi
 
 export seq2Limit="100"
 if [ "${qSequenceCount}" -gt 50000 ]; then
   seq2Limit="500"
 fi
 
 export minScore="3000"
 export linearGap="medium"
 
@@ -325,60 +343,60 @@
 export defString="# ${qOrgName} ${Query} vs. ${tOrgName} ${Target}
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
 BLASTZ_T=2
 BLASTZ_O=600
 BLASTZ_E=150
 BLASTZ_M=254
 BLASTZ_K=4500
 BLASTZ_Y=15000
 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
 #       A     C     G     T
 # A    90  -330  -236  -356
 # C  -330   100  -318  -236
 # G  -236  -318   100  -330
 # T  -356  -236  -330    90
 
-# TARGET: ${tOrgName} ${Target}
+# TARGET: ${tOrgName} ${tOrgDate} ${target}
 SEQ1_DIR=${target2bit}
 SEQ1_LEN=${targetSizes}
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=${seq1Limit}
 
-# QUERY: ${qOrgName} ${Query}
+# QUERY: ${qOrgName} ${qOrgDate} ${query}
 SEQ2_DIR=${query2bit}
 SEQ2_LEN=${querySizes}
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=${seq2Limit}
 
 BASE=${buildDir}
 TMPDIR=/dev/shm
 "
 else
 
 export defString="# ${qOrgName} ${Query} vs. ${tOrgName} ${Target}
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
 
-# TARGET: ${tOrgName} ${Target}
+# TARGET: ${tOrgName} ${tOrgDate} ${target}
 SEQ1_DIR=${target2bit}
 SEQ1_LEN=${targetSizes}
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=${seq1Limit}
 
-# QUERY: ${qOrgName} ${Query}
+# QUERY: ${qOrgName} ${qOrgDate} ${query}
 SEQ2_DIR=${query2bit}
 SEQ2_LEN=${querySizes}
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=${seq2Limit}
 
 BASE=${buildDir}
 TMPDIR=/dev/shm
 "
 
 fi
 
 ### skip primary alignment if it is already done
 ###  primaryDone == 0 means NOT done yet
 if [ $primaryDone -eq 0 ]; then
@@ -460,30 +478,32 @@
 
 (grep -w real $buildDir/rbest.log || true) | sed -e 's/^/    # /;' >> ${buildDir}/makeDoc.txt
 
 printf "\n    sed -e 's/^/    # /;' fb.${tAsmId}.chainRBest.${Query}.txt\n" >> ${buildDir}/makeDoc.txt
 (sed -e 's/^/    # /;' ${buildDir}/fb.${tAsmId}.chainRBest.${Query}.txt || true) >> ${buildDir}/makeDoc.txt
 
 printf "\n    ### and for the swap\n" >> ${buildDir}/makeDoc.txt
 
 cat ${buildDir}/makeDoc.txt
 
 printf "# swap into: ${swapDir}\n" 1>&2
 
 if [ "$swapDone" -eq 0 ]; then
 mkdir ${swapDir}
 
+ln -s ${buildDir}/DEF ${swapDir}/DEF
+
 printf "#!/bin/bash
 
 set -beEu -o pipefail
 
 cd $swapDir
 
 export targetDb=\"${tAsmId}\"
 export Target=\"${Target}\"
 export Qarget=\"${Query}\"
 export queryDb=\"${qAsmId}\"
 
 time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl ${trackHub}  -swap -verbose=2 \\
   ${tFullName} ${qFullName} ${buildDir}/DEF -swapDir=\`pwd\` \\
   -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \\
     -chainMinScore=${minScore} -chainLinearGap=${linearGap}) > swap.log 2>&1