7f114623aa512095de2a6a83cab942ca3829ea60
hiram
  Tue Aug 15 10:18:17 2023 -0700
reChain the chains refs #31561

diff --git src/hg/makeDb/doc/hg38/hprcReChain.txt src/hg/makeDb/doc/hg38/hprcReChain.txt
new file mode 100644
index 0000000..7f4a633
--- /dev/null
+++ src/hg/makeDb/doc/hg38/hprcReChain.txt
@@ -0,0 +1,108 @@
+##############################################################################
+### reChain the chains obtained from the HPRC project
+### done - 2023-08-14 - Hiram
+
+mkdir /hive/data/genomes/hg38/bed/hprc/reChain
+cd /hive/data/genomes/hg38/bed/hprc/reChain
+
+# avoid the broken one GCA_018504655.1
+ls ../bigChainToChain/hg38.chainHprc*.chain | grep -v GCA_018504655.1 \
+  > chain.list
+
+# using this 'runOne' script for each one:
+#######################################################################
+
+#!/bin/bash
+
+set -beEu -o pipefail
+
+export hg38TwoBit="/hive/data/genomes/hg38/hg38.2bit"
+export chainFile="${1}"
+export inChain="../bigChainToChain/${chainFile}"
+export asmName=`echo $chainFile | sed -e 's/hg38.chainHprc//; s/.chain//;'`
+
+# may already be done
+if [ -s "${chainFile}" ]; then
+  exit 0
+fi
+
+export buildDir=""
+export twoBit=""
+export asmId=""
+
+case "${asmName}" in
+    GCA_*) gcX="${asmName:0:3}"
+       d0="${asmName:4:3}"
+       d1="${asmName:7:3}"
+       d2="${asmName:10:3}"
+       tB="`ls -d /hive/data/genomes/asmHubs/allBuild/${gcX}/${d0}/${d1}/${d2}/${asmName}_*`"
+       buildDir="`realpath ${tB}`"
+       asmId="`basename $buildDir`"
+       twoBit="${buildDir}/${asmId}.2bit"
+       ;;
+     T2T-*)
+       asmId="hs1"
+       buildDir="/hive/data/genomes/$asmId"
+       twoBit="${buildDir}/${asmId}.2bit"
+       ;;
+     *)  printf "ERROR: unrecognized $asmName\n" 1>&2
+       exit 255
+       ;;
+esac
+
+printf "%s\n" "${chainFile}" 1>&2
+
+time (chainToAxt "${inChain}" "${hg38TwoBit}" \
+   "${twoBit}" stdout | axtChain -minScore=0 -linearGap=loose \
+      stdin "${hg38TwoBit}" "${twoBit}" "${chainFile}") > err/${chainFile} 2>&1
+
+hgLoadChain hg38 measureReChain "${chainFile}"
+featureBits hg38 measureReChain > featBits/fb.hg38.${asmName}.txt 2>&1
+featureBits hg38 measureReChainLink > featBits/fb.hg38.${asmName}Link.txt 2>&1
+hgsql -N -e 'select count(*) from measureReChainLink;' hg38 > counts/hg38.${asmName}.linkCount.txt
+hgsql -N -e 'select count(*) from measureReChain;' hg38 > counts/hg38.${asmName}.chainCount.txt
+
+##############################################################
+
+to generate a jobList
+
+printf "#LOOP
+./runOne $(path1)
+#ENDLOOP
+" > template
+
+gensub2 chain.list single template jobList
+
+# running them all, one at a time
+
+mkdir featBits counts err
+
+time (~/kent/src/hg/utils/automation/perlPara.pl 1 jobList) > do.log 2>&1 
+# real    236m9.479s
+
+##############################################################################
+# turn those chains into nets (DONE - 2023-08-15 - Hiram)
+
+mkdir /hive/data/genomes/hg38/bed/hprc/reNet
+cd /hive/data/genomes/hg38/bed/hprc/reNet
+
+ls ../reChain  | grep hg38.chainHprc > chain.list
+
+printf '#LOOP
+runOne $(path1) {check out exists+ $(root1).net}
+#ENDLOOP
+' > template
+
+gensub2 chain.list single template jobList
+
+para -ram=4g create jobList
+para push
+para time
+Completed: 88 of 88 jobs
+CPU time in finished jobs:        285s       4.74m     0.08h    0.00d  0.000 y
+IO & Wait Time:                   206s       3.44m     0.06h    0.00d  0.000 y
+Average job time:                   6s       0.09m     0.00h    0.00d
+Longest finished job:               8s       0.13m     0.00h    0.00d
+Submission to last job:             8s       0.13m     0.00h    0.00d
+
+##############################################################################