3cbcd195f375a1368615ca50be236375507e010f
hiram
  Mon Oct 9 15:01:49 2023 -0700
added correct construction of the maf summary table refs #31561

diff --git src/hg/makeDb/doc/hg38/hprc90way.txt src/hg/makeDb/doc/hg38/hprc90way.txt
index 5bf1daa..e1d619d 100644
--- src/hg/makeDb/doc/hg38/hprc90way.txt
+++ src/hg/makeDb/doc/hg38/hprc90way.txt
@@ -81,30 +81,70 @@
 real    29m13.295s
 
 ###############################################################################
 ### loading this maf file
 
 [hiram@hgwdev /hive/data/genomes/hg38/bed/hprc/mafFile/perChrom] ln -s `pwd`/chr*.maf /gbdb/hg38/hprc/cactus90way
 
 cd /dev/shm
 time (hgLoadMaf -pathPrefix=/gbdb/hg38/hprc/cactus90way hg38 hprc90way) > load90way.log 2>&1 &
 # Loaded 1571098 mafs in 64 files from /gbdb/hg38/hprc/cactus90way
 # real    20m32.061s
 
 #  -rw-rw-r--    1   84132726 Aug 21 11:56 hprc90way.tab
 
 ###############################################################################
+# summary table loaded after failed experiment below (DONE - 2023-10-09 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/hprc/mafFile/summary
+    cd /hive/data/genomes/hg38/bed/hprc/mafFile/summary
+    # script to translate the GC accession names (GCA_0123.1 -> GCA0123v1)
+    # into a form hgLoadMafSummary handles correctly, then restore them after.
+    printf '#!/bin/bash
+
+set -beEu -o pipefail
+
+export mafFile=${1}
+export B="${mafFile%.maf}"
+
+sed -e 's/GC\([AF]\)_\([0-9]\+\)./GC\1\2v/;' ../iRows/result/${mafFile} \
+  | hgLoadMafSummary -test -verbose=2 -minSize=30000 \
+     -mergeGap=1500 -maxSize=200000 hg38 "hprc90${B}Summary" stdin 2> /dev/null
+
+sed -e 's/GC\([AF]\)\([0-9]\+\)v/GC\1_\2./g;' "hprc90${B}Summary.tab" \
+      > "${B}.summary.tab"
+
+rm -f "hprc90${B}Summary.tab"
+' > runOne
+    chmod +x runOne
+
+    ls ../iRows/result | grep maf > maf.list
+    printf '#LOOP
+./runOne $(path1)
+#ENDLOOP
+' > template
+    gensub2 maf.list single template jobList
+
+    time (perlPara.pl 17 jobList) > 17.log 2>&1 &
+    #  real    10m34.640s
+
+    # when done
+    sort -k2,2 -k3,3n chr*.summary.tab > ../hprc90waySummary.tab
+    cd ..
+    hgLoadSqlTab hg38 hprc90waySummary ~/kent/src/hg/lib/mafSummary.sql \
+          hprc90waySummary.tab
+
+###############################################################################
 ### and the summary table (this did not work with the GCA_0123.1 dot suffix;
 ###    the .1 got trimmed off the names)
 time (cat /gbdb/hg38/hprc/cactus90way/*.maf \
     | hgLoadMafSummary -verbose=2 -minSize=30000 \
       -mergeGap=1500 -maxSize=200000 hg38 hprc90waySummary stdin) > do.log 2>&1
 # Created 7864892 summary blocks from 135565223 components and 1571098 mafs from stdin
 # real    44m52.247s
 
 # -rw-rw-r--    1  417328380 Aug 21 12:44 hprc90waySummary.tab
 
 
 ### use this perl script to add the .1 to the GCA names
 ##############################################################################
 #!/usr/bin/env perl