3cbcd195f375a1368615ca50be236375507e010f hiram Mon Oct 9 15:01:49 2023 -0700 added correct construction of the maf summary table refs #31561 diff --git src/hg/makeDb/doc/hg38/hprc90way.txt src/hg/makeDb/doc/hg38/hprc90way.txt index 5bf1daa..e1d619d 100644 --- src/hg/makeDb/doc/hg38/hprc90way.txt +++ src/hg/makeDb/doc/hg38/hprc90way.txt @@ -81,30 +81,70 @@ real 29m13.295s ############################################################################### ### loading this maf file [hiram@hgwdev /hive/data/genomes/hg38/bed/hprc/mafFile/perChrom] ln -s `pwd`/chr*.maf /gbdb/hg38/hprc/cactus90way cd /dev/shm time (hgLoadMaf -pathPrefix=/gbdb/hg38/hprc/cactus90way hg38 hprc90way) > load90way.log 2>&1 & # Loaded 1571098 mafs in 64 files from /gbdb/hg38/hprc/cactus90way # real 20m32.061s # -rw-rw-r-- 1 84132726 Aug 21 11:56 hprc90way.tab ############################################################################### +# summary table loaded after failed experiment below (DONE - 2023-10-09 - Hiram) + mkdir /hive/data/genomes/hg38/bed/hprc/mafFile/summary + cd /hive/data/genomes/hg38/bed/hprc/mafFile/summary + # script to translate the GC names into a form + # that hgLoadMafSummary can handle correctly.
+ printf '#!/bin/bash + +set -beEu -o pipefail + +export mafFile=${1} +export B="${mafFile%.maf}" + +sed -e 's/GC\([AF]\)_\([0-9]\+\)./GC\1\2v/;' ../iRows/result/${mafFile} \ + | hgLoadMafSummary -test -verbose=2 -minSize=30000 \ + -mergeGap=1500 -maxSize=200000 hg38 "hprc90${B}Summary" stdin 2> /dev/null + +sed -e 's/GC\([AF]\)\([0-9]\+\)v/GC\1_\2./g;' "hprc90${B}Summary.tab" \ + > "${B}.summary.tab" + +rm -f "hprc90${B}Summary.tab" +' > runOne + chmod +x runOne + + ls ../iRows/result | grep maf > maf.list + printf '#LOOP +./runOne $(path1) +#ENDLOOP +' > template + gensub2 maf.list single template jobList + + time (perlPara.pl 17 jobList) > 17.log 2>&1 & + # real 10m34.640s + + # when done + sort -k2,2 -k3,3n chr*.summary.tab > ../hprc90waySummary.tab + cd .. + hgLoadSqlTab hg38 hprc90waySummary ~/kent/src/hg/lib/mafSummary.sql \ + hprc90waySummary.tab + +############################################################################### ### and the summary table (did not work with the GCA_0123.1 dot suffix ### the .1 got trimmed off the names) time (cat /gbdb/hg38/hprc/cactus90way/*.maf \ | hgLoadMafSummary -verbose=2 -minSize=30000 \ -mergeGap=1500 -maxSize=200000 hg38 hprc90waySummary stdin) > do.log 2>&1 # Created 7864892 summary blocks from 135565223 components and 1571098 mafs from stdin # real 44m52.247s # -rw-rw-r-- 1 417328380 Aug 21 12:44 hprc90waySummary.tab ### use this perl script to add the .1 to the GCA names ############################################################################## #!/usr/bin/env perl