e941783cbc9cd3ec6af5f75b6969554fba9282e1 hiram Thu Apr 29 23:05:28 2021 -0700 adding frames procedure refs #11636 diff --git src/hg/makeDb/doc/hg38/cactus241way.txt src/hg/makeDb/doc/hg38/cactus241way.txt index 48805f9..95d3fcc 100644 --- src/hg/makeDb/doc/hg38/cactus241way.txt +++ src/hg/makeDb/doc/hg38/cactus241way.txt @@ -933,16 +933,81 @@ ############################################################################ # pushQ redmine (TBD - 2017-11-07 - Hiram) cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg38 find -L `pwd`/cactus241way `pwd`/phastCons241way `pwd`/phyloP241way \ /gbdb/hg38/cactus241way -type f \ > /hive/data/genomes/hg38/bed/cactus241way/downloads/redmine.20216.fileList wc -l /hive/data/genomes/hg38/bed/cactus241way/downloads/redmine.20216.fileList # 1450 /hive/data/genomes/hg38/bed/cactus241way/downloads/redmine.20216.fileList cd /hive/data/genomes/hg38/bed/cactus241way/downloads hgsql -e 'show tables;' hg38 | grep 241way \ | sed -e 's/^/hg38./;' > redmine.20216.table.list ############################################################################ +## adding frames ( DONE - 2021-04-29 - Hiram ) + + mkdir /hive/data/genomes/hg38/bed/cactus241way/frames + cd /hive/data/genomes/hg38/bed/cactus241way/frames + mkdir genes + + hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" hg38 \ + | genePredSingleCover stdin stdout | gzip -2c \ + > genes/hg38.gp.gz + + genePredCheck -db=hg38 genes/hg38.gp.gz + + # checked: 19328 failed: 0 + + ls ../ucscNames | sed -e 's/.maf//;' > chr.list + ls genes | sed -e 's/.gp.gz//;' > gene.list + + printf '#!/bin/bash + +set -beEu -o pipefail + +export C=$1 +export G=$2 + +cat ../ucscNames/${C}.maf | genePredToMafFrames hg38 stdin stdout \ + "${G}" genes/${G}.gp.gz | gzip > parts/${C}.${G}.mafFrames.gz + +' > runOne + + chmod +x runOne + + printf '#LOOP +./runOne $(root1) $(root2) parts/$(root1).$(root2).mafFrames.gz +#ENDLOOP +' > template + + gensub2 
chr.list gene.list template perl.jobList + + time ($HOME/kent/src/hg/utils/automation/perlPara.pl 4 perl.jobList) \ + >> do.log 2>&1 & + + tail do.log + +# Completed: 454 of 454 jobs +# CPU time in finished jobs: 91822s 1530.37m 25.51h 1.06d 0.003y +# Average job time: 202s 3.37m 0.06h 0.00d +# Longest finished job: 7570s 126.17m 2.10h 0.09d + + # real 417m27.701s + + time find ./parts -type f | while read F +do + echo "${F}" 1>&2 + zcat ${F} +done | sort -k1,1 -k2,2n | gzip -c > cactus241wayFrames.bed.gz + + # real 0m3.178s + + hgLoadMafFrames hg38 cactus241wayFrames cactus241wayFrames.bed.gz + + featureBits -countGaps hg38 cactus241wayFrames + + # 33621579 bases of 3272116950 (1.028%) in intersection + +############################################################################