690a38c3b39e4f52daab9dcfe00dd0952908d438
hiram
  Wed Mar 16 17:10:46 2022 -0700
adding knownGene protAA and protNuc fa.gz files per user request refs #29093

diff --git src/hg/makeDb/doc/hg38/multiz100way.txt src/hg/makeDb/doc/hg38/multiz100way.txt
index a6fa622..3618147 100644
--- src/hg/makeDb/doc/hg38/multiz100way.txt
+++ src/hg/makeDb/doc/hg38/multiz100way.txt
@@ -1960,30 +1960,57 @@
 	    gzip -c > exonNuc/$C.exonNuc.fa.gz "
 	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
 	    gzip -c > exonAA/$C.exonAA.fa.gz  "
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 
     # 109m16.821s
 
     rm *.known.bed
     export mz=multiz100way
     export gp=knownCanonical
     export db=hg38
     cat exonAA/c*.gz > $gp.$mz.exonAA.fa.gz 
     cat exonNuc/c*.gz > $gp.$mz.exonNuc.fa.gz 
 
+    # job list: one protNuc (mafGene -noTrans) and one protAA (mafGene default) job per non-empty bed file
+    mkdir protAA protNuc
+
+    for F in knownCanonical/*.known.bed
+    do
+      if [ -s "$F" ]; then
+         echo "$F" | sed -e 's#^knownCanonical/##; s/\.known\.bed$//'
+      fi
+    done | while read -r C
+    do
+	echo "date"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -noTrans $db $mz knownGene order.list stdout | \
+	    gzip -c > protNuc/$C.protNuc.fa.gz"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed $db $mz knownGene order.list stdout | \
+	    gzip -c > protAA/$C.protAA.fa.gz"
+    done > $gp.$mz.prot.jobs
+    # run the protNuc/protAA job list; stdout and the sh -x trace both go to the log
+    time (sh -x $gp.$mz.prot.jobs) > $gp.$mz.prot.job.log 2>&1
+    # real    230m33.479s
+
+    export mz=multiz100way
+    export gp=knownCanonical
+    export db=hg38
+    zcat protAA/c*.gz | gzip -c > $gp.$mz.protAA.fa.gz &
+    zcat protNuc/c*.gz | gzip -c > $gp.$mz.protNuc.fa.gz &
+    wait    # let both background merges finish before continuing; about 6 minutes
+
     rm -rf exonAA exonNuc
 
     export mz=multiz100way
     export gp=knownCanonical
     export db=hg38
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     cd  $pd
     md5sum *.fa.gz > md5sum.txt
 
 #############################################################################
 # wiki page for 100-way (DONE - 2015-05-14 - Hiram)
     mkdir /hive/users/hiram/bigWays/hg38.100way