a2fba51d8eda61736f4c5cf2e40bc7fc12a9e5ee
braney
  Sat Sep 11 07:26:05 2021 -0700
rebuilt V38 mafGene downloads for hg38

diff --git src/hg/makeDb/doc/hg38/multiz100way.txt src/hg/makeDb/doc/hg38/multiz100way.txt
index b64a57d..a6fa622 100644
--- src/hg/makeDb/doc/hg38/multiz100way.txt
+++ src/hg/makeDb/doc/hg38/multiz100way.txt
@@ -1842,90 +1842,89 @@
     screen -S hg38HgPal
     mkdir /hive/data/genomes/hg38/bed/multiz100way/pal
     cd /hive/data/genomes/hg38/bed/multiz100way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     export mz=multiz100way
     export gp=knownGene
     export db=hg38
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
-        if [ $I -gt 6 ]; then
+        if [ $I -gt 11 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x ./$gp.jobs > $gp.jobs.log 2>&1 &
     # real    208m39.304s
 
-    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
-    #   real    5m34.850s
-    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
-    #   real    21m15.426s
+    time cat exonAA/*.gz > $gp.$mz.exonAA.fa.gz
+    # real    0m6.023s
+    time cat exonNuc/*.gz > $gp.$mz.exonNuc.fa.gz
+    # real    0m9.152s
 
     export mz=multiz100way
     export gp=knownGene
     export db=hg38
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
     rm -rf exonAA exonNuc
 
     ### need other gene track alignments also
     # running up refGene
     cd /hive/data/genomes/hg38/bed/multiz100way/pal
     export mz=multiz100way
     export gp=ncbiRefSeq
     export db=hg38
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
-        if [ $I -gt 6 ]; then
-            echo "date"
+        if [ $I -gt 11 ]; then echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x $gp.jobs > $gp.jobs.log 2>&1
     # real    126m0.688s
 
     export mz=multiz100way
     export gp=ncbiRefSeq
     export db=hg38
-    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
+    time cat exonAA/*.gz > $gp.$mz.exonAA.fa.gz
     #   real    3m14.449s
-    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
+    time cat exonNuc/*.gz > $gp.$mz.exonNuc.fa.gz
     #   real    13m27.577s
 
     du -hsc exonAA exonNuc $gp*.fa.gz
 # 3.1G    exonAA
 # 4.9G    exonNuc
 # 3.1G    ncbiRefSeq.multiz100way.exonAA.fa.gz
 # 4.9G    ncbiRefSeq.multiz100way.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     # we're only distributing exons at the moment
     export mz=multiz100way
     export gp=ncbiRefSeq
     export db=hg38
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
@@ -1952,39 +1951,38 @@
     ls knownCanonical/*.known.bed | while read F
     do
       if [ -s $F ]; then
          echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
       fi
     done | while read C
     do
 	echo "date"
 	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
 	    gzip -c > exonNuc/$C.exonNuc.fa.gz "
 	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
 	    gzip -c > exonAA/$C.exonAA.fa.gz  "
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 
-    # 267m58.813s
+    # 109m16.821s
 
     rm *.known.bed
     export mz=multiz100way
     export gp=knownCanonical
     export db=hg38
-    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
-    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
-    # about 6 minutes
+    cat exonAA/c*.gz > $gp.$mz.exonAA.fa.gz
+    cat exonNuc/c*.gz > $gp.$mz.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     export mz=multiz100way
     export gp=knownCanonical
     export db=hg38
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     cd  $pd
     md5sum *.fa.gz > md5sum.txt
 
 #############################################################################
 # wiki page for 100-way (DONE - 2015-05-14 - Hiram)