9f629656dd1bb546fb3e222ae28e9c68486936cb
hiram
  Wed Dec 23 17:03:31 2020 -0800
ready for wiki summary pages and download README files refs #25864

diff --git src/hg/makeDb/doc/mm39/multiz35way.txt src/hg/makeDb/doc/mm39/multiz35way.txt
index 4536e4a..fbe1705 100644
--- src/hg/makeDb/doc/mm39/multiz35way.txt
+++ src/hg/makeDb/doc/mm39/multiz35way.txt
@@ -1724,140 +1724,149 @@
 #############################################################################
 # construct download files for 35-way (TBD - 2015-04-15 - Hiram)
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way
     mkdir /hive/data/genomes/mm39/bed/multiz35way/downloads
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads
     mkdir multiz35way phastCons35way phyloP35way
 
     #########################################################################
     ## create upstream refGene maf files
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way
     # bash script
 
 #!/bin/sh
-export geneTbl="refGene"
+export geneTbl="ncbiRefSeq"
 for S in 300 2000 5000
 do
     echo "making upstream${S}.maf"
     featureBits mm39 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | /cluster/bin/$MACHTYPE/mafFrags mm39 multiz35way \
                 stdin stdout \
                 -orgs=/hive/data/genomes/mm39/bed/multiz35way/species.list \
         | gzip -c > upstream${S}.${geneTbl}.maf.gz
     echo "done upstream${S}.${geneTbl}.maf.gz"
 done
-
+XXX - running - Wed Dec 23 14:36:49 PST 2020
     #   real    88m40.730s
 
 -rw-rw-r-- 1   52659159 Nov  6 11:46 upstream300.ncbiRefSeq.maf.gz
 -rw-rw-r-- 1  451126665 Nov  6 12:15 upstream2000.ncbiRefSeq.maf.gz
 -rw-rw-r-- 1 1080533794 Nov  6 12:55 upstream5000.ncbiRefSeq.maf.gz
 
     ######################################################################
     ## compress the maf files
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way
     mkdir maf
-    rsync -a -P ../../anno/result/ ./maf/
-    du -hsc maf/
-    # 156G    maf
+    time rsync -a -P ../../maf/ ./maf/
+    # real    12m9.290s
+
+    du -hscL maf/ ../../maf/
+    #	141G    maf/
+    #	141G    ../../maf/
+
     cd maf
     time gzip *.maf &
-    # real    135m1.784s
+XXX - running - Wed Dec 23 14:55:47 PST 2020
+    # real    81m10.239s
 
-    du -hscL maf ../../anno/result/
+    du -hscL maf ../../maf/
     #  18G     maf
 
     cd maf
     md5sum *.maf.gz *.nh > md5sum.txt
 
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/maf
     cd maf
     ln -s `pwd`/* /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/maf
     cd --
     ln -s `pwd`/*.maf.gz `pwd`/*.nh `pwd`/*.txt \
          /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/
 
     ###########################################################################
 
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way
     grep TREE ../../4d/all.mod | awk '{print $NF}' \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > mm39.35way.nh
-    ~/kent/src/hg/utils/phyloTrees/commonNames.sh mm39.35way.nh \
-      | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
-         > mm39.35way.commonNames.nh
-    ~/kent/src/hg/utils/phyloTrees/scientificNames.sh mm39.35way.nh \
-	| $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
+
+    sed -f ../../db.to.name.sed mm39.35way.nh \
+       | sed -e "s#_x_#'#g; s#X__#X._#;" > mm39.35way.commonNames.nh
+
+    sed -f ../../db.to.sciName.sed mm39.35way.nh \
            > mm39.35way.scientificNames.nh
+
     time md5sum *.nh *.maf.gz > md5sum.txt
     #   real    0m3.147s
 
     ln -s `pwd`/*.maf.gz `pwd`/*.nh \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way
 
-    du -hsc ./maf ../../anno/result
+    du -hscL ./maf ../../maf
     #  18G     ./maf
-    # 156G    ../../anno/result
+    # 141G    ../../maf
 
-    # obtain the README.txt from mm39/multiz20way and update for this
+XXX
+    # obtain the README.txt from danRer10/multiz12way and update for this
     #   situation
     ln -s `pwd`/*.txt \
          /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/
 
     #####################################################################
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phastCons35way
 
     mkdir mm39.35way.phastCons
     cd mm39.35way.phastCons
     ln -s ../../../cons/all/downloads/*.wigFix.gz .
     md5sum *.gz > md5sum.txt
 
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phastCons35way
     ln -s ../../cons/all/phastCons35way.bw ./mm39.phastCons35way.bw
     ln -s ../../cons/all/all.mod ./mm39.phastCons35way.mod
     time md5sum *.mod *.bw > md5sum.txt
     #   real    0m20.354s
 
+XXX
     # obtain the README.txt from mm39/phastCons20way and update for this
     #   situation
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way/mm39.35way.phastCons
     cd mm39.35way.phastCons
     ln -s `pwd`/* /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way/mm39.35way.phastCons
 
     cd ..
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way
 
     #####################################################################
     cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phyloP35way
 
     mkdir mm39.35way.phyloP
     cd mm39.35way.phyloP
 
     ln -s ../../../consPhyloP/all/downloads/*.wigFix.gz .
     md5sum *.wigFix.gz > md5sum.txt
 
     cd ..
 
     ln -s ../../consPhyloP/run.phyloP/all.mod mm39.phyloP35way.mod
     ln -s ../../consPhyloP/all/phyloP35way.bw mm39.phyloP35way.bw
 
     md5sum *.mod *.bw > md5sum.txt
 
+XXX 
     # obtain the README.txt from mm39/phyloP20way and update for this
     #   situation
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way/mm39.35way.phyloP
     cd mm39.35way.phyloP
     ln -s `pwd`/* \
 /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way/mm39.35way.phyloP
 
     cd ..
 
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way
 
 #############################################################################
 # hgPal downloads (TBD - 2017-11-06 - Hiram)
 #   FASTA from 35-way for ncbiRefSeq, refGene and knownCanonical
@@ -1882,58 +1891,55 @@
         dNum=`echo $D | awk '{printf "%03d", int($1/300)}'`
         mkdir -p exonNuc/${dNum} > /dev/null
         mkdir -p exonAA/${dNum} > /dev/null
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/${dNum}/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/${dNum}/$C.exonAA.fa.gz &"
         if [ $I -gt 16 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time (sh -x ./$gp.jobs) > $gp.jobs.log 2>&1
-XXX - running - Tue Dec 22 21:33:28 PST 2020
-    # real    79m18.323s
+    # real    18m43.962s
 
     export mz=multiz35way
     export gp=ncbiRefSeq
     time find ./exonAA -type f | grep exonAA.fa.gz | xargs zcat \
      | gzip -c > $gp.$mz.exonAA.fa.gz
-    # real    1m28.841s
+    # real    2m0.962s
 
     time find ./exonNuc -type f | grep exonNuc.fa.gz | xargs zcat \
      | gzip -c > $gp.$mz.exonNuc.fa.gz
-    #   real    3m56.370s
+    #   real    10m12.351s
 
-    # -rw-rw-r-- 1 397928833 Nov  6 18:44 ncbiRefSeq.multiz35way.exonAA.fa.gz
-    # -rw-rw-r-- 1 580377720 Nov  6 18:49 ncbiRefSeq.multiz35way.exonNuc.fa.gz
+    # -rw-rw-r-- 1  906052407 Dec 23 16:34 ncbiRefSeq.multiz35way.exonAA.fa.gz
+    # -rw-rw-r-- 1 1596566489 Dec 23 16:53 ncbiRefSeq.multiz35way.exonNuc.fa.gz
 
     export mz=multiz35way
     export gp=ncbiRefSeq
     export db=mm39
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
+    md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
-    cd  $pd
-    md5sum *.fa.gz > md5sum.txt
-
     rm -rf exonAA exonNuc
 
 #############################################################################
 # wiki page for 35-way (TBD - 2017-11-06 - Hiram)
     mkdir /hive/users/hiram/bigWays/mm39.35way
     cd /hive/users/hiram/bigWays
     echo "mm39" > mm39.35way/ordered.list
     awk '{print $1}' /hive/data/genomes/mm39/bed/multiz35way/35way.distances.txt \
        >> mm39.35way/ordered.list
 
     # sizeStats.sh catches up the cached measurements required for data
     # in the tables.  They are usually already mostly done, only new
     # assemblies will have updates.
     ./sizeStats.sh mm39.35way/ordered.list
     # dbDb.sh constructs mm39.35way/Mm39_35-way_conservation_alignment.html