3846c108e2ff699c4d1de31c4b38f7379e1cf9ee
braney
  Sat Apr 6 07:51:15 2019 -0700
new CDS FASTA downloads in honor of Gencode VM20 release

diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt
index 2108959..99e7fac 100644
--- src/hg/makeDb/doc/mm10.txt
+++ src/hg/makeDb/doc/mm10.txt
@@ -17731,15 +17731,153 @@
 
     time (doSameSpeciesLiftOver.pl -verbose=2 \
 	-fileServer=hgwdev \
 	-query2Bit=/hive/data/genomes/mm10/mm10.2bit \
 	-querySizes=/hive/data/genomes/mm10/chrom.sizes \
 	-target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
 	-targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/mm10/mm10.11.ooc \
          mm10 GRCm38B) > doLiftOverToGRCm38B.log 2>&1
     # real    156m50.777s
 
     # see if the liftOver menus function in the browser from mm10 to GRCm38B
 
 #########################################################################
+#############################################################################
+# hgPal downloads (rebuilt knownGene and knownCanonical 2019-04-01 braney)
+
+    ssh hgwdev
+    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
+    cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
+    cat ../species.list | tr '[ ]' '[\n]' > order.list
+
+    export mz=multiz60way
+    export gp=knownGene
+    export db=mm10
+    export I=0
+    mkdir exonAA exonNuc
+    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
+    do
+        I=`echo $I | awk '{print $1+1}'`
+	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
+        if [ $I -gt 6 ]; then
+            echo "date"
+            echo "wait"
+            I=0
+        fi
+    done > $gp.jobs
+    echo "date" >> $gp.jobs
+    echo "wait" >> $gp.jobs
+
+    time sh -x ./$gp.jobs > $gp.jobs.log 2>&1 &
+    # real    59m23.279s (job script runs in the background; let it finish before the zcat steps below)
+
+    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
+    # real    1m35.590s
+    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
+    # real    7m46.538s
+
+    export mz=multiz60way
+    export gp=knownGene
+    export db=mm10
+    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    rm -rf $pd
+    mkdir -p $pd
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+
+    rm -rf exonAA exonNuc
+
+    cd /hive/data/genomes/mm10/bed/multiz60way/pal
+    export mz=multiz60way
+    export gp=ncbiRefSeq
+    export db=mm10
+    export I=0
+    mkdir exonAA exonNuc
+    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
+    do
+        I=`echo $I | awk '{print $1+1}'`
+	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
+        if [ $I -gt 6 ]; then
+            echo "date"
+            echo "wait"
+            I=0
+        fi
+    done > $gp.jobs
+    echo "date" >> $gp.jobs
+    echo "wait" >> $gp.jobs
+
+    time sh -x $gp.jobs > $gp.jobs.log 2>&1
+    # real    126m0.688s
+
+    export mz=multiz60way
+    export gp=ncbiRefSeq
+    export db=mm10
+    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
+    # real    2m56.817s
+    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
+    # real    14m8.080s
+
+    rm -rf exonAA exonNuc
+
+    # we're only distributing exons at the moment
+    export mz=multiz60way
+    export gp=ncbiRefSeq
+    export db=mm10
+    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+
+    ### And knownCanonical
+    cd /hive/data/genomes/mm10/bed/multiz60way/pal
+    export mz=multiz60way
+    export gp=knownCanonical
+    export db=mm10
+    mkdir exonAA exonNuc knownCanonical
+
+    time cut -f1 ../../../chrom.sizes | while read C
+    do
+        echo $C 1>&2
+	hgsql mm10 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
+    done
+    #   real    0m15.897s
+
+    ls knownCanonical/*.known.bed | while read F
+    do
+      if [ -s $F ]; then
+         echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
+      fi
+    done | while read C
+    do
+	echo "date"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
+	    gzip -c > exonNuc/$C.exonNuc.fa.gz"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
+	    gzip -c > exonAA/$C.exonAA.fa.gz"
+    done > $gp.$mz.jobs
+
+    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 
+    # real    267m58.813s
+
+    rm knownCanonical/*.known.bed
+    export mz=multiz60way
+    export gp=knownCanonical
+    export db=mm10
+    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
+    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
+    # about 6 minutes; both zcat jobs run in the background, so 'wait' for them before removing exonAA/exonNuc
+
+    rm -rf exonAA exonNuc
+
+    export mz=multiz60way
+    export gp=knownCanonical
+    export db=mm10
+    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    mkdir -p $pd
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+
+    cd  $pd
+    md5sum *.fa.gz > md5sum.txt