7c5f089c583b6a7ff0439109000eb121e6ff5d8b
braney
  Tue Aug 17 14:50:36 2021 -0700
build the knownGene and knownCanonical mafGene downloads

diff --git src/hg/utils/otto/knownGene/buildMafGene.sh src/hg/utils/otto/knownGene/buildMafGene.sh
new file mode 100755
index 0000000..781a271
--- /dev/null
+++ src/hg/utils/otto/knownGene/buildMafGene.sh
@@ -0,0 +1,96 @@
+#!/bin/sh -ex
+cd $dir
+{
+if test "$multizDir" == ""
+then
+echo "Must set multizDir to directory with multiz files in it"
+fi
+
+if test "$mz" == ""
+then
+echo "Must set mz to name of multiz track"
+fi
+
+mkdir -p $multizDir/mafGene.knownGene${GENCODE_VERSION}
+cd $multizDir/mafGene.knownGene${GENCODE_VERSION}
+
+cat $multizDir/species.list | tr '[ ]' '[\n]' > order.list
+
+export gp=knownGene
+export I=0  
+rm -rf exonAA exonNuc
+mkdir exonAA exonNuc
+for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
+do          
+    I=`echo $I | awk '{print $1+1}'`
+    echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+    echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
+    if [ $I -gt 11 ]; then
+        echo "date"
+        echo "wait"
+        I=0
+    fi
+done > $gp.jobs
+echo "date" >> $gp.jobs
+echo "wait" >> $gp.jobs
+
+time sh -x ./$gp.jobs 
+
+time cat exonAA/*.gz > $gp.$mz.exonAA.fa.gz
+time cat exonNuc/*.gz > $gp.$mz.exonNuc.fa.gz
+
+export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+mkdir -p $pd
+rm -f $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz
+ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+
+export gp=knownCanonical
+export I=0  
+rm -rf exonAA exonNuc knownCanonical
+mkdir exonAA exonNuc knownCanonical
+
+time cut -f1 $multizDir/../../chrom.sizes | while read C
+do
+    echo $C 1>&2
+    hgsql $db -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
+done
+
+ls knownCanonical/*.known.bed | while read F
+do
+  if [ -s $F ]; then
+     echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
+  fi
+done | while read C
+do
+    echo "date"
+    echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
+        gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+    echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
+        gzip -c > exonAA/$C.exonAA.fa.gz &"
+    if [ $I -gt 11 ]; then
+        echo "date"
+        echo "wait"
+        I=0
+    fi
+done > $gp.$mz.jobs
+echo "date" >> $gp.$mz.jobs
+echo "wait" >> $gp.$mz.jobs
+
+time sh -x $gp.$mz.jobs 
+
+cat exonAA/c*.gz > $gp.$mz.exonAA.fa.gz
+cat exonNuc/c*.gz > $gp.$mz.exonNuc.fa.gz
+
+rm -rf exonAA exonNuc knownCanonical
+
+export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+mkdir -p $pd
+rm -f $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz
+ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+#cd  $pd
+#md5sum *.fa.gz > md5sum.txt
+
+echo "BuildMafGene successfully finished"
+} > doMafGene.log < /dev/null 2>&1