3b7094b836ca282f03f54eb70c3778efab444042
braney
  Wed May 1 10:54:51 2024 -0700
did this work a while ago but didn't commit the doc

diff --git src/hg/makeDb/doc/hg38/multiz100way.txt src/hg/makeDb/doc/hg38/multiz100way.txt
index 3618147..93a3dc0 100644
--- src/hg/makeDb/doc/hg38/multiz100way.txt
+++ src/hg/makeDb/doc/hg38/multiz100way.txt
@@ -1824,74 +1824,75 @@
     # information for table of species in the README files, need to
     # edit it in after adding it to the end of this file:
 
     cat ../../species.list | tr '[ ]' '[\n]' | while read D
 do
  netType=`ls ../../mafLinks/${D}/hg38.${D}.*.maf.gz | sed -e "s#.*hg38.${D}.##; s#.maf.gz##;" | sed -e 's/synNet/syntenic/; s/rbest/reciprocal best/;'`
  info=`hgsql -N -e "select organism,\" - \",scientificName,description from dbDb where name=\"$D\";" hgcentraltest`
  echo "${info} ${netType}"
 done | tr '[\t]' '[ ]' >> README.txt
 
     # some other symlinks were already made above
     ln -s `pwd`/upstream*.gz README.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz100way
 
 #############################################################################
-# hgPal downloads (rebuilt knownGene and knownCanonical 2016-06-27 braney)
+# hgPal downloads (rebuilt knownGene and knownCanonical for V41 with knownGeneExt)
 
     ssh hgwdev
     screen -S hg38HgPal
-    mkdir /hive/data/genomes/hg38/bed/multiz100way/pal
+    mkdir -p /hive/data/genomes/hg38/bed/multiz100way/pal
     cd /hive/data/genomes/hg38/bed/multiz100way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     export mz=multiz100way
-    export gp=knownGene
+    export gp=knownGeneExt
     export db=hg38
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 11 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x ./$gp.jobs > $gp.jobs.log 2>&1 &
     # real    208m39.304s
 
     time cat exonAA/*.gz > $gp.$mz.exonAA.fa.gz
     # real    0m6.023s
     time cat exonNuc/*.gz > $gp.$mz.exonNuc.fa.gz
     # real    0m9.152s
 
     export mz=multiz100way
-    export gp=knownGene
+    export gp=knownGeneExt
     export db=hg38
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
-    md5sum *.fa.gz > md5sum.txt
+    #md5sum *.fa.gz > md5sum.txt
+    rm -f $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
-    ln -s `pwd`/md5sum.txt $pd/
+    #ln -s `pwd`/md5sum.txt $pd/
 
     rm -rf exonAA exonNuc
 
     ### need other gene track alignments also
     # running up refGene
     cd /hive/data/genomes/hg38/bed/multiz100way/pal
     export mz=multiz100way
     export gp=ncbiRefSeq
     export db=hg38
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
@@ -1944,33 +1945,33 @@
     time cut -f1 ../../../chrom.sizes | while read C
     do
         echo $C 1>&2
 	hgsql hg38 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
     done
     #   real    0m15.897s
 
     ls knownCanonical/*.known.bed | while read F
     do
       if [ -s $F ]; then
          echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
       fi
     done | while read C
     do
 	echo "date"
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGeneExt order.list stdout | \
 	    gzip -c > exonNuc/$C.exonNuc.fa.gz "
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGeneExt order.list stdout | \
 	    gzip -c > exonAA/$C.exonAA.fa.gz  "
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 
     # 109m16.821s
 
     rm *.known.bed
     export mz=multiz100way
     export gp=knownCanonical
     export db=hg38
     cat exonAA/c*.gz > $gp.$mz.exonAA.fa.gz 
     cat exonNuc/c*.gz > $gp.$mz.exonNuc.fa.gz 
 
 
     mkdir protAA protNuc