src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh de5eef93a7b575ab87180987773e142c2a545ef0

de5eef93a7b575ab87180987773e142c2a545ef0
braney
  Mon Apr 8 11:20:47 2019 -0700
add the mmBlastTab tables to the "other" databases

diff --git src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
index 2ad439d..9ecd28e 100755
--- src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
+++ src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
@@ -623,35 +623,35 @@
 
 grep -E -v "^#" $bioCycDir/genes.col  > genes.tab  
 grep -E -v "^#" $bioCycDir/pathways.col  | awk -F'\t' '{if (140 == NF) { printf "%s\t\t\n", $0; } else { print $0}}' > pathways.tab
 
 kgBioCyc1 -noEnsembl genes.tab pathways.tab $tempDb bioCycPathway.tab bioCycMapDesc.tab  
 hgLoadSqlTab $tempDb bioCycPathway ~/kent/src/hg/lib/bioCycPathway.sql ./bioCycPathway.tab
 hgLoadSqlTab $tempDb bioCycMapDesc ~/kent/src/hg/lib/bioCycMapDesc.sql ./bioCycMapDesc.tab
 
 # Do KEGG Pathways build (borrowing Fan Hsu's strategy from hg38.txt)
     mkdir -p $dir/kegg
     cd $dir/kegg
 
     # Make the keggMapDesc table, which maps KEGG pathway IDs to descriptive names
     cp /cluster/data/mm10/bed/ucsc.13.1/kegg/map_title.tab .
     # wget --timestamping ftp://ftp.genome.jp/pub/kegg/pathway/map_title.tab
-    cat map_title.tab | sed -e 's/\t/\thsa\t/' > j.tmp
-    cut -f 2 j.tmp >j.hsa
+    cat map_title.tab | sed -e 's/\t/\tmmu\t/' > j.tmp
+    cut -f 2 j.tmp >j.mmu
     cut -f 1,3 j.tmp >j.1
-    paste j.hsa j.1 |sed -e 's/\t//' > keggMapDesc.tab
-    rm j.hsa j.1 j.tmp
+    paste j.mmu j.1 |sed -e 's/\t//' > keggMapDesc.tab
+    rm j.mmu j.1 j.tmp
     hgLoadSqlTab -notOnServer $tempDb keggMapDesc $kent/src/hg/lib/keggMapDesc.sql keggMapDesc.tab
 
     # Following a two-step process, build/load a table that maps UCSC Gene IDs
     # to LocusLink IDs and to KEGG pathways.  First, make a table that maps 
     # LocusLink IDs to KEGG pathways from the downloaded data.  Store it temporarily
     # in the keggPathway table, overloading the schema.
     cp /cluster/data/mm9/bed/ucsc.12/kegg/mmu_pathway.list .
 
     cat mmu_pathway.list| sed -e 's/path://'|sed -e 's/:/\t/' > j.tmp
     hgLoadSqlTab -notOnServer $tempDb keggPathway $kent/src/hg/lib/keggPathway.sql j.tmp
 
     # Next, use the temporary contents of the keggPathway table to join with
     # knownToLocusLink, creating the real content of the keggPathway table.
     # Load this data, erasing the old temporary content
     hgsql $tempDb -B -N -e 'select distinct name, locusID, mapID from keggPathway p, knownToLocusLink l where p.locusID=l.value' > keggPathway.tab
@@ -852,72 +852,71 @@
 mkdir -p $dir/index
 cd $dir/index
 hgKgGetText $db knownGene.text 
 ixIxx knownGene.text knownGene.ix knownGene.ixx
 rm -f /gbdb/$db/knownGene.ix /gbdb/$db/knownGene.ixx
 ln -s $dir/index/knownGene.ix  /gbdb/$db/knownGene.ix
 ln -s $dir/index/knownGene.ixx /gbdb/$db/knownGene.ixx
 
 
 # 3. Ask cluster-admin to start an untranslated, -stepSize=5 gfServer on       
 # /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit
 
 # 4. On hgwdev, insert new records into blatServers and targetDb, using the 
 # host (field 2) and port (field 3) specified by cluster-admin.  Identify the
 # blatServer by the keyword "$db"KgSeq with the version number appended
-# untrans gfServer for hg38KgSeq10 on host blat1c, port 17873
-# Starting untrans gfServer for kgTargetSeq11 on host blat1a, port 17891
+# Starting untrans gfServer for kgTargetSeq11 on host blat1d, port 17905
+
 hgsql hgcentraltest -e \
-      'INSERT into blatServers values ("hg38KgSeq11", "blat1a", 17891, 0, 1);'
+      'INSERT into blatServers values ("mm10KgSeq11", "blat1d", 17905, 0, 1);'
 hgsql hgcentraltest -e \
-      'INSERT into targetDb values("hg38KgSeq11", "UCSC Genes", \
-         "hg38", "kgTargetAli", "", "", \
-         "/gbdb/hg38/targetDb/kgTargetSeq11.2bit", 1, now(), "");'
+      'INSERT into targetDb values("mm10KgSeq11", "UCSC Genes", \
+         "mm10", "kgTargetAli", "", "", \
+         "/gbdb/mm10/targetDb/kgTargetSeq11.2bit", 1, now(), "");'
 
 #
 ##
 ##   WRAP-UP  
 #
 #  add database to the db's in kent/src/hg/visiGene/vgGetText
 
 cd $dir
 #
 # Finally, once testing is complete, update the databases of the other organisms
 # with the new blastTabs
 
 # Load blastTabs
 cd $dir/hgNearBlastp
 hgLoadBlastTab $xdb $blastTab run.$xdb.$tempDb/out/*.tab
 hgLoadBlastTab $ratDb $blastTab run.$ratDb.$tempDb/out/*.tab 
 hgLoadBlastTab $flyDb $blastTab run.$flyDb.$tempDb/recipBest.tab
 hgLoadBlastTab $wormDb $blastTab run.$wormDb.$tempDb/recipBest.tab
 hgLoadBlastTab $yeastDb $blastTab run.$yeastDb.$tempDb/recipBest.tab
 hgLoadBlastTab $fishDb $blastTab run.$fishDb.$tempDb/recipBest.tab
 
 # Do synteny on mouse/human/rat
 synBlastp.csh $xdb $db
-#old number of unique query values: 45399
-#old number of unique target values 22999
-#new number of unique query values: 42015
-#new number of unique target values 22470
+#old number of unique query values: 99540
+#old number of unique target values 27444
+#new number of unique query values: 92543
+#new number of unique target values 26752
 
 synBlastp.csh $ratDb $db ensGene knownGene
-#old number of unique query values:  27888  
-#old number of unique target values  18988  
-#new number of unique query values:  24530  
-#new number of unique target values  18411  
-
+#old number of unique query values: 28429
+#old number of unique target values 20661
+#new number of unique query values: 25758
+#new number of unique target values 20061
 
 # need to generate multiz downloads
 #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownCanonical.exonAA.fa.gz
 #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownCanonical.exonNuc.fa.gz
 #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownGene.exonAA.fa.gz
 #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownGene.exonNuc.fa.gz
 #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/md5sum.txt
 
 echo
 echo "see the bottom of the script for details about knownToWikipedia"
 echo
 # Clean up
 rm -r run.*/out
 
 # Last step in setting up isPCR: after the new UCSC Genes with the new Known Gene isPcr