de5eef93a7b575ab87180987773e142c2a545ef0 braney Mon Apr 8 11:20:47 2019 -0700 add the mmBlastTab tables to the "other" databases diff --git src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh index 2ad439d..9ecd28e 100755 --- src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh +++ src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh @@ -623,35 +623,35 @@ grep -E -v "^#" $bioCycDir/genes.col > genes.tab grep -E -v "^#" $bioCycDir/pathways.col | awk -F'\t' '{if (140 == NF) { printf "%s\t\t\n", $0; } else { print $0}}' > pathways.tab kgBioCyc1 -noEnsembl genes.tab pathways.tab $tempDb bioCycPathway.tab bioCycMapDesc.tab hgLoadSqlTab $tempDb bioCycPathway ~/kent/src/hg/lib/bioCycPathway.sql ./bioCycPathway.tab hgLoadSqlTab $tempDb bioCycMapDesc ~/kent/src/hg/lib/bioCycMapDesc.sql ./bioCycMapDesc.tab # Do KEGG Pathways build (borrowing Fan Hsu's strategy from hg38.txt) mkdir -p $dir/kegg cd $dir/kegg # Make the keggMapDesc table, which maps KEGG pathway IDs to descriptive names cp /cluster/data/mm10/bed/ucsc.13.1/kegg/map_title.tab . # wget --timestamping ftp://ftp.genome.jp/pub/kegg/pathway/map_title.tab - cat map_title.tab | sed -e 's/\t/\thsa\t/' > j.tmp - cut -f 2 j.tmp >j.hsa + cat map_title.tab | sed -e 's/\t/\tmmu\t/' > j.tmp + cut -f 2 j.tmp >j.mmu cut -f 1,3 j.tmp >j.1 - paste j.hsa j.1 |sed -e 's/\t//' > keggMapDesc.tab - rm j.hsa j.1 j.tmp + paste j.mmu j.1 |sed -e 's/\t//' > keggMapDesc.tab + rm j.mmu j.1 j.tmp hgLoadSqlTab -notOnServer $tempDb keggMapDesc $kent/src/hg/lib/keggMapDesc.sql keggMapDesc.tab # In the following two-step process, build/load a table that maps UCSC Gene IDs # to LocusLink IDs and to KEGG pathways. First, make a table that maps # LocusLink IDs to KEGG pathways from the downloaded data. Store it temporarily # in the keggPathway table, overloading the schema. cp /cluster/data/mm9/bed/ucsc.12/kegg/mmu_pathway.list . 
cat mmu_pathway.list| sed -e 's/path://'|sed -e 's/:/\t/' > j.tmp hgLoadSqlTab -notOnServer $tempDb keggPathway $kent/src/hg/lib/keggPathway.sql j.tmp # Next, use the temporary contents of the keggPathway table to join with # knownToLocusLink, creating the real content of the keggPathway table. # Load this data, erasing the old temporary content hgsql $tempDb -B -N -e 'select distinct name, locusID, mapID from keggPathway p, knownToLocusLink l where p.locusID=l.value' > keggPathway.tab @@ -852,72 +852,71 @@ mkdir -p $dir/index cd $dir/index hgKgGetText $db knownGene.text ixIxx knownGene.text knownGene.ix knownGene.ixx rm -f /gbdb/$db/knownGene.ix /gbdb/$db/knownGene.ixx ln -s $dir/index/knownGene.ix /gbdb/$db/knownGene.ix ln -s $dir/index/knownGene.ixx /gbdb/$db/knownGene.ixx # 3. Ask cluster-admin to start an untranslated, -stepSize=5 gfServer on # /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit # 4. On hgwdev, insert new records into blatServers and targetDb, using the # host (field 2) and port (field 3) specified by cluster-admin. 
Identify the # blatServer by the keyword "$db"Kg with the version number appended -# untrans gfServer for hg38KgSeq10 on host blat1c, port 17873 -# Starting untrans gfServer for kgTargetSeq11 on host blat1a, port 17891 +# Starting untrans gfServer for kgTargetSeq11 on host blat1d, port 17905 + hgsql hgcentraltest -e \ - 'INSERT into blatServers values ("hg38KgSeq11", "blat1a", 17891, 0, 1);' + 'INSERT into blatServers values ("mm10KgSeq11", "blat1d", 17905, 0, 1);' hgsql hgcentraltest -e \ - 'INSERT into targetDb values("hg38KgSeq11", "UCSC Genes", \ - "hg38", "kgTargetAli", "", "", \ - "/gbdb/hg38/targetDb/kgTargetSeq11.2bit", 1, now(), "");' + 'INSERT into targetDb values("mm10KgSeq11", "UCSC Genes", \ + "mm10", "kgTargetAli", "", "", \ + "/gbdb/mm10/targetDb/kgTargetSeq11.2bit", 1, now(), "");' # ## ## WRAP-UP # # add database to the db's in kent/src/hg/visiGene/vgGetText cd $dir # # Finally, need to wait until after testing, but update databases in other organisms # with blastTabs # Load blastTabs cd $dir/hgNearBlastp hgLoadBlastTab $xdb $blastTab run.$xdb.$tempDb/out/*.tab hgLoadBlastTab $ratDb $blastTab run.$ratDb.$tempDb/out/*.tab hgLoadBlastTab $flyDb $blastTab run.$flyDb.$tempDb/recipBest.tab hgLoadBlastTab $wormDb $blastTab run.$wormDb.$tempDb/recipBest.tab hgLoadBlastTab $yeastDb $blastTab run.$yeastDb.$tempDb/recipBest.tab hgLoadBlastTab $fishDb $blastTab run.$fishDb.$tempDb/recipBest.tab # Do synteny on mouse/human/rat synBlastp.csh $xdb $db -#old number of unique query values: 45399 -#old number of unique target values 22999 -#new number of unique query values: 42015 -#new number of unique target values 22470 +#old number of unique query values: 99540 +#old number of unique target values 27444 +#new number of unique query values: 92543 +#new number of unique target values 26752 synBlastp.csh $ratDb $db ensGene knownGene -#old number of unique query values: 27888 -#old number of unique target values 18988 -#new number of unique query values: 24530 
-#new number of unique target values 18411 - +#old number of unique query values: 28429 +#old number of unique target values 20661 +#new number of unique query values: 25758 +#new number of unique target values 20061 # need to generate multiz downloads #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownCanonical.exonAA.fa.gz #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownCanonical.exonNuc.fa.gz #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownGene.exonAA.fa.gz #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/knownGene.exonNuc.fa.gz #/usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz46way/alignments/md5sum.txt echo echo "see the bottom of the script for details about knownToWikipedia" echo # Clean up rm -r run.*/out # Last step in setting up isPCR: after the new UCSC Genes with the new Known Gene isPcr