4321aabd0d2819f88564b50b0cd26edf82e69502
braney
Wed Aug 14 10:19:58 2019 -0700
add a note to rename the kgTargetSeq file to match the name in blatServers next time around.
diff --git src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
index 9ecd28e..4df649f 100755
--- src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
+++ src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
@@ -683,30 +683,31 @@
hgLoadSqlTab -notOnServer $tempDb cgapBiocPathway $kent/src/hg/lib/cgapBiocPathway.sql ./cgapBIOCARTA.tab
cat cgapBIOCARTAdesc.tab|sort -u > cgapBIOCARTAdescSorted.tab
hgLoadSqlTab -notOnServer $tempDb cgapBiocDesc $kent/src/hg/lib/cgapBiocDesc.sql cgapBIOCARTAdescSorted.tab
cd $dir
# Make PCR target for UCSC Genes, Part 1.
# 1. Get a set of IDs that consist of the UCSC Gene accession concatenated with the
# gene symbol, e.g. uc010nxr.1__DDX11L1
hgsql $db -N -e 'select kgId,geneSymbol from kgXref' \
| perl -wpe 's/^(\S+)\t(\S+)/$1\t${1}__$2/ || die;' \
| sort -u > idSub.txt
# 2. Get a file of per-transcript fasta sequences that contain the sequences of each UCSC Genes transcript, with this new ID in the place of the UCSC Genes accession. Convert that file to TwoBit format and soft-link it into /gbdb/hg38/targetDb/
+### NEXT TIME use same name in blatServers table as file name!!!
awk '{if (!found[$4]) print; found[$4]=1 }' ucscGenes.bed > nodups.bed
subColumn 4 nodups.bed idSub.txt ucscGenesIdSubbed.bed
sequenceForBed -keepName -db=$db -bedIn=ucscGenesIdSubbed.bed -fastaOut=stdout | faToTwoBit stdin kgTargetSeq${curVer}.2bit
mkdir -p /gbdb/$db/targetDb/
rm -f /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit
ln -s $dir/kgTargetSeq${curVer}.2bit /gbdb/$db/targetDb/
# Load the table kgTargetAli, which shows where in the genome these targets are.
#cut -f 1-10 knownGene.gp | genePredToFakePsl $tempDb stdin kgTargetAli.psl /dev/null
#hgLoadPsl $tempDb kgTargetAli.psl
#
# At this point we should save a list of the tables in tempDb!!!
echo "show tables" | hgsql $tempDb > tablesInKnownGene.lst
cd $dir
@@ -846,33 +847,35 @@
hgsqladmin flush-tables
# Make full text index. Takes a minute or so. After this the genome browser
# tracks display will work including the position search. The genes details
# page, gene sorter, and proteome browser still need more tables.
mkdir -p $dir/index
cd $dir/index
hgKgGetText $db knownGene.text
ixIxx knownGene.text knownGene.ix knownGene.ixx
rm -f /gbdb/$db/knownGene.ix /gbdb/$db/knownGene.ixx
ln -s $dir/index/knownGene.ix /gbdb/$db/knownGene.ix
ln -s $dir/index/knownGene.ixx /gbdb/$db/knownGene.ixx
+### NEXT TIME use same name in blatServers table as file name!!!
# 3. Ask cluster-admin to start an untranslated, -stepSize=5 gfServer on
# /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit
+### NEXT TIME use same name in blatServers table as file name!!!
# 4. On hgwdev, insert new records into blatServers and targetDb, using the
# host (field 2) and port (field 3) specified by cluster-admin. Identify the
# blatServer by the keyword "$db"Kg with the version number appended
# Starting untrans gfServer for kgTargetSeq11 on host blat1d, port 17905
hgsql hgcentraltest -e \
'INSERT into blatServers values ("mm10KgSeq11", "blat1d", 17905, 0, 1);'
hgsql hgcentraltest -e \
'INSERT into targetDb values("mm10KgSeq11", "UCSC Genes", \
"mm10", "kgTargetAli", "", "", \
"/gbdb/mm10/targetDb/kgTargetSeq11.2bit", 1, now(), "");'
#
##
## WRAP-UP