src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh 4321aabd0d2819f88564b50b0cd26edf82e69502

4321aabd0d2819f88564b50b0cd26edf82e69502
braney
  Wed Aug 14 10:19:58 2019 -0700
add a note to rename the kgTargetSeq file to match the name in
blatServers next time around.

diff --git src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
index 9ecd28e..4df649f 100755
--- src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
+++ src/hg/makeDb/doc/ucscGenes/mm10.ucscGenes18.sh
@@ -683,30 +683,31 @@
 
     hgLoadSqlTab -notOnServer $tempDb cgapBiocPathway $kent/src/hg/lib/cgapBiocPathway.sql ./cgapBIOCARTA.tab
 
     cat cgapBIOCARTAdesc.tab|sort -u > cgapBIOCARTAdescSorted.tab
     hgLoadSqlTab -notOnServer $tempDb cgapBiocDesc $kent/src/hg/lib/cgapBiocDesc.sql cgapBIOCARTAdescSorted.tab
 
 
 cd $dir
 # Make PCR target for UCSC Genes, Part 1.
 # 1. Get a set of IDs that consist of the UCSC Gene accession concatenated with the
 #    gene symbol, e.g. uc010nxr.1__DDX11L1
 hgsql $db -N -e 'select kgId,geneSymbol from kgXref' \
     | perl -wpe 's/^(\S+)\t(\S+)/$1\t${1}__$2/ || die;' \
       | sort -u > idSub.txt 
 # 2. Get a file of per-transcript fasta sequences that contains the sequence of each UCSC Genes transcript, with this new ID in place of the UCSC Genes accession.  Convert that file to TwoBit format and soft-link it into /gbdb/$db/targetDb/
+### NEXT TIME  use same name in blatServers table as file name!!!
 awk '{if (!found[$4]) print; found[$4]=1 }' ucscGenes.bed > nodups.bed
 subColumn 4 nodups.bed idSub.txt ucscGenesIdSubbed.bed 
 sequenceForBed -keepName -db=$db -bedIn=ucscGenesIdSubbed.bed -fastaOut=stdout  | faToTwoBit stdin kgTargetSeq${curVer}.2bit 
 mkdir -p /gbdb/$db/targetDb/ 
 rm -f /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit 
 ln -s $dir/kgTargetSeq${curVer}.2bit /gbdb/$db/targetDb/
 # Load the table kgTargetAli, which shows where in the genome these targets are.
 #cut -f 1-10 knownGene.gp | genePredToFakePsl $tempDb stdin kgTargetAli.psl /dev/null
 #hgLoadPsl $tempDb kgTargetAli.psl
 
 #
 # At this point we should save a list of the tables in tempDb!!!
 echo "show tables" | hgsql $tempDb > tablesInKnownGene.lst
 
 cd $dir
@@ -846,33 +847,35 @@
 hgsqladmin flush-tables
 
 
 # Make full text index.  Takes a minute or so.  After this the genome browser
 # track display will work, including the position search.  The gene details
 # page, gene sorter, and proteome browser still need more tables.
 mkdir -p $dir/index
 cd $dir/index
 hgKgGetText $db knownGene.text 
 ixIxx knownGene.text knownGene.ix knownGene.ixx
 rm -f /gbdb/$db/knownGene.ix /gbdb/$db/knownGene.ixx
 ln -s $dir/index/knownGene.ix  /gbdb/$db/knownGene.ix
 ln -s $dir/index/knownGene.ixx /gbdb/$db/knownGene.ixx
 
 
+### NEXT TIME  use same name in blatServers table as file name!!!
 # 3. Ask cluster-admin to start an untranslated, -stepSize=5 gfServer on       
 # /gbdb/$db/targetDb/kgTargetSeq${curVer}.2bit
 
+### NEXT TIME  use same name in blatServers table as file name!!!
 # 4. On hgwdev, insert new records into blatServers and targetDb, using the 
 # host (field 2) and port (field 3) specified by cluster-admin.  Identify the
 # blatServer by the keyword "$db"KgSeq with the version number appended (e.g. mm10KgSeq11).
 # Starting untrans gfServer for kgTargetSeq11 on host blat1d, port 17905
 
 hgsql hgcentraltest -e \
       'INSERT into blatServers values ("mm10KgSeq11", "blat1d", 17905, 0, 1);'
 hgsql hgcentraltest -e \
       'INSERT into targetDb values("mm10KgSeq11", "UCSC Genes", \
          "mm10", "kgTargetAli", "", "", \
          "/gbdb/mm10/targetDb/kgTargetSeq11.2bit", 1, now(), "");'
 
 #
 ##
 ##   WRAP-UP