src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh a58598681583d3b1637c0a9fcd25610bdf2b6566

a58598681583d3b1637c0a9fcd25610bdf2b6566
braney
  Sat Sep 11 07:27:43 2021 -0700
some cleanup of this old doc file

diff --git src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
index f284599..14741f5 100644
--- src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
+++ src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
@@ -347,32 +347,32 @@
     hgLoadRnaFold $tempDb foldUtr5 fold
     cd ../utr3
     hgLoadRnaFold -warnEmpty $tempDb foldUtr3 fold
 
 # Clean up
     rm -r split fold err batch.bak
     cd ../utr5
     rm -r split fold err batch.bak
 
 hgKgGetText $tempDb tempSearch.txt
 sort tempSearch.txt > tempSearch2.txt
 tawk '{split($2,a,"."); printf "%s\t", $1;for(ii = 1; ii <= a[2]; ii++) printf "%s ",a[1] "." ii; printf "\n" }' txToAcc.tab | sort > tempSearch3.txt
 join tempSearch2.txt tempSearch3.txt | sort > knownGene.txt
 ixIxx knownGene.txt knownGene${GENCODE_VERSION}.ix knownGene${GENCODE_VERSION}.ixx
  rm -rf /gbdb/$db/knownGene${GENCODE_VERSION}.ix /gbdb/$db/knownGene${GENCODE_VERSION}.ixx
-ln -s $dir/knownGene${GENCODE_VERSION}.ix  /gbdb/$db/knownGene${GENCODE_VERSION}.ix
-ln -s $dir/knownGene${GENCODE_VERSION}.ixx /gbdb/$db/knownGene${GENCODE_VERSION}.ixx  
+ln -s `pwd`/knownGene${GENCODE_VERSION}.ix  /gbdb/$db/knownGene${GENCODE_VERSION}.ix
+ln -s `pwd`/knownGene${GENCODE_VERSION}.ixx /gbdb/$db/knownGene${GENCODE_VERSION}.ixx  
 tawk '{print $5}' knownCanonical.tab | sort > knownCanonicalId.txt
 join knownCanonicalId.txt knownGene.txt | join -v 1 /dev/stdin pseudo.txt > knownGeneFast.txt
 ixIxx knownGeneFast.txt knownGeneFast${GENCODE_VERSION}.ix knownGeneFast${GENCODE_VERSION}.ixx
  rm -rf /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ix /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ixx
 ln -s $dir/knownGeneFast${GENCODE_VERSION}.ix  /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ix
 ln -s $dir/knownGeneFast${GENCODE_VERSION}.ixx /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ixx  
 
 #zcat gencode${GENCODE_VERSION}.bed.gz > ucscGenes.bed
 #jtwoBitToFa -noMask /cluster/data/$db/$db.2bit -bed=ucscGenes.bed stdout | faFilter -uniq stdin  ucscGenes.fa
 #jhgPepPred $tempDb generic knownGeneMrna ucscGenes.fa
 bedToPsl /cluster/data/$db/chrom.sizes ucscGenes.bed ucscGenes.psl
 pslRecalcMatch ucscGenes.psl /cluster/data/$db/$db.2bit ucscGenes.fa kgTargetAli.psl
 # should be zero
 awk '$11 != $1 + $3+$4' kgTargetAli.psl
 hgLoadPsl $tempDb kgTargetAli.psl
@@ -380,45 +380,45 @@
 cd $dir
 # Make PCR target for UCSC Genes, Part 1.
 # 1. Get a set of IDs that consist of the UCSC Gene accession concatenated with the
 #    gene symbol, e.g. uc010nxr.1__DDX11L1
 hgsql $tempDb -N -e 'select kgId,geneSymbol from kgXref' \
     | perl -wpe 's/^(\S+)\t(\S+)/$1\t${1}__$2/ || die;' \
       | sort -u > idSub.txt 
 # 2. Get a file of per-transcript fasta sequences that contain the sequences of each UCSC Genes transcript, with this new ID in the place of the UCSC Genes accession.   Convert that file to TwoBit format and soft-link it into /gbdb/hg38/targetDb/ 
 awk '{if (!found[$4]) print; found[$4]=1 }' ucscGenes.bed > nodups.bed
 subColumn 4 nodups.bed idSub.txt ucscGenesIdSubbed.bed 
 sequenceForBed -keepName -db=$db -bedIn=ucscGenesIdSubbed.bed -fastaOut=stdout  | faToTwoBit stdin ${db}KgSeq${curVer}.2bit
 mkdir -p /gbdb/$db/targetDb/ 
 rm -f /gbdb/$db/targetDb/${db}KgSeq${curVer}.2bit
 ln -s $dir/${db}KgSeq${curVer}.2bit /gbdb/$db/targetDb/
 # Load the table kgTargetAli, which shows where in the genome these targets are.
-cut -f 1-10 knownGene.gp | genePredToFakePsl $tempDb stdin kgTargetAli.psl /dev/null
+#cut -f 1-10 knownGene.gp | genePredToFakePsl $tempDb stdin kgTargetAli.psl /dev/null
 hgLoadPsl $tempDb kgTargetAli.psl
 
 # 3. Ask cluster-admin to start an untranslated, -stepSize=5 gfServer on       
 # /gbdb/$db/targetDb/${db}KgSeq${curVer}.2bit 
 
 # 4. On hgwdev, insert new records into blatServers and targetDb, using the 
 # host (field 2) and port (field 3) specified by cluster-admin.  Identify the
 # blatServer by the keyword "$db"Kg with the version number appended
 # untrans gfServer for hg38KgSeq12 on host blat1b, port 17897
 hgsql hgcentraltest -e \
-      'INSERT into blatServers values ("hg38KgSeq13", "blat1b", 1909, 0, 1);'
+      'INSERT into blatServers values ("hg38KgSeq13", "blat1b", 17909, 0, 1,"");'
 hgsql hgcentraltest -e \
             'INSERT into targetDb values("hg38KgSeq13", "GENCODE Genes", \
-                     "hg38", "knownGeneV35.kgTargetAli", "", "", \
+                     "hg38", "kgTargetAli", "", "", \
                               "/gbdb/hg38/targetDb/hg38KgSeq13.2bit", 1, now(), "");'
 
 for i in  $tempFa $xdbFa $ratFa $fishFa $flyFa $wormFa $yeastFa
 do
 if test ! -f $i
 then echo $i not found
 fi
 done
 
 rm -rf   $dir/hgNearBlastp
 mkdir  $dir/hgNearBlastp
 cd $dir/hgNearBlastp
 tcsh
 cat << _EOF_ > config.ra
 # Latest human vs. other Gene Sorter orgs: