d0aad7f66c2fee0fa9366c3b3caf4d761750c055
braney
  Tue Aug 17 10:27:07 2021 -0700
first version of Gencode genes (knownGene version) for mm39

diff --git src/hg/utils/otto/knownGene/buildTo.sh src/hg/utils/otto/knownGene/buildTo.sh
new file mode 100755
index 0000000..6878ff8
--- /dev/null
+++ src/hg/utils/otto/knownGene/buildTo.sh
@@ -0,0 +1,48 @@
+#!/bin/sh -ex
+
+{
+# knownToLocusLink
+#hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from hgFixed.refLink" $db > refToLl.txt
+hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from ncbiRefSeqLink where mrnaAcc != ''" $db > refToLl.txt
+hgMapToGene -geneTableType=genePred -tempDb=$tempDb $db ncbiRefSeq knownGene knownToLocusLink -lookup=refToLl.txt
+rm refToLl.txt
+
+if test "$gtexGeneMode" != ""
+then
+    hgMapToGene -geneTableType=genePred $db -tempDb=$tempDb -all -type=genePred $gtexGeneMode knownGene knownToGtex
+fi
+
+# knownToEnsembl and knownToGencode${GENCODE_VERSION}
+awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab
+cp knownToEnsembl.tab knownToGencode${GENCODE_VERSION}.tab
+hgLoadSqlTab -notOnServer $tempDb  knownToEnsembl  $kent/src/hg/lib/knownTo.sql  knownToEnsembl.tab
+hgLoadSqlTab -notOnServer $tempDb  knownToGencode${GENCODE_VERSION}  $kent/src/hg/lib/knownTo.sql  knownToGencode${GENCODE_VERSION}.tab
+
+# make knownToLynx
+# wget "http://lynx.ci.uchicago.edu/downloads/LYNX_GENES.tab"
+# awk '{print $2}' LYNX_GENES.tab | sort > lynxExists.txt
+# hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
+# join lynxExists.txt geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2,$1}' | sort > knownToLynx.tab
+# hgLoadSqlTab -notOnServer $tempDb  knownToLynx $kent/src/hg/lib/knownTo.sql  knownToLynx.tab
+# 
+# rm lynxExists.txt geneSymbolToKgId.txt
+
+# load malacards table
+if test "$malacardTable" != ""
+then
+    hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
+    hgsql -e "select geneSymbol from malacards" --skip-column-names $db | sort > malacardExists.txt
+    join malacardExists.txt  geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2, $1}' > knownToMalacard.txt
+    hgLoadSqlTab -notOnServer $tempDb  knownToMalacards $kent/src/hg/lib/knownTo.sql  knownToMalacard.txt
+    rm geneSymbolToKgId.txt malacardExists.txt knownToMalacard.txt
+fi
+
+#knownToVisiGene
+knownToVisiGene $tempDb -probesDb=$db
+
+hgsql $tempDb -e "select geneSymbol,name from knownGene g, kgXref x where g.name=x.kgId " | sort > $tempDb.symbolToId.txt
+join -t $'\t'   /hive/groups/browser/wikipediaScrape/symbolToPage.txt $tempDb.symbolToId.txt | tawk '{print $3,$2}' | sort | uniq > $tempDb.idToPage.txt
+hgLoadSqlTab $tempDb knownToWikipedia $HOME/kent/src/hg/lib/knownTo.sql $tempDb.idToPage.txt
+
+echo "BuildKnownTo successfully finished"
+} > doKnownTo.log < /dev/null 2>&1