b87243c823ae619a0f382db4236df99216f3f1e8
braney
  Mon Dec 13 10:38:39 2021 -0800
some knownGene cleanup: re-enable the knownToLynx build and add the knownToHInv mapping

diff --git src/hg/utils/otto/knownGene/buildTo.sh src/hg/utils/otto/knownGene/buildTo.sh
index 9492d4d..577d9d1 100755
--- src/hg/utils/otto/knownGene/buildTo.sh
+++ src/hg/utils/otto/knownGene/buildTo.sh
@@ -9,37 +9,37 @@
 hgMapToGene -geneTableType=genePred -tempDb=$tempDb $db ncbiRefSeq knownGene knownToLocusLink -lookup=refToLl.txt
 rm refToLl.txt
 
 if test "$gtexGeneMode" != ""
 then
     hgMapToGene -geneTableType=genePred $db -tempDb=$tempDb -all -type=genePred $gtexGeneMode knownGene knownToGtex
 fi
 
 # knownToEnsembl and knownToGencode${GENCODE_VERSION}
 awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab
 cp knownToEnsembl.tab knownToGencode${GENCODE_VERSION}.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToEnsembl  $kent/src/hg/lib/knownTo.sql  knownToEnsembl.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToGencode${GENCODE_VERSION}  $kent/src/hg/lib/knownTo.sql  knownToGencode${GENCODE_VERSION}.tab
 
 # make knownToLynx
-# wget "http://lynx.ci.uchicago.edu/downloads/LYNX_GENES.tab"
-# awk '{print $2}' LYNX_GENES.tab | sort > lynxExists.txt
-# hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
-# join lynxExists.txt geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2,$1}' | sort > knownToLynx.tab
-# hgLoadSqlTab -notOnServer $tempDb  knownToLynx $kent/src/hg/lib/knownTo.sql  knownToLynx.tab
-# 
-# rm lynxExists.txt geneSymbolToKgId.txt
+wget "http://lynx.ci.uchicago.edu/downloads/LYNX_GENES.tab"
+awk '{print $2}' LYNX_GENES.tab | sort > lynxExists.txt
+hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
+join lynxExists.txt geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2,$1}' | sort > knownToLynx.tab
+hgLoadSqlTab -notOnServer $tempDb  knownToLynx $kent/src/hg/lib/knownTo.sql  knownToLynx.tab
+
+rm lynxExists.txt geneSymbolToKgId.txt
 
 # load malacards table
 if test "$malacardTable" != ""
 then
     hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
     hgsql -e "select geneSymbol from malacards" --skip-column-names $db | sort > malacardExists.txt
     join malacardExists.txt  geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2, $1}' > knownToMalacard.txt
     hgLoadSqlTab -notOnServer $tempDb  knownToMalacards $kent/src/hg/lib/knownTo.sql  knownToMalacard.txt
     rm geneSymbolToKgId.txt malacardExists.txt knownToMalacard.txt
 fi
 
 #knownToVisiGene
 knownToVisiGene $tempDb -probesDb=$db
 
 hgsql $tempDb -e "select geneSymbol,name from knownGene g, kgXref x where g.name=x.kgId " | sort > $tempDb.symbolToId.txt
@@ -99,20 +99,20 @@
 # filter out pdbIds not found in mupit
 cat mupit-pdbids.txt | tr '[a-z]' '[A-Z]' | \
     grep -Fwf - $tempDb.knownToPdb.txt >  knownToMupit.txt;
 # check that it filtered correctly:
 # cut -f2 $db.knownToMuipit.txt | sort -u | wc -l;
 # load new table for hgGene/hgc
 hgLoadSqlTab $tempDb knownToMupit ~/kent/src/hg/lib/knownTo.sql knownToMupit.txt
 
 # make knownToNextProt
 wget "ftp://ftp.nextprot.org/pub/current_release/ac_lists/nextprot_ac_list_all.txt"
 awk '{print $0, $0}' nextprot_ac_list_all.txt | sed 's/NX_//' | sort > displayIdToNextProt.txt
 hgsql -e "select spID,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > displayIdToKgId.txt
 join displayIdToKgId.txt displayIdToNextProt.txt | awk 'BEGIN {OFS="\t"} {print $2,$3}' > knownToNextProt.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToNextProt $kent/src/hg/lib/knownTo.sql  knownToNextProt.tab
 
-
+hgMapToGene -geneTableType=genePred -tempDb=$tempDb $db HInvGeneMrna knownGene knownToHInv
 
 
 echo "BuildKnownTo successfully finished"
 } > doKnownTo.log < /dev/null 2>&1