b209ddecd094d68684158d3b22c7ba1a23c06f8c
braney
  Thu Jun 27 12:58:43 2024 -0700
Strip off the trailing _<number> suffix at the end of the id for knownToEnsembl if it's there (as it is on hg19)

diff --git src/hg/utils/otto/knownGene/buildTo.sh src/hg/utils/otto/knownGene/buildTo.sh
index aab8c8c..002d58d 100755
--- src/hg/utils/otto/knownGene/buildTo.sh
+++ src/hg/utils/otto/knownGene/buildTo.sh
@@ -3,31 +3,31 @@
 {
 . ./buildEnv.sh
 
 # knownToLocusLink
 #hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from hgFixed.refLink" $db > refToLl.txt
 hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from ncbiRefSeqLink where mrnaAcc != '' and locusLinkId != ''" $db > refToLl.txt
 hgMapToGene -geneTableType=genePred -tempDb=$tempDb $db ncbiRefSeq knownGene knownToLocusLink -lookup=refToLl.txt
 rm refToLl.txt
 
 if test "$gtexGeneModel" != ""
 then
     hgMapToGene -geneTableType=genePred $db -tempDb=$tempDb -all -type=genePred $gtexGeneModel knownGene knownToGtex
 fi
 
 # knownToEnsembl and knownToGencode${GENCODE_VERSION}
-awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab
+awk '{OFS="\t"} {new=$4;gsub("_.*$","",new);print $4,new}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab
 cp knownToEnsembl.tab knownToGencode${GENCODE_VERSION}.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToEnsembl  $kent/src/hg/lib/knownTo.sql  knownToEnsembl.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToGencode${GENCODE_VERSION}  $kent/src/hg/lib/knownTo.sql  knownToGencode${GENCODE_VERSION}.tab
 
 # make knownToLynx
 #wget "http://lynx.ci.uchicago.edu/downloads/LYNX_GENES.tab"
 #awk '{print $2}' LYNX_GENES.tab | sort > lynxExists.txt
 #hgsql -e "select geneSymbol,kgId from kgXref" --skip-column-names $tempDb | awk '{if (NF == 2) print}' | sort > geneSymbolToKgId.txt
 #join lynxExists.txt geneSymbolToKgId.txt | awk 'BEGIN {OFS="\t"} {print $2,$1}' | sort > knownToLynx.tab
 #hgLoadSqlTab -notOnServer $tempDb  knownToLynx $kent/src/hg/lib/knownTo.sql  knownToLynx.tab
 
 #rm lynxExists.txt geneSymbolToKgId.txt
 
 # load malacards table
 if test "$malacardTable" != ""