9bab3e9da3138f67a9d9d4f80b609aefb80e02c9
braney
  Mon Dec 23 11:20:20 2019 -0800
remove duplicates from knownToEnsembl and knownToGencodeV32

diff --git src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
index 60dcc78..8460609 100755
--- src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
+++ src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
@@ -201,31 +201,31 @@
 # this should be done AFTER moving the new tables into hg38
 hgKgGetText $tempDb tempSearch.txt
 sort tempSearch.txt > tempSearch2.txt
 tawk '{split($2,a,"."); printf "%s\t", $1;for(ii = 1; ii <= a[2]; ii++) printf "%s ",a[1] "." ii; printf "\n" }' txToAcc.tab | sort > tempSearch3.txt
 join tempSearch2.txt tempSearch3.txt | sort > knownGene.txt
 ixIxx knownGene.txt knownGene.ix knownGene.ixx
  rm -rf /gbdb/$tempDb/knownGene.ix /gbdb/$tempDb/knownGene.ixx
 ln -s $dir/knownGene.ix  /gbdb/$tempDb/knownGene.ix
 ln -s $dir/knownGene.ixx /gbdb/$tempDb/knownGene.ixx  
 
 
 hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from hgFixed.refLink" $db > refToLl.txt
 hgMapToGene -tempDb=$tempDb $db refGene knownGene knownToLocusLink -lookup=refToLl.txt
 knownToVisiGene $tempDb -probesDb=$db
 
-awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort > knownToEnsembl.tab
+awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab
 cp knownToEnsembl.tab knownToGencode${GENCODE_VERSION}.tab
 #awk '{OFS="\t"} {print $2,$1}' tmp1 | sort > knownToEnsembl.tab
 #tawk '{print $2,$1}' tmp1 | sort > knownToGencode${GENCODE_VERSION}.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToEnsembl  $kent/src/hg/lib/knownTo.sql  knownToEnsembl.tab
 hgLoadSqlTab -notOnServer $tempDb  knownToGencode${GENCODE_VERSION}  $kent/src/hg/lib/knownTo.sql  knownToGencode${GENCODE_VERSION}.tab
 
 hgMapToGene -tempDb=$tempDb $db gnfAtlas2 knownGene knownToGnfAtlas2 '-type=bed 12'
 
 
 if ($db =~ hg*) then
     #hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db HInvGeneMrna knownGene knownToHInv
     #hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db affyU133Plus2 knownGene knownToU133Plus2
     hgMapToGene -tempDb=$tempDb $db affyU133 knownGene knownToU133
     hgMapToGene -tempDb=$tempDb $db affyU95 knownGene knownToU95
     mkdir hprd