9bab3e9da3138f67a9d9d4f80b609aefb80e02c9 braney Mon Dec 23 11:20:20 2019 -0800 remove duplicates from knownToEnsembl and knownToGencodeV32 diff --git src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh index 60dcc78..8460609 100755 --- src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh +++ src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh @@ -201,31 +201,31 @@ # this should be done AFTER moving the new tables into hg38 hgKgGetText $tempDb tempSearch.txt sort tempSearch.txt > tempSearch2.txt tawk '{split($2,a,"."); printf "%s\t", $1;for(ii = 1; ii <= a[2]; ii++) printf "%s ",a[1] "." ii; printf "\n" }' txToAcc.tab | sort > tempSearch3.txt join tempSearch2.txt tempSearch3.txt | sort > knownGene.txt ixIxx knownGene.txt knownGene.ix knownGene.ixx rm -rf /gbdb/$tempDb/knownGene.ix /gbdb/$tempDb/knownGene.ixx ln -s $dir/knownGene.ix /gbdb/$tempDb/knownGene.ix ln -s $dir/knownGene.ixx /gbdb/$tempDb/knownGene.ixx hgsql --skip-column-names -e "select mrnaAcc,locusLinkId from hgFixed.refLink" $db > refToLl.txt hgMapToGene -tempDb=$tempDb $db refGene knownGene knownToLocusLink -lookup=refToLl.txt knownToVisiGene $tempDb -probesDb=$db -awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort > knownToEnsembl.tab +awk '{OFS="\t"} {print $4,$4}' ucscGenes.bed | sort | uniq > knownToEnsembl.tab cp knownToEnsembl.tab knownToGencode${GENCODE_VERSION}.tab #awk '{OFS="\t"} {print $2,$1}' tmp1 | sort > knownToEnsembl.tab #tawk '{print $2,$1}' tmp1 | sort > knownToGencode${GENCODE_VERSION}.tab hgLoadSqlTab -notOnServer $tempDb knownToEnsembl $kent/src/hg/lib/knownTo.sql knownToEnsembl.tab hgLoadSqlTab -notOnServer $tempDb knownToGencode${GENCODE_VERSION} $kent/src/hg/lib/knownTo.sql knownToGencode${GENCODE_VERSION}.tab hgMapToGene -tempDb=$tempDb $db gnfAtlas2 knownGene knownToGnfAtlas2 '-type=bed 12' if ($db =~ hg*) then #hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db HInvGeneMrna knownGene knownToHInv #hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db affyU133Plus2 knownGene knownToU133Plus2 hgMapToGene -tempDb=$tempDb $db affyU133 knownGene knownToU133 hgMapToGene -tempDb=$tempDb $db affyU95 knownGene knownToU95 mkdir hprd