422154600ea64376ef82472e05379cf974b111e2
braney
Thu Nov 14 10:27:14 2019 -0800
add index on transcript in knownIsoforms refs #24486
diff --git src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
index 3780d19..88d9eaf 100755
--- src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
+++ src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes20.sh
@@ -145,31 +145,31 @@ makeGencodeKnownGene $db $tempDb $GENCODE_VERSION txToAcc.tab
 hgsql $tempDb -Ne "select k.name, g.geneId, g.geneStatus, g.geneType,g.transcriptName,g.transcriptType,g.transcriptStatus, g.havanaGeneId, g.ccdsId, g.level, g.transcriptClass from knownGene k, $db.wgEncodeGencodeAttrs$GENCODE_VERSION g where k.name=g.transcriptId" | sort | uniq > knownAttrs.tab
 hgLoadSqlTab -notOnServer $tempDb knownAttrs $kent/src/hg/lib/knownAttrs.sql knownAttrs.tab
 #tawk '$4=="new" {print $3}' oldToNew.tab | sort > new.txt
 #sort knownGene.gp | join -t $'\t' new.txt /dev/stdin > new.gp
 #sort knownGene.gp | join -t $'\t' lost.txt /dev/stdin | wc
 # should be zero
 # tawk '{print $12}' hg38.lost.gp | while read name; do grep $name /tmp/2; done | wc
 sort kgColor.tab | uniq | hgLoadSqlTab -notOnServer $tempDb kgColor $kent/src/hg/lib/kgColor.sql stdin
-hgLoadSqlTab -notOnServer $tempDb knownIsoforms $kent/src/hg/lib/knownIsoforms.sql knownIsoforms.tab
+sort knownIsoforms.tab | uniq | hgLoadSqlTab -notOnServer $tempDb knownIsoforms $kent/src/hg/lib/knownIsoforms.sql stdin
 hgLoadSqlTab -notOnServer $tempDb kgXref $kent/src/hg/lib/kgXref.sql kgXref.tab
 hgLoadSqlTab -notOnServer $tempDb knownCanonical $kent/src/hg/lib/knownCanonical.sql knownCanonical.tab
 hgsql $tempDb -e "select * from knownToMrna" | tail -n +2 | tawk '{if ($1 != last) {print last, count, buffer; count=1; buffer=$2} else {count++;buffer=$2","buffer} last=$1}' | tail -n +2 | sort > tmp1
 hgsql $tempDb -e "select * from knownToMrnaSingle" | tail -n +2 | sort > tmp2
 join tmp2 tmp1 > knownGene.ev
 txGeneAlias $db $spDb kgXref.tab knownGene.ev oldToNew.tab foo.alias foo.protAlias
 tawk '{split($2,a,"."); for(ii = 1; ii <= a[2]; ii++) print $1,a[1] "." ii }' txToAcc.tab >> foo.alias
 sort foo.alias | uniq > ucscGenes.alias
 sort foo.protAlias | uniq > ucscGenes.protAlias
 rm foo.alias foo.protAlias
 hgLoadSqlTab -notOnServer $tempDb kgAlias $kent/src/hg/lib/kgAlias.sql ucscGenes.alias