a27219ced37a549f3b69bc79f07ea99a87439ecd
braney
Sat Feb 20 12:49:06 2021 -0800
Give user a chance to get non-short circuit hits if there's more than one short circuit choice (otherwise just go to position). Add ability to add trixSearch path to findSpecs. Add new find spec for hg38 gencodeV36 that only has canonical genes in it.
diff --git src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
index 10e5c32..dfd333a 100644
--- src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
+++ src/hg/makeDb/doc/ucscGenes/hg38.gencodeV36.sh
@@ -349,30 +349,36 @@ hgLoadRnaFold -warnEmpty $tempDb foldUtr3 fold
 # Clean up
 rm -r split fold err batch.bak
 cd ../utr5
 rm -r split fold err batch.bak
 hgKgGetText $tempDb tempSearch.txt
 sort tempSearch.txt > tempSearch2.txt
 tawk '{split($2,a,"."); printf "%s\t", $1;for(ii = 1; ii <= a[2]; ii++) printf "%s ",a[1] "." ii; printf "\n" }' txToAcc.tab | sort > tempSearch3.txt
 join tempSearch2.txt tempSearch3.txt | sort > knownGene.txt
 ixIxx knownGene.txt knownGene${GENCODE_VERSION}.ix knownGene${GENCODE_VERSION}.ixx
 rm -rf /gbdb/$db/knownGene${GENCODE_VERSION}.ix /gbdb/$db/knownGene${GENCODE_VERSION}.ixx
 ln -s $dir/knownGene${GENCODE_VERSION}.ix /gbdb/$db/knownGene${GENCODE_VERSION}.ix
 ln -s $dir/knownGene${GENCODE_VERSION}.ixx /gbdb/$db/knownGene${GENCODE_VERSION}.ixx
+tawk '{print $5}' knownCanonical.tab | sort > knownCanonicalId.txt
+join knownCanonicalId.txt knownGene.txt > knownGeneFast.txt
+ixIxx knownGeneFast.txt knownGeneFast${GENCODE_VERSION}.ix knownGeneFast${GENCODE_VERSION}.ixx
+ rm -rf /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ix /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ixx
+ln -s $dir/knownGeneFast${GENCODE_VERSION}.ix /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ix
+ln -s $dir/knownGeneFast${GENCODE_VERSION}.ixx /gbdb/$db/knownGeneFast${GENCODE_VERSION}.ixx
 #zcat gencode${GENCODE_VERSION}.bed.gz > ucscGenes.bed
 #jtwoBitToFa -noMask /cluster/data/$db/$db.2bit -bed=ucscGenes.bed stdout | faFilter -uniq stdin ucscGenes.fa
 #jhgPepPred $tempDb generic knownGeneMrna ucscGenes.fa
 bedToPsl /cluster/data/$db/chrom.sizes ucscGenes.bed ucscGenes.psl
 pslRecalcMatch ucscGenes.psl /cluster/data/$db/$db.2bit ucscGenes.fa kgTargetAli.psl
 # should be zero
 awk '$11 != $1 + $3+$4' kgTargetAli.psl
 hgLoadPsl $tempDb kgTargetAli.psl
 cd $dir
 # Make PCR target for UCSC Genes, Part 1.
 # 1. Get a set of IDs that consist of the UCSC Gene accession concatenated with the
 # gene symbol, e.g. uc010nxr.1__DDX11L1
 hgsql $tempDb -N -e 'select kgId,geneSymbol from kgXref' \