8ada62f0cad845d55480e7e47dcab4b246736383 max Fri Jan 17 09:44:58 2020 -0800 after feedback from Terence, fix RefSeq Select to include MANE and update the docs page. refs #24787 diff --git src/hg/makeDb/doc/hg38/ncbiRefSeq.txt src/hg/makeDb/doc/hg38/ncbiRefSeq.txt index b0d3029..42f4705 100644 --- src/hg/makeDb/doc/hg38/ncbiRefSeq.txt +++ src/hg/makeDb/doc/hg38/ncbiRefSeq.txt @@ -302,21 +302,21 @@ # ncbiRefSeq 4.332%, refGene 3.098%, both 2.920%, cover 67.40%, enrich 21.75x featureBits -enrichment hg38 ncbiRefSeqCurated refGene # ncbiRefSeqCurated 3.073%, refGene 3.098%, both 3.067%, cover 99.81%, enrich 32.22x # previous: # ncbiRefSeqCurated 2.880%, refGene 3.098%, both 2.846%, cover 98.84%, enrich 31.90x featureBits -enrichment hg38 refGene ncbiRefSeqCurated # refGene 3.098%, ncbiRefSeqCurated 3.073%, both 3.067%, cover 98.99%, enrich 32.22x # previous: # refGene 3.098%, ncbiRefSeqCurated 2.880%, both 2.846%, cover 91.86%, enrich 31.90x ######################################################################### # addition of RefSeq Select-restricted subset, Max, Feb 10 2019 cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06/ -zcat download/*_genomic.gff.gz | grep 'tag=RefSeq Select' | cut -f9- | tr ';' '\n' | grep Name= | grep -v NP_ | cut -d= -f2 | sort -u > refseqSelectTranscripts.txt -zcat process/hg38.curated.gp | fgrep -f refseqSelectTranscripts.txt - > refseqSelect.curated.gp +zcat download/*_genomic.gff.gz | egrep 'tag=(RefSeq|MANE) Select' | cut -f9- | tr ';' '\n' | grep Name= | grep -v NP_ | cut -d= -f2 | sort -u > refseqSelectTranscripts.txt +cat process/hg38.curated.gp | fgrep -f refseqSelectTranscripts.txt - > refseqSelect.curated.gp hgLoadGenePred -genePredExt hg38 ncbiRefSeqSelect refseqSelect.curated.gp wc -l refseqSelect.curated.gp -6378 refseqSelect.curated.gp +21071 refseqSelect.curated.gp #############################################################################