a2ad2a8f3e71fe90a3c335f967ff3fcec9d37296 kate Thu Apr 23 10:38:28 2020 -0700 Initial work for GTEx V8 gene expression track: parse files and load gene expression and metadata tables. refs #25130 diff --git src/hg/makeDb/doc/hg38/gtex.txt src/hg/makeDb/doc/hg38/gtex.txt index 75607a6..d8e0b72 100644 --- src/hg/makeDb/doc/hg38/gtex.txt +++ src/hg/makeDb/doc/hg38/gtex.txt @@ -1,16 +1,50 @@ ############################################################################# +# GTEx V8 (Apr 2020) Kate +# Create BED from hgFixed tables (see doc/gtex) + +# Load gene models (Gencode V26 transcript union from GTEx) + +cd /hive/data/outside/gtex/V8/rnaSeq +gtfToGenePred gencode.v26.GRCh38.genes.gtf gencodeV26.hg38.genePred \ + -infoOut=gtexGeneModelInfoV8.tab +hgLoadGenePred hg38 gtexGeneModelV8 gencodeV26.hg38.genePred + +# Get transcript for each gene (why?) +tail -n +2 gtexGeneModelInfoV8.tab | awk '{printf("%s\t%s\n", $1, $9)}' > gtexGeneTranscriptsV8.tab +#hgLoadSqlTab hgFixed gtexTranscriptV8 ~/kent/src/hg/lib/gtexTranscript.sql gtexGeneTranscriptsV8.tab +# Load above skipped: no schema file (and no such table on hgwdev.hgFixed) + +# Load BED table +cd /hive/data/genomes/hg38/bed/gtex +mkdir V8 +cd V8 + +set gencode = V26 +~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \ + hg38 -noLoad -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 >&! log.txt + +Reading wgEncodeGencodeAttrs table +Reading gtexGeneModelV8 table +Reading gtexTissueMedian table +Writing tab file gtexGeneV8 +Max score: 267400.000000 + +~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \ + hg38 -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 + +############################################################################# # GTEx V6 (October 2015) Kate # Create BED from hgFixed tables (see doc/gtex) # Reloading during QA of track (fixing gene classes, adding scores). 
(March 2016) Kate cd /hive/data/outside/gtex/V6 # see doc/hg19.txt for how this genePred was made set chain = /hive/data/genomes/hg19/bed/liftOver/hg19ToHg38.over.chain.gz liftOver -genePred gencodeV19.hg19.genePred $chain gtexGeneModelV6.hg38.genePred \ gencode.V19.hg38.unmapped # 926 unmapped hgLoadGenePred hg38 gtexGeneModelV6 gtexGeneModelV6.hg38.genePred # OLD: creates gtexGeneModelV6.hg38.genePred # OLD: NOTE: drops 192 transcripts. One I spot-checked indeed didn't exist in our hg38 genes