8159a5952f6e31625a4e2ad08054685065b36be4 mspeir Fri Sep 10 13:48:12 2021 -0700 Adding makedoc for GTEx cis-eQTL track, refs #27947 diff --git src/hg/makeDb/doc/hg38/gtex.txt src/hg/makeDb/doc/hg38/gtex.txt index 5a5303e..218c75f 100644 --- src/hg/makeDb/doc/hg38/gtex.txt +++ src/hg/makeDb/doc/hg38/gtex.txt @@ -1,142 +1,193 @@ ############################################################################# # GTEx V6 (October 2015) Kate # Create BED from hgFixed tables (see doc/gtex) # Reloading during QA of track (fixing gene classes, adding scores). (March 2016) Kate cd /hive/data/outside/gtex/V6 # see doc/hg19.txt for how this genePred was made set chain = /hive/data/genomes/hg19/bed/liftOver/hg19ToHg38.over.chain.gz liftOver -genePred gencodeV19.hg19.genePred $chain gtexGeneModelV6.hg38.genePred \ gencode.V19.hg38.unmapped # 926 unmapped hgLoadGenePred hg38 gtexGeneModelV6 gtexGeneModelV6.hg38.genePred # OLD: creates gtexGeneModelV6.hg38.genePred # OLD: NOTE: drops 192 transcripts. One I spot-checked indeed didn't exist in our hg38 genes cd /hive/data/genomes/hg38/bed mkdir -p gtex cd gtex # table renamed to gtexGeneModel later # Use latest GENCODE attrs file to get biotypes # create bed table ~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed hg38 -gtexVersion=V6 \ -noLoad -gencodeVersion=$gencodeVersion gtexGeneBedV6 -verbose=2 >&! makeGeneBed.log # 45 genes not found in GENCODE attributes table #Max score: 219385.906250 wc -l *.tab wc -l *.tab 52896 gtexGeneBedV6.tab # 55810 gtexGeneBedV6.tab # exploratory for assigning score based on sum of scores (expScores field) set bedScore = ~/kent/src/utils/bedScore/bedScore $bedScore -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.bed $bedScore -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.bed $bedScore -col=10 -minScore=1 -method=reg gtexGeneBedV6.tab gtexGeneBedV6.reg.bed $bedScore -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.bed $bedScore -col=10 -minScore=1 -method=reg -log gtexGeneBedV6.tab gtexGeneBedV6.reg.log.bed $bedScore -log -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.log.bed $bedScore -log -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.log.bed $bedScore -log -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.log.bed # Using -log -method=encode, as this is closest to density plot of all scores # as here: GtexTotalExpressionDensity.png, GtexTotalExpressionFrequency.png textHistogram -real -autoScale=14 -log -col=5 gtexGeneBedV6.encode.log.bed 1.000000 ************************************************************ 22889 72.357214 *************************************************** 4862 143.714428 ************************************************** 4199 215.071643 ************************************************* 3931 286.428857 ************************************************** 4329 357.786071 *************************************************** 5419 429.143285 ************************************************** 4472 500.500500 ********************************************* 1953 571.857714 ************************************** 564 643.214928 ******************************** 200 714.572142 ************************* 61 785.929356 *********** 6 857.286571 ******** 4 928.643785 ************ 7 $bedScore -col=10 -minScore=0 -log -method=encode gtexGeneBedV6.tab gtexGeneBedV6.bed # load up set lib = ~/kent/src/hg/lib hgLoadBed hg38 -noBin -tab -type=bed6+4 \ -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \ gtexGeneBedNewV6 gtexGeneBedV6.bed # Add GTEx to Gene Sorter (2016-08-18 kate) # See hg/near/makeNear.doc ############################################################################# # GTEx V8 (Apr 2020) Kate # Create BED from hgFixed tables (see doc/gtex) # Load gene models (Gencode V26 transcript union from GTEx) cd /hive/data/outside/gtex/V8/rnaSeq gtfToGenePred gencode.v26.GRCh38.genes.gtf gencodeV26.hg38.genePred \ -infoOut=gtexGeneModelInfoV8.tab hgLoadGenePred hg38 gtexGeneModelV8 gencodeV26.hg38.genePred # Get transcript for each gene (why ?) tail -n +2 gtexGeneModelInfoV8.tab | awk '{printf("%s\t%s\n", $1, $9)}' > gtexGeneTranscriptsV8.tab #hgLoadSqlTab hgFixed gtexTranscriptV8 ~/kent/src/hg/lib/gtexTranscript.sql gtexGeneTranscriptsV8.tab # no schema (or table on hgwdev.hgFixed) # Load BED table cd /hive/data/genomes/hg38/bed/gtex mkdir V8 cd V8 set gencode = V26 ~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \ hg38 -noLoad -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 >&! log.txt & Reading wgEncodeGencodeAttrs table Reading gtexGeneModelV8 table Reading gtexTissueMedian table Writing tab file gtexGeneV8 Max score: 267400.000000 # Add scores set bedScore = ~/kent/src/utils/bedScore/bedScore $bedScore -col=10 -minScore=0 -log -method=encode gtexGeneV8.tab gtexGeneBedV8.bed textHistogram -real -autoScale=14 -log -col=5 gtexGeneBedV8.bed 0.000000 ************************************************************ 21287 71.428643 **************************************************** 5635 142.857286 **************************************************** 5513 214.285929 *************************************************** 4683 285.714571 *************************************************** 4480 357.143214 *************************************************** 4748 428.571857 **************************************************** 5466 500.000500 ************************************************ 3117 571.429143 ***************************************** 911 642.857786 ********************************* 252 714.286429 ************************** 81 785.715071 ************** 11 857.143714 ******** 4 928.572357 *************** 12 # load up set lib = ~/kent/src/hg/lib hgLoadBed hg38 -noBin -tab -type=bed6+4 \ -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \ gtexGeneV8 gtexGeneBedV8.bed #Read 56200 elements of size 10 from gtexGeneBedV8.bed ### TODO # Add GTEx to Gene Sorter (2016-08-18 kate) # See hg/near/makeNear.doc + +############################################################################# +# GTEx V8 cis-eQTLs CAVIAR High Confidence (Sept 2021) Matt + +# Tar files were downloaded from https://gtexportal.org/home/datasets#filesetFilesDiv15 +# Then unpacked + +# Used this file: CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz +# Description from GTEx_v8_finemapping_CAVIAR/README.txt +***CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz --> is a single file for all GTEx tissues and all eGene where we report +all the high causal variants (variants that have posterior probability of > 0.1). +# Started with this as it seems this is the data Kate used for hg19 eQTL tracks + +# Sample line from file +#TISSUE GENE eQTL CHROM POS Probability +#Brain_Caudate_basal_ganglia ENSG00000248485.1 1_161274374 1 161274374 0.157456 + +# Wrote script to help build interact-format tracks from CAVIAR files: +buildInteract +# Script takes in eQTL, SNP info, and GENCODE genepred file +# Uses this information to build an interact line for each item in the eQTL file +# Not sure if script is that generalizable since each eQTL file seems to have its own format +# Will see if I can do it though + +# Need to convert GTF to genePredExt +# (Kate had converted GTF to genePred for GTEx V8 expression track work, but that didn't include gene name as I was hoping) +gtfToGenePred -genePredExt -geneNameAsName2 -includeVersion gencode.v26.GRCh38.genes.gtf gencode.v26.GRCh38.genes.gpExt + +# Command to build interact files +./buildInteract CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz ../gencode.v26.GRCh38.genes.gpExt ../GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz > gtexCaviar.interact.txt + +# Sort resulting bed file +bedSort gtexCaviar.interact.txt gtexCaviar.interact.sorted.txt + +# Build bigInteract +bedToBigBed -as=../interact.as -type=bed5+13 gtexCaviar.interact.sorted.txt /hive/data/genomes/hg38/chrom.sizes gtexCaviar.interact.bb + +## Add colors +# Make list of tissues in V8 file +zcat GTEx_v8_finemapping_CAVIAR/CAVIAR_Results_v8_GTEx_LD_ALL_NOCUTOFF.txt.gz | cut -f1 -d$'\t' |sort -u |grep -v TISSUE> gtexTissuesV8.txt + +# Using GTEx V6p colors, manually match up to names in V8 file +ln -s /hive/data/outside/GTEx/V6p/eQtl/Caviar2/gtexTissueColor.tab +gtexTissueColor.v8.tab + +# Write script to add colors from this file to the interact file +addColors +./addColors gtexCaviar.interact.sorted.txt ../gtexTissueColor.v8.tab > gtexCaviar.interact.sorted.colors.txt + +# Rebuild bigInteract file +bedToBigBed -as=../interact.as -type=bed5+13 gtexCaviar.interact.sorted.colors.txt /hive/data/genomes/hg38/chrom.sizes gtexCaviar.interact.colors.bb