8159a5952f6e31625a4e2ad08054685065b36be4
mspeir
  Fri Sep 10 13:48:12 2021 -0700
Adding makedoc for GTEx cis-eQTL track, refs #27947

diff --git src/hg/makeDb/doc/hg38/gtex.txt src/hg/makeDb/doc/hg38/gtex.txt
index 5a5303e..218c75f 100644
--- src/hg/makeDb/doc/hg38/gtex.txt
+++ src/hg/makeDb/doc/hg38/gtex.txt
@@ -1,142 +1,193 @@
 #############################################################################
 # GTEx V6 (October 2015) Kate
 # Create BED from hgFixed tables (see doc/gtex)
 # Reloading during QA of track (fixing gene classes, adding scores).  (March 2016) Kate
 
 cd /hive/data/outside/gtex/V6
 
 # see doc/hg19.txt for how this genePred was made
 set chain = /hive/data/genomes/hg19/bed/liftOver/hg19ToHg38.over.chain.gz
 liftOver -genePred gencodeV19.hg19.genePred $chain gtexGeneModelV6.hg38.genePred \
                 gencode.V19.hg38.unmapped
 # 926 unmapped
 hgLoadGenePred hg38 gtexGeneModelV6 gtexGeneModelV6.hg38.genePred
 
 # OLD: creates gtexGeneModelV6.hg38.genePred
 # OLD: NOTE: drops 192 transcripts.  One I spot-checked indeed didn't exist in our hg38 genes
 
 cd /hive/data/genomes/hg38/bed
 mkdir -p gtex
 cd gtex
 
 # table renamed to gtexGeneModel later
 
 # Use latest GENCODE attrs file to get biotypes
 
 # create bed table
 ~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed  hg38 -gtexVersion=V6 \
         -noLoad -gencodeVersion=$gencodeVersion gtexGeneBedV6 -verbose=2 >&! makeGeneBed.log
 
 # 45 genes not found in GENCODE attributes table
 
 #Max score: 219385.906250
 
 wc -l *.tab
 wc -l *.tab
 52896 gtexGeneBedV6.tab
 # 55810 gtexGeneBedV6.tab
 
 # exploratory for assigning score based on sum of scores (expScores field)
 set bedScore = ~/kent/src/utils/bedScore/bedScore
 $bedScore -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.bed
 $bedScore -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.bed
 $bedScore -col=10 -minScore=1 -method=reg gtexGeneBedV6.tab gtexGeneBedV6.reg.bed
 $bedScore -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.bed
 
 $bedScore -col=10 -minScore=1 -method=reg -log gtexGeneBedV6.tab gtexGeneBedV6.reg.log.bed
 $bedScore -log -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.log.bed
 $bedScore -log -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.log.bed
 $bedScore -log -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.log.bed
 
 # Using -log -method=encode, as this is closest to density plot of all scores
 # as here: GtexTotalExpressionDensity.png, GtexTotalExpressionFrequency.png
 textHistogram -real -autoScale=14 -log -col=5 gtexGeneBedV6.encode.log.bed
 1.000000 ************************************************************ 22889
 72.357214 *************************************************** 4862
 143.714428 ************************************************** 4199
 215.071643 ************************************************* 3931
 286.428857 ************************************************** 4329
 357.786071 *************************************************** 5419
 429.143285 ************************************************** 4472
 500.500500 ********************************************* 1953
 571.857714 ************************************** 564
 643.214928 ******************************** 200
 714.572142 ************************* 61
 785.929356 *********** 6
 857.286571 ******** 4
 928.643785 ************ 7
 
 $bedScore -col=10 -minScore=0 -log -method=encode gtexGeneBedV6.tab gtexGeneBedV6.bed
 
 
 # load up
 set lib = ~/kent/src/hg/lib
 
 hgLoadBed hg38 -noBin -tab -type=bed6+4 \
         -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \
                 gtexGeneBedNewV6 gtexGeneBedV6.bed
 
 # Add GTEx to Gene Sorter (2016-08-18 kate)
 # See hg/near/makeNear.doc
 
 #############################################################################
 # GTEx V8 (Apr 2020) Kate
 # Create BED from hgFixed tables (see doc/gtex)
  
 # Load gene models (Gencode V26 transcript union from GTEx)
 
 cd /hive/data/outside/gtex/V8/rnaSeq
 gtfToGenePred gencode.v26.GRCh38.genes.gtf gencodeV26.hg38.genePred \
                 -infoOut=gtexGeneModelInfoV8.tab
 hgLoadGenePred hg38 gtexGeneModelV8 gencodeV26.hg38.genePred
 
 # Get transcript for each gene (why ?)
 tail -n +2 gtexGeneModelInfoV8.tab | awk '{printf("%s\t%s\n", $1, $9)}' > gtexGeneTranscriptsV8.tab
 #hgLoadSqlTab hgFixed gtexTranscriptV8 ~/kent/src/hg/lib/gtexTranscript.sql gtexGeneTranscriptsV8.tab
 # no schema (or table on hgwdev.hgFixed)
 
 # Load BED table
 cd /hive/data/genomes/hg38/bed/gtex
 mkdir V8
 cd V8
 
 set gencode = V26
 ~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \
         hg38 -noLoad -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 >&! log.txt &
 
 Reading wgEncodeGencodeAttrs table
 Reading gtexGeneModelV8 table
 Reading gtexTissueMedian table
 Writing tab file gtexGeneV8
 Max score: 267400.000000
 
 # Add scores
 set bedScore = ~/kent/src/utils/bedScore/bedScore
 $bedScore -col=10 -minScore=0 -log -method=encode gtexGeneV8.tab gtexGeneBedV8.bed
 textHistogram -real -autoScale=14 -log -col=5 gtexGeneBedV8.bed
 0.000000 ************************************************************ 21287
 71.428643 **************************************************** 5635
 142.857286 **************************************************** 5513
 214.285929 *************************************************** 4683
 285.714571 *************************************************** 4480
 357.143214 *************************************************** 4748
 428.571857 **************************************************** 5466
 500.000500 ************************************************ 3117
 571.429143 ***************************************** 911
 642.857786 ********************************* 252
 714.286429 ************************** 81
 785.715071 ************** 11
 857.143714 ******** 4
 928.572357 *************** 12
 
 # load up
 set lib = ~/kent/src/hg/lib
 hgLoadBed hg38 -noBin -tab -type=bed6+4 \
         -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \
                 gtexGeneV8 gtexGeneBedV8.bed
 #Read 56200 elements of size 10 from gtexGeneBedV8.bed
 
 
 ### TODO 
 # Add GTEx to Gene Sorter (2016-08-18 kate)
 # See hg/near/makeNear.doc
+
+#############################################################################
+# GTEx V8 cis-eQTLs CAVIAR High Confidence (Sept 2021) Matt
+
+# Tar files were downloaded from https://gtexportal.org/home/datasets#filesetFilesDiv15
+# Then unpacked
+
+# Used this file: CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz
+# Description from GTEx_v8_finemapping_CAVIAR/README.txt
+***CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz --> is a single file for all GTEx tissues and all eGene where we report
+all the high causal variants (variants that have posterior probability of > 0.1).
+# Started with this as it seems this is the data Kate used for hg19 eQTL tracks
+
+# Sample line from file
+#TISSUE  GENE    eQTL    CHROM   POS     Probability
+#Brain_Caudate_basal_ganglia     ENSG00000248485.1       1_161274374     1       161274374       0.157456
+
+# Wrote script to help build interact-format tracks from CAVIAR files:
+buildInteract
+# Script takes in eQTL, SNP info, and GENCODE genepred file
+# Uses this information to build an interact line for each item in the eQTL file
+# Not sure if script is that generalizable since each eQTL file seems to have its own format
+# Will see if I can do it though
+
+# Need to convert GTF to genePredExt
+# (Kate had converted GTF to genePred for GTEx V8 expression track work, but that didn't include gene name as I was hoping)
+gtfToGenePred -genePredExt -geneNameAsName2 -includeVersion gencode.v26.GRCh38.genes.gtf gencode.v26.GRCh38.genes.gpExt 
+
+# Command to build interact files
+./buildInteract CAVIAR_Results_v8_GTEx_LD_HighConfidentVariants.gz ../gencode.v26.GRCh38.genes.gpExt ../GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz > gtexCaviar.interact.txt
+
+# Sort resulting bed file
+bedSort gtexCaviar.interact.txt gtexCaviar.interact.sorted.txt 
+
+# Build bigInteract
+bedToBigBed -as=../interact.as -type=bed5+13 gtexCaviar.interact.sorted.txt /hive/data/genomes/hg38/chrom.sizes gtexCaviar.interact.bb
+
+## Add colors
+# Make list of tissues in V8 file
+zcat GTEx_v8_finemapping_CAVIAR/CAVIAR_Results_v8_GTEx_LD_ALL_NOCUTOFF.txt.gz | cut -f1 -d$'\t' |sort -u |grep -v TISSUE> gtexTissuesV8.txt
+
+# Using GTEx V6p colors, manually match up to names in V8 file
+ln -s /hive/data/outside/GTEx/V6p/eQtl/Caviar2/gtexTissueColor.tab 
+gtexTissueColor.v8.tab 
+
+# Write script to add colors from this file to the interact file
+addColors
+./addColors gtexCaviar.interact.sorted.txt ../gtexTissueColor.v8.tab > gtexCaviar.interact.sorted.colors.txt
+
+# Rebuild bigInteract file
+bedToBigBed -as=../interact.as -type=bed5+13 gtexCaviar.interact.sorted.colors.txt /hive/data/genomes/hg38/chrom.sizes gtexCaviar.interact.colors.bb