a2ad2a8f3e71fe90a3c335f967ff3fcec9d37296
kate
  Thu Apr 23 10:38:28 2020 -0700
Initial work for GTEx V8 gene expression track: parse files and load gene expression and metadata tables. refs #25130

diff --git src/hg/makeDb/doc/hg38/gtex.txt src/hg/makeDb/doc/hg38/gtex.txt
index 75607a6..d8e0b72 100644
--- src/hg/makeDb/doc/hg38/gtex.txt
+++ src/hg/makeDb/doc/hg38/gtex.txt
@@ -1,83 +1,117 @@
 #############################################################################
+# GTEx V8 (Apr 2020) Kate
+# Create BED from hgFixed tables (see doc/gtex)
+ 
+# Load gene models (Gencode V26 transcript union from GTEx)
+
+cd /hive/data/outside/gtex/V8/rnaSeq
+gtfToGenePred gencode.v26.GRCh38.genes.gtf gencodeV26.hg38.genePred \
+                -infoOut=gtexGeneModelInfoV8.tab
+hgLoadGenePred hg38 gtexGeneModelV8 gencodeV26.hg38.genePred
+
+# Get transcript for each gene (TODO: confirm why this file is needed)
+tail -n +2 gtexGeneModelInfoV8.tab | awk '{printf("%s\t%s\n", $1, $9)}' > gtexGeneTranscriptsV8.tab
+#hgLoadSqlTab hgFixed gtexTranscriptV8 ~/kent/src/hg/lib/gtexTranscript.sql gtexGeneTranscriptsV8.tab
+# NOTE: load above is skipped -- no schema (or table on hgwdev.hgFixed)
+
+# Load BED table
+cd /hive/data/genomes/hg38/bed/gtex
+mkdir V8
+cd V8
+
+set gencode = V26
+~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \
+        hg38 -noLoad -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 >&! log.txt
+
+# Reading wgEncodeGencodeAttrs table
+# Reading gtexGeneModelV8 table
+# Reading gtexTissueMedian table
+# Writing tab file gtexGeneV8
+# Max score: 267400.000000
+
+~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed \
+        hg38 -gtexVersion=V8 -gencodeVersion=$gencode gtexGeneV8 -verbose=2 
+
+#############################################################################
 # GTEx V6 (October 2015) Kate
 # Create BED from hgFixed tables (see doc/gtex)
 # Reloading during QA of track (fixing gene classes, adding scores).  (March 2016) Kate
 
 cd /hive/data/outside/gtex/V6
 
 # see doc/hg19.txt for how this genePred was made
 set chain = /hive/data/genomes/hg19/bed/liftOver/hg19ToHg38.over.chain.gz
 liftOver -genePred gencodeV19.hg19.genePred $chain gtexGeneModelV6.hg38.genePred \
                 gencode.V19.hg38.unmapped
 # 926 unmapped
 hgLoadGenePred hg38 gtexGeneModelV6 gtexGeneModelV6.hg38.genePred
 
 # OLD: creates gtexGeneModelV6.hg38.genePred
 # OLD: NOTE: drops 192 transcripts.  One I spot-checked indeed didn't exist in our hg38 genes
 
 cd /hive/data/genomes/hg38/bed
 mkdir -p gtex
 cd gtex
 
 # table renamed to gtexGeneModel later
 
 # Use latest GENCODE attrs file to get biotypes
 
 
 # create bed table
 ~/kent/src/hg/makeDb/outside/hgGtexGeneBed/hgGtexGeneBed  hg38 -gtexVersion=V6 \
         -noLoad -gencodeVersion=$gencodeVersion gtexGeneBedV6 -verbose=2 >&! makeGeneBed.log
 
 # 45 genes not found in GENCODE attributes table
 
 #Max score: 219385.906250
 
 wc -l *.tab
 wc -l *.tab
 52896 gtexGeneBedV6.tab
 # 55810 gtexGeneBedV6.tab
 
 # exploratory for assigning score based on sum of scores (expScores field)
 set bedScore = ~/kent/src/utils/bedScore/bedScore
 $bedScore -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.bed
 $bedScore -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.bed
 $bedScore -col=10 -minScore=1 -method=reg gtexGeneBedV6.tab gtexGeneBedV6.reg.bed
 $bedScore -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.bed
 
 $bedScore -col=10 -minScore=1 -method=reg -log gtexGeneBedV6.tab gtexGeneBedV6.reg.log.bed
 $bedScore -log -col=10 -minScore=1 -method=std2 gtexGeneBedV6.tab gtexGeneBedV6.std2.log.bed
 $bedScore -log -col=10 -minScore=1 -method=encode gtexGeneBedV6.tab gtexGeneBedV6.encode.log.bed
 $bedScore -log -col=10 -minScore=1 -method=asinh gtexGeneBedV6.tab gtexGeneBedV6.asinh.log.bed
 
 # Using -log -method=encode, as this is closest to density plot of all scores
 # as here: GtexTotalExpressionDensity.png, GtexTotalExpressionFrequency.png
 textHistogram -real -autoScale=14 -log -col=5 gtexGeneBedV6.encode.log.bed
 1.000000 ************************************************************ 22889
 72.357214 *************************************************** 4862
 143.714428 ************************************************** 4199
 215.071643 ************************************************* 3931
 286.428857 ************************************************** 4329
 357.786071 *************************************************** 5419
 429.143285 ************************************************** 4472
 500.500500 ********************************************* 1953
 571.857714 ************************************** 564
 643.214928 ******************************** 200
 714.572142 ************************* 61
 785.929356 *********** 6
 857.286571 ******** 4
 928.643785 ************ 7
 
 $bedScore -col=10 -minScore=0 -log -method=encode gtexGeneBedV6.tab gtexGeneBedV6.bed
 
 
 # load up
 set lib = ~/kent/src/hg/lib
 
 hgLoadBed hg38 -noBin -tab -type=bed6+4 \
         -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \
                 gtexGeneBedNewV6 gtexGeneBedV6.bed
 
 # Add GTEx to Gene Sorter (2016-08-18 kate)
 # See hg/near/makeNear.doc