fa261bb6a4e20cb44972593d383589f1e5e4edce
kate
  Tue Apr 28 12:34:02 2020 -0700
Redo tissue table, bed file, and related code to slot new kidney medulla tissue in (assign id) in proper alphabetical order. refs #25130

diff --git src/hg/makeDb/doc/gtex/V8.txt src/hg/makeDb/doc/gtex/V8.txt
index 1329f39..0b0c794 100644
--- src/hg/makeDb/doc/gtex/V8.txt
+++ src/hg/makeDb/doc/gtex/V8.txt
@@ -1,154 +1,167 @@
 # Download and load GTEx V8 (released August 2019) from portal:
 #      gtexportal.org
 # 2/20 KRR
 
 cd /hive/data/outside/GTEx
 
 # Download normalized gene expression levels (TPM)
 
 mkdir -p V8/rnaSeq
 cd V8/rnaSeq
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/rna_seq_data/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct.gz
 set dataFile = GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct
 gunzip $dataFile.gz
 wc -l $dataFile
 # 56203 GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct
 
 # Download subject and sample metadata and compare to V6
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SampleAttributesDD.xlsx
 
 wc -l *Sample*txt ../../V6/*Sample*txt
 # 22952 GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
 # 11984 GTEx_Data_V6_Annotations_SampleAttributesDS.txt
 
 # nearly 2x as many samples
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SubjectPhenotypesDD.xlsx
 
 wc -l *Phenotypes*.txt ../../V6/*Phenotypes*.txt
 # 981 GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt
 # 571 ../../V6/GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt
 
 # nearly 2x subjects
 
 # Download gene models
 wget -nd  https://storage.googleapis.com/gtex_analysis_v8/reference/gencode.v26.GRCh38.genes.gtf
 
 # Parse and load
 # 4/20 KRR
 
 set subjectFile = GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt
 set sampleFile = GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
 
 set hgGtex = ~kate/kent/src/hg/makeDb/outside/hgGtex/hgGtex
 set dir = tables
 mkdir $dir
 
 # test run -- metadata, and just a few rows of data
 $hgGtex -tab=$dir -noLoad -limit=2 gtexV8 V8 $dataFile $sampleFile $subjectFile $tissueFile -verbose=2 >&! hgGtex.log &
 
 # NOTE: change to tissues in sample descriptions... (field 7)
 # Changed fibroblasts description, and added Kidney - Medulla (4 samples)
 diff sampleTissues.txt /hive/data/outside/GTEx/V6
+
 0a1
 > 
 22d22
 < Cells - Cultured fibroblasts
 24a25
 > Cells - Transformed fibroblasts
 36d36
 < Kidney - Medulla
 
 # Manually update tissueFile, with version that includes GTEx abbrevs
 cp ../../V6/metadata/gtexTissue.new.tab gtexColorTissue.tab
-# edit
+# edit - change fibroblast label, move Kidney - Medulla to alphabetical position and renumber id's
+
 set tissueFile = gtexColorTissue.tab
 
+$hgGtex -tab=$dir -noLoad gtexV8 V8 $dataFile $sampleFile $subjectFile $tissueFile -verbose=2 >&! noload.log &
+
 # sanity check files
 cd tables
 ls -l
 -rw-rw-r-- 1 kate genecats       17346 Apr 21 16:33 gtexV8Donor.tab
 -rw-rw-r-- 1 kate genecats          14 Apr 21 16:53 gtexV8Info.tab
 -rw-rw-r-- 1 kate genecats     3062798 Apr 21 16:33 gtexV8Sample.tab
 -rw-rw-r-- 1 kate genecats 61521590867 Apr 21 16:53 gtexV8SampleData.tab
 -rw-rw-r-- 1 kate genecats   189120561 Apr 21 16:53 gtexV8TissueData.tab
 -rw-rw-r-- 1 kate genecats    19937702 Apr 21 16:53 gtexV8TissueMedianAll.tab
 -rw-rw-r-- 1 kate genecats    19913568 Apr 21 16:53 gtexV8TissueMedianFemale.tab
 -rw-rw-r-- 1 kate genecats    19871866 Apr 21 16:53 gtexV8TissueMedianMale.tab
 
 wc -l gtex*.tab
         980 gtexV8Donor.tab
           1 gtexV8Info.tab
       17382 gtexV8Sample.tab
   976868400 gtexV8SampleData.tab
     3034800 gtexV8TissueData.tab
       56200 gtexV8TissueMedianAll.tab
       56200 gtexV8TissueMedianFemale.tab
       56200 gtexV8TissueMedianMale.tab
 
 # peruse Info.tab, first lines of others
 
 # Look OK, so load into database
 
 $hgGtex -tab=$dir gtexV8 V8 $dataFile $sampleFile $subjectFile $tissueFile -verbose=2 >&! load.log &
 
 [hgFixed]> show tables like 'gtexV8%';
 +-----------------------------+
 | Tables_in_hgFixed (gtexV8%) |
 +-----------------------------+
 | gtexV8Donor                 |
 | gtexV8Info                  |
 | gtexV8Sample                |
 | gtexV8SampleData            |
 | gtexV8Tissue                |
 | gtexV8TissueMedianAll       |
 | gtexV8TissueMedianFemale    |
 | gtexV8TissueMedianMale      |
 +-----------------------------+
 
 # merge gtexV8Info into gtexInfo (i.e. add a row for V8)
 hgsql hgFixed -e 'select * from gtexV8Info';
 +---------+-------------+----------+----------------+
 | version | releaseDate | maxScore | maxMedianScore |
 +---------+-------------+----------+----------------+
 | V8      | 0           |        0 |         747400 |
 +---------+-------------+----------+----------------+
 
 # get release date from GTEx portal
 hgsql hgFixed -e "insert into gtexInfo set version='V8', releaseDate='2019-08-26', maxMedianScore=747400"
 
 # Rename tables for use by hgGtexGeneBed
 # TODO: Change hgGtex to name tables as needed by next step!
 
+hgsql hgFixed -e 'alter table gtexTissueMedianV8 rename to gtexTissueMedianV8Old';
+hgsql hgFixed -e 'alter table gtexTissueV8 rename to gtexTissueV8Old';
+hgsql hgFixed -e 'alter table gtexSampleV8 rename to gtexSampleV8Old';
+hgsql hgFixed -e 'alter table gtexDonorV8 rename to gtexDonorV8Old';
+hgsql hgFixed -e 'alter table gtexSampleDataV8 rename to gtexSampleDataV8Old';
+
 hgsql hgFixed -e 'alter table gtexV8TissueMedianAll rename to gtexTissueMedianV8';
 hgsql hgFixed -e 'alter table gtexV8Tissue rename to gtexTissueV8';
 hgsql hgFixed -e 'alter table gtexV8Sample rename to gtexSampleV8';
 hgsql hgFixed -e 'alter table gtexV8Donor rename to gtexDonorV8';
 hgsql hgFixed -e 'alter table gtexV8SampleData rename to gtexSampleDataV8';
+# hung on last command.  Instead, renamed gtexSampleDataV8Old to gtexSampleDataV8.
+
+# NOTE: Tried to drop the gtexV8SampleData table but that hung too -- ask admins
 
 #######################################
 # Download fine-mapped eQTL's
 
 cd ../
 mkdir eQtl
 cd eQtl
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/README_eQTL_v8.txt
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_v8_finemapping_CAVIAR.tar
 
 tar xvf GTEx_v8_finemapping_CAVIAR.tar
 ln -s GTEx_v8_finemapping_CAVIAR caviar
 cd caviar
 
 # Download variant mapping and VEP annotation
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz
 
 wget -nd https://storage.googleapis.com/gtex_analysis_v8/reference/WGS_Feature_overlap_collapsed_VEP_short_4torus.MAF01.txt.gz
 
 gunzip *.gz