d96751b9e97f16417a020a9f6356432e6d102bc1
kate
  Wed Apr 29 10:21:59 2020 -0700
Correct tissue description, as per GTEx portal page for V8. refs #25130

diff --git src/hg/makeDb/doc/gtex/V6.txt src/hg/makeDb/doc/gtex/V6.txt
index b7d5aca..3755d14 100644
--- src/hg/makeDb/doc/gtex/V6.txt
+++ src/hg/makeDb/doc/gtex/V6.txt
@@ -1,129 +1,143 @@
 # Download and load tissue expression and sample metadata for GTEx V6 (October 2015) from portal:
 #      gtexportal.org
 # 11/2015 KRR
 
 # Reloaded gtexSampleData table to restore zero-scored rows (2016-03-21 Kate)
 
 # Download normalized gene expression levels (RPKM)
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/rna_seq_data/GTEx_Analysis_v6_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm.gct.gz
 gunzip $dataFile.gz
 wc -l $dataFile ../V4/*.gct
     # 56321 GTEx_Analysis_v6_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm.gct
     # 55996 ../V4/GTEx_Analysis_2014-01-17_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm.gct
 
 # Download subject and sample metadata and compare to V4
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/annotations/GTEx_Data_V6_Annotations_SampleAttributesDD.xlsx
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/annotations/GTEx_Data_V6_Annotations_SampleAttributesDS.txt
 wc -l *Sample*txt ../V4/*Sample*txt
   11984 GTEx_Data_V6_Annotations_SampleAttributesDS.txt
    4502 ../V4/GTEx_Data_2014-01-17_Annotations_SampleAttributesDS.txt
 
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/annotations/GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/annotations/GTEx_Data_V6_Annotations_SubjectPhenotypes_DD.xlsx
 wc -l *Subject*txt ../V4/*Subject*txt
   571 GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt
   215 ../V4/GTEx_Data_2014-01-17_Annotations_SubjectPhenotypes_DS.txt
 
 # Twice as many donors and samples
 
 # NOTE: format of subject file has changed slightly -- no longer including the word 'years' in age column.  Sample file format appears unchanged.
 # Parser looks like it will work unchanged.
 
 # Start with tissues
 hgGtex $dataFile $sampleFile tissues.tab >&! parseTissues.log &
 
 # Looks like no changes from V4
 
 # Download gene models
 wget http://www.gtexportal.org/static/datasets/gtex_analysis_v6/reference/gencode.v19.genes.patched_contigs.gtf.gz
 
 
 # Create main tables
 set subjectFile = GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt
 set sampleFile = GTEx_Data_V6_Annotations_SampleAttributesDS.txt
 set dataFile = GTEx_Analysis_v6_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm.gct
 set tissueFile = ../V4/portal/gtexColorTissue.dec.tab
 hgGtex -tab=tables -noLoad gtexV6 V6 $dataFile $sampleFile $subjectFile $tissueFile >&! parseData.log  &
 ls -l tables
 
 -rw-rw-r-- 1 kate genecats        9968 Dec 22 16:50 gtexV6Donor.tab
 -rw-rw-r-- 1 kate genecats          14 Dec 22 17:00 gtexV6Info.tab
 -rw-rw-r-- 1 kate genecats     1164893 Dec 22 16:50 gtexV6Sample.tab
 -rw-rw-r-- 1 kate genecats 30090841561 Dec 22 17:00 gtexV6SampleData.tab
 -rw-rw-r-- 1 kate genecats   184672753 Dec 22 17:00 gtexV6TissueData.tab
 -rw-rw-r-- 1 kate genecats    19436982 Dec 22 17:00 gtexV6TissueMedian.tab
 
 $ wc -l gtex*.tab
         570 gtexV6Donor.tab
           1 gtexV6Info.tab
        8555 gtexV6Sample.tab
   481800490 gtexV6SampleData.tab
     2984854 gtexV6TissueData.tab
       56318 gtexV6TissueMedian.tab
 
 # NOTE: half of the scores are zero-valued.  Keeping them -- if we drop them, the code must
 # be adapted so that the median computation and sample counts remain correct
 select count(*) from gtexSampleData where score=0;
 +-----------+
 | count(*)  |
 +-----------+
 | 242219267 |
 +-----------+
 
 # keeping these instructions for reference, but we will keep zeros
 #hgsql hgFixed -e 'alter table gtexSampleData disable keys; delete from gtexSampleData where score=0; alter table gtexSampleData enable keys'
 # 30 minutes or so
 
 # looks good, load tables 
 
 hgGtex gtexV6 V6 $dataFile $sampleFile $subjectFile $tissueFile >&! parseData2.log  &
 
 # merge gtexV6Info into gtexInfo (i.e. add a row for V6)
 hgsql hgFixed -e 'select * from gtexV6Info';
 hgsql hgFixed -e "insert into gtexInfo set version='V6', releaseDate='2015-10-01', maxMedianScore=711778"
 
 # Fix sample table (V6 format changed)
 # Required changes to hgGtex parser
 # 2016-03-01 kate
 
 hgGtex -tab=newtables -noData -noLoad gtex2V6 V6 $dataFile $sampleFile $subjectFile $tissueFile
 hgLoadSqlTab hgFixed gtexSample ~kate/kent/src/hg/lib/gtexSample.sql \
                 newtables/gtex2V6Sample.tab
 
 set dir = tables.2016-03-22
 mkdir $dir
 set hgGtex = ~kate/kent/src/hg/makeDb/outside/hgGtex/hgGtex
 $hgGtex -tab=$dir -noLoad gtex4V6 V6 $dataFile $sampleFile $subjectFile $tissueFile -verbose=2 >&! parseData4.log &
 ls -l $dir
 
 wc -l $dir/*.tab
         570 gtex4V6Donor.tab
           1 gtex4V6Info.tab
        8555 gtex4V6Sample.tab
   481800490 gtex4V6SampleData.tab
     2984854 gtex4V6TissueData.tab
       56318 gtex4V6TissueMedianAll.tab
       56318 gtex4V6TissueMedianFemale.tab
       56318 gtex4V6TissueMedianMale.tab
 
 # looks OK (SampleData table same size), so load it
 cd $dir
 hgLoadSqlTab hgFixed gtexSampleDataV6_full ~kate/kent/src/hg/lib/gtexSampleData.sql \
                 gtex4V6SampleData.tab
 
 hgLoadSqlTab hgFixed gtexTissueMedianV6 ~kate/kent/src/hg/lib/gtexTissueMedian.sql \
                 gtex4V6TissueMedianAll.tab
 
 ############
 # Add GTEX consortium abbreviation column to tissue table (e.g. for ASE)
 
 cd /hive/data/outside/gtex/V6/metadata
 hgsql hgFixed -e 'select * from gtexTissue' > gtexTissue.old.tab
 
 awk -F"\t" 'NR>1 {print $2}' gtex_tissue_colors.txt  | paste gtexTissue.old.tab - > gtexTissue.new.tab
 hgLoadSqlTab hgFixed gtexTissueNew ~kate/kent/src/hg/lib/gtexTissue.sql gtexTissue.new.tab
 hgsql hgFixed -e 'alter table gtexTissueV6 rename to gtexTissueV6Old; alter table gtexTissueNew rename to gtexTissueV6'
 
 # Test GTEx track on hgwdev still works with new table
 
+############
+# Correct cell type description, as per V8 portal page
+# 4/29/20 KRR
+
+MariaDB [hgFixed]> select * from gtexTissue where name like '%fibro%';
++----+--------------------+---------------------------------+-------+----------+---------+
+| id | name               | description                     | organ | color    | abbrev  |
++----+--------------------+---------------------------------+-------+----------+---------+
+| 22 | xformedfibroblasts | Cells - Transformed fibroblasts | Skin  | 10141901 | FIBRBLS |
++----+--------------------+---------------------------------+-------+----------+---------+
+1 row in set (0.00 sec)
+
+MariaDB [hgFixed]> update gtexTissue set description="Cells - Cultured fibroblasts" where id=22;
+