src/hg/makeDb/doc/hg18.txt 1.381

1.381 2009/09/09 18:13:50 hartera
Documented code changes and reloaded vegaPep table with only those proteins whose transcripts are in vegaGtp.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.380
retrieving revision 1.381
diff -b -B -U 4 -r1.380 -r1.381
--- src/hg/makeDb/doc/hg18.txt	4 Sep 2009 16:51:03 -0000	1.380
+++ src/hg/makeDb/doc/hg18.txt	9 Sep 2009 18:13:50 -0000	1.381
@@ -28615,9 +28615,9 @@
     # Converted stdin, upper limit 11.63, lower limit -28.64
     hgLoadWiggle -pathPrefix=/gbdb/hg18/wib hg18 sfrs1ConsensusSites sfrs1ConsensusSites.wig
     ln -s $(pwd -P)/sfrs1ConsensusSites.wib /gbdb/hg18/wib/
 ############################################################################
-# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-08-04 - 2009-09-04, hartera)
+# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-08-04 - 2009-09-09, hartera)
 # Needs updating as the current version is build 33.
 # Download the human VEGA Genes posted on ftp site on 2009-03-31
 # 2009-08-03 (hartera) - Added code to register track handler for
 # vegaGeneComposite.
@@ -28628,10 +28628,14 @@
 # 2009-08-22 - Finished code for adding Vega report URLs to the details pages.
 # Loaded the vegaGtp table.
 # 2009-09-01 - 2009-09-02 (hartera). Loaded a vegaPep table for the protein
 # sequence link on the details pages.
-# 2009-08-04 Re-load all tables as some reverted to the older version during
+# 2009-09-04 Re-load all tables as some reverted to the older version during
 # mySQL 5 upgrade.
+# 2009-09-08 - 2009-09-09 Changed the message shown on the details page when
+# no protein is available, and changed trackDb to make vegaGene items a
+# darker blue colour. Reloaded vegaPep after removing proteins whose
+# transcripts are not in vegaGtp, so that all.joiner no longer complains.
 
     mkdir /hive/data/genomes/hg18/bed/vega35
     cd /hive/data/genomes/hg18/bed/vega35
     wget --timestamping "ftp://ftp.sanger.ac.uk/pub/vega/human/*" \
@@ -28733,5 +28737,46 @@
     hgsql -e 'drop table vegaGtp;' hg18
     hgLoadSqlTab hg18 vegaGtp ensGtp.sql vegaGtp.tab
     hgsql -e 'drop table vegaPep;' hg18
     hgPepPred hg18 tab vegaPep vegaPep.hg18.fa.tab
+
+    # 2009-09-08 (hartera). Made the details page message shown when no
+    # protein sequence is available more explanatory: "Non-protein
+    # coding gene or gene fragment, no protein prediction available." Changed
+    # the colouring for the vegaGene subtrack to a darker blue so there is
+    # more contrast between the vegaGene and vegaPseudoGene subtracks.
+
+    # 2009-09-09 (hartera) - Re-loaded the vegaPep table with only those
+    # proteins that have a transcript ID in vegaGtp.
+    # all.joiner complains because there are about 23,000 extra proteins in
+    # vegaPep (60003 - 36747 = 23256) without transcripts in vegaGtp. Decided
+    # to remove these and e-mailed the HAVANA group about the discrepancy.
+    cd /hive/data/genomes/hg18/bed/vega35
+    awk '{print $2}' vegaGtp.tab | sort | uniq > vegaGtp.tx.ids
+    awk '{print $1}' vegaPep.hg18.fa.tab | sort | uniq > vegaPep.tx.ids
+    wc -l *.tx.ids
+    # 81244 vegaGtp.tx.ids
+    # 60003 vegaPep.tx.ids
+    # Number of transcripts that have a protein ID:
+    hgsql -Ne 'select transcript from vegaGtp where protein like "OTTHUMP%";' \
+         hg18 | sort | uniq > vegaGtpWithProt.tx.ids
+    wc -l vegaGtpWithProt.tx.ids        
+    # 36747 vegaGtpWithProt.tx.ids
+  
+    # Find the transcript IDs common to vegaGtp.tx.ids and vegaPep.tx.ids.
+    comm -12 vegaGtp.tx.ids vegaPep.tx.ids > pepandGtp.tx.ids
+    wc -l pepandGtp.tx.ids 
+    # 36747 pepandGtp.tx.ids
+    comm -12 pepandGtp.tx.ids vegaGtpWithProt.tx.ids | wc -l
+    # 36747 
+    # Therefore all the vegaGtp transcripts with a protein ID are in the
+    # protein FASTA file.
+    hgsql -Ne 'select * from vegaPep as p, vegaGtp as g where g.protein \
+          like "OTTHUMP%" and p.name = g.transcript;' hg18 \
+          > vegaPepOnlyInGtp.hg18.fa.tab
+    wc -l vegaPepOnlyInGtp.hg18.fa.tab 
+    # 36747 vegaPepOnlyInGtp.hg18.fa.tab
+ 
+    hgsql -e 'drop table vegaPep;' hg18
+    hgPepPred hg18 tab vegaPep vegaPepOnlyInGtp.hg18.fa.tab    
+
 ############################################################################