src/hg/makeDb/doc/mm9.txt 1.110

1.110 2009/09/09 18:13:55 hartera
Documented code changes and reloaded vegaPep table with only those proteins whose transcripts are in vegaGtp.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.109
retrieving revision 1.110
diff -b -B -U 4 -r1.109 -r1.110
--- src/hg/makeDb/doc/mm9.txt	4 Sep 2009 16:52:27 -0000	1.109
+++ src/hg/makeDb/doc/mm9.txt	9 Sep 2009 18:13:55 -0000	1.110
@@ -9489,9 +9489,9 @@
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 ############################################################################
-# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-07-30 - 2009-09-04, hartera)
+# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-07-30 - 2009-09-09, hartera)
 # Needs updating as the current version is build 31 from May 2008.
 # 2009-08-03 (hartera) - Added code to register track handler for
 # vegaGeneComposite.
 # 2009-08-15 - 2009-08-16 (hartera) - Added code to allow use of radio buttons
@@ -9501,10 +9501,15 @@
 # 2009-08-22 - Finished code for adding Vega report URLs to the details pages.
 # Loaded the vegaGtp table.
 # 2009-09-01 and 2009-09-03 (hartera). Loaded a vegaPep table for the protein
 # sequence link on the details pages.
-# 2009-08-04 Re-load all tables as some reverted to the older version during
+# 2009-09-04 Re-load all tables as some reverted to the older version during
 # mySQL 5 upgrade.
+# 2009-09-08 - 2009-09-09 Changed the message shown on the details page when
+# no protein is available, and changed trackDb to make vegaGene items a
+# darker blue colour. Reloaded vegaPep after removing proteins whose
+# transcripts are not in vegaGtp so that all.joiner no longer complains.
+
    mkdir -p /hive/data/genomes/mm9/bed/vega35
    cd /hive/data/genomes/mm9/bed/vega35
    # Download the VEGA genes for mouse from the ftp site
    # This file is from 03/17/09.
@@ -9616,8 +9621,47 @@
    hgsql -e 'drop table vegaGtp;' mm9
    hgLoadSqlTab mm9 vegaGtp ensGtp.sql vegaGtp.tab
    hgsql -e 'drop table vegaPep;' mm9
    hgPepPred mm9 tab vegaPep vegaPep.mm9.fa.tab
+   # 2009-09-08 (hartera). Made the details page message shown when no
+   # protein sequence is available more explanatory: "Non-protein
+   # coding gene or gene fragment, no protein prediction available." Changed
+   # the colouring for the vegaGene subtrack to a darker blue so there is
+   # more contrast between the vegaGene and vegaPseudoGene subtracks.
+
+   # 2009-09-09 (hartera) - re-loaded vegaPep table with only those proteins
+   # that have a transcript ID in vegaGtp. 
+   # all.joiner is complaining as there are about 1,000 extra proteins in 
+   # vegaPep that do not have transcripts in vegaGtp. Decided to remove these
+   # and e-mailed the HAVANA group to ask about the discrepancy. 
+   cd /hive/data/genomes/mm9/bed/vega35
+   awk '{print $2}' vegaGtp.tab | sort | uniq > vegaGtp.tx.ids
+   awk '{print $1}' vegaPep.mm9.fa.tab | sort | uniq > vegaPep.tx.ids
+   wc -l *.tx.ids
+   # 59381 vegaGtp.tx.ids
+   # 30956 vegaPep.tx.ids
+   
+   # Number of transcripts that have a protein ID:
+   hgsql -Ne 'select transcript from vegaGtp where protein like "OTTMUSP%";' \
+        mm9 | sort | uniq > vegaGtpWithProt.tx.ids
+   wc -l vegaGtpWithProt.tx.ids        
+   # 29902 vegaGtpWithProt.tx.ids
+ 
+   # find those that are common to both. 
+   comm -12 vegaGtp.tx.ids vegaPep.tx.ids > pepandGtp.tx.ids
+   wc -l pepandGtp.tx.ids 
+   # 29902 pepandGtp.tx.ids
+   comm -12 pepandGtp.tx.ids vegaGtpWithProt.tx.ids | wc -l 
+   # 29902
+   # Therefore all the vegaGtp transcripts with a protein ID are in the
+   # protein FASTA file.
+   hgsql -Ne 'select * from vegaPep as p, vegaGtp as g where g.protein \
+         like "OTTMUSP%" and p.name = g.transcript;' mm9 \
+         > vegaPepOnlyInGtp.mm9.fa.tab
+   wc -l vegaPepOnlyInGtp.mm9.fa.tab 
+   # 29902 vegaPepOnlyInGtp.mm9.fa.tab  
+   hgsql -e 'drop table vegaPep;' mm9
+   hgPepPred mm9 tab vegaPep vegaPepOnlyInGtp.mm9.fa.tab
 
 ############################################################################
 # Blastz Elephant loxAfr3 (DONE - 2009-08-12 - Hiram)
     mkdir /hive/data/genomes/mm9/bed/lastzLoxAfr3.2009-08-12