src/hg/makeDb/doc/mm9.txt 1.110
1.110 2009/09/09 18:13:55 hartera
Documented code changes and reloaded vegaPep table with only those proteins whose transcripts are in vegaGtp.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.109
retrieving revision 1.110
diff -b -B -U 4 -r1.109 -r1.110
--- src/hg/makeDb/doc/mm9.txt 4 Sep 2009 16:52:27 -0000 1.109
+++ src/hg/makeDb/doc/mm9.txt 9 Sep 2009 18:13:55 -0000 1.110
@@ -9489,9 +9489,9 @@
svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
see doc/builds.txt for specific details.
############################################################################
-# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-07-30 - 2009-09-04, hartera)
+# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-07-30 - 2009-09-09, hartera)
# Needs updating as the current version is build 31 from May 2008.
# 2009-08-03 (hartera) - Added code to register track handler for
# vegaGeneComposite.
# 2009-08-15 - 2009-08-16 (hartera) - Added code to allow use of radio buttons
@@ -9501,10 +9501,15 @@
# 2009-08-22 - Finished code for adding Vega report URLs to the details pages.
# Loaded the vegaGtp table.
# 2009-09-01 and 2009-09-03 (hartera). Loaded a vegaPep table for the protein
# sequence link on the details pages.
-# 2009-08-04 Re-load all tables as some reverted to the older version during
+# 2009-09-04 Re-load all tables as some reverted to the older version during
# mySQL 5 upgrade.
+# 2009-09-08 - 2009-09-09 Changed the message shown on the details page when
+# no protein is available and changed trackDb to make vegaGene items a
+# darker blue colour. Reloaded vegaPep after removing proteins whose
+# transcripts are not in vegaGtp to make all.joiner happy.
+
mkdir -p /hive/data/genomes/mm9/bed/vega35
cd /hive/data/genomes/mm9/bed/vega35
# Download the VEGA genes for mouse from the ftp site
# This file is from 03/17/09.
@@ -9616,8 +9621,47 @@
hgsql -e 'drop table vegaGtp;' mm9
hgLoadSqlTab mm9 vegaGtp ensGtp.sql vegaGtp.tab
hgsql -e 'drop table vegaPep;' mm9
hgPepPred mm9 tab vegaPep vegaPep.mm9.fa.tab
+ # 2009-09-08 (hartera). Changed the message shown on the details page when
+ # no protein sequence is available to be more explanatory: "Non-protein
+ # coding gene or gene fragment, no protein prediction available." Changed
+ # the colouring for the vegaGene subtrack to be darker blue so there is
+ # more of a contrast between vegaGene and vegaPseudoGene subtracks.
+
+ # 2009-09-09 (hartera) - re-loaded vegaPep table with only those proteins
+ # that have a transcript ID in vegaGtp.
+ # all.joiner is complaining as there are about 1,000 extra proteins in
+ # vegaPep that do not have transcripts in vegaGtp. Decided to remove these
+ # and e-mailed the HAVANA group to ask about the discrepancy.
+ cd /hive/data/genomes/mm9/bed/vega35
+ awk '{print $2}' vegaGtp.tab | sort | uniq > vegaGtp.tx.ids
+ awk '{print $1}' vegaPep.mm9.fa.tab | sort | uniq > vegaPep.tx.ids
+ wc -l *.tx.ids
+ # 59381 vegaGtp.tx.ids
+ # 30956 vegaPep.tx.ids
+
+ # Number of transcripts that have a protein ID:
+ hgsql -Ne 'select transcript from vegaGtp where protein like "OTTMUSP%";' \
+ mm9 | sort | uniq > vegaGtpWithProt.tx.ids
+ wc -l vegaGtpWithProt.tx.ids
+ # 29902 vegaGtpWithProt.tx.ids
+
+ # Find the transcript IDs that are common to both ID lists.
+ comm -12 vegaGtp.tx.ids vegaPep.tx.ids > pepandGtp.tx.ids
+ wc -l pepandGtp.tx.ids
+ # 29902 pepandGtp.tx.ids
+ comm -12 pepandGtp.tx.ids vegaGtpWithProt.tx.ids | wc -l
+ # 29902
+ # Therefore all the vegaGtp transcripts with a protein ID are in the
+ # protein FASTA file.
+ hgsql -Ne 'select * from vegaPep as p, vegaGtp as g where g.protein \
+ like "OTTMUSP%" and p.name = g.transcript;' mm9 \
+ > vegaPepOnlyInGtp.mm9.fa.tab
+ wc -l vegaPepOnlyInGtp.mm9.fa.tab
+ # 29902 vegaPepOnlyInGtp.mm9.fa.tab
+ hgsql -e 'drop table vegaPep;' mm9
+ hgPepPred mm9 tab vegaPep vegaPepOnlyInGtp.mm9.fa.tab
############################################################################
# Blastz Elephant loxAfr3 (DONE - 2009-08-12 - Hiram)
mkdir /hive/data/genomes/mm9/bed/lastzLoxAfr3.2009-08-12