src/hg/makeDb/doc/hg18.txt 1.375
1.375 2009/08/04 21:41:12 hartera
Updated Vega Genes track to build 35 to bring it up to the current release.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.374
retrieving revision 1.375
diff -b -B -U 4 -r1.374 -r1.375
--- src/hg/makeDb/doc/hg18.txt 29 Jul 2009 04:24:25 -0000 1.374
+++ src/hg/makeDb/doc/hg18.txt 4 Aug 2009 21:41:12 -0000 1.375
@@ -28582,4 +28582,49 @@
# Converted stdin, upper limit 11.63, lower limit -28.64
hgLoadWiggle -pathPrefix=/gbdb/hg18/wib hg18 sfrs1ConsensusSites sfrs1ConsensusSites.wig
ln -s $(pwd -P)/sfrs1ConsensusSites.wib /gbdb/hg18/wib/
############################################################################
+# VEGA GENES UPDATE TO BUILD 35 (DONE, 2009-08-04, hartera)
+# The track needs updating as the existing version is build 33.
+# Download the human VEGA Genes posted on the FTP site on 2009-03-31.
+ mkdir /hive/data/genomes/hg18/bed/vega35
+ cd /hive/data/genomes/hg18/bed/vega35
+ wget --timestamping "ftp://ftp.sanger.ac.uk/pub/vega/human/*" \
+ "ftp://ftp.sanger.ac.uk/pub/vega/human/pep/*.tot.fa.gz"
+ zcat gtf_file.gz | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/" \
+ | grep "^chr" > nonHaps.gtf
+ zcat gtf_file.gz | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/" \
+ | grep -v "^chr" > haps.gtf
+ awk 'BEGIN{OFS="\t";FS="\t";}{ if ($1 == "c6_COX") { if (($4 >= 28688544) && ($5 <= 33420241)) print; } else if ($1 == "c6_QBL") { if (($4 >= 28885510) && ($5 <= 33451440)) print;}}' haps.gtf > keptHaps.gtf
+ liftUp -type=.gtf lifted.gtf /cluster/data/hg18/jkStuff/ensGene.haplotype.lift carry keptHaps.gtf
+ cat nonHaps.gtf lifted.gtf > all.gtf
+
+ # Run gtfToGenePred with -infoOut to create the infoOut.txt file, then
+ # extract the extra information from it into vegaGtp.tab.
+ gtfToGenePred -infoOut=infoOut.txt -genePredExt all.gtf stdout | gzip > tempAll.gp.gz
+ ~/kent/src/hg/utils/automation/extractGtf.pl infoOut.txt > vegaGtp.tab
+
+ # Rename the original gene_id attribute to other_gene_id and relabel
+ # gene_name as gene_id, so that the gene name ends up in the name2 field
+ # of the extended genePred table. It can then be displayed as the track
+ # item label.
+ perl -pi.bak -e 's/gene_id/other_gene_id/' all.gtf
+ perl -pi.bak -e 's/gene_name/gene_id/' all.gtf
+ gzip all.gtf
+ rm *.gtf tempAll.gp.gz
+ # create genePred files for loading into database
+ gtfToGenePred -genePredExt all.gtf.gz stdout | gzip > all.gp.gz
+ genePredCheck -db=hg18 all.gp.gz
+ # checked: 81244 failed: 0
+ zcat all.gtf.gz | grep -i pseudo > pseudo.gtf
+ zcat all.gtf.gz | grep -v -i pseudo > not.pseudo.gtf
+ gtfToGenePred -genePredExt pseudo.gtf pseudo.gp
+ gtfToGenePred -genePredExt not.pseudo.gtf not.pseudo.gp
+ genePredCheck -db=hg18 pseudo.gp
+ # checked: 8331 failed: 0
+ genePredCheck -db=hg18 not.pseudo.gp
+ # checked: 72913 failed: 0
+ hgLoadGenePred -genePredExt hg18 vegaGene not.pseudo.gp
+ hgLoadGenePred -genePredExt hg18 vegaPseudoGene pseudo.gp
+ # Added code to src/hg/hgTracks/simpleTracks.c to register a track handler
+ # for vegaGeneComposite, which is now used for this data. The handler uses
+ # vegaGeneMethods to display the name2 field (gene name) as the item label
+ # in the track.
+############################################################################