src/hg/makeDb/doc/hg18.txt 1.357

1.357 2009/03/12 22:35:13 aamp
Vega genes update.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.356
retrieving revision 1.357
diff -b -B -U 4 -r1.356 -r1.357
--- src/hg/makeDb/doc/hg18.txt	12 Mar 2009 18:24:04 -0000	1.356
+++ src/hg/makeDb/doc/hg18.txt	12 Mar 2009 22:35:13 -0000	1.357
@@ -27246,4 +27246,34 @@
 #33.401u 3.275s 1:46.95 34.2%    0+0k 0+0io 0pf+0w
 
 
 #############################################################################
+
+############################################################################
+# VEGA GENES UPDATE (BUILD 33) (DONE 2009-03-11 Andy)
+    mkdir  /cluster/data/hg18/bed/vega33
+    cd  /cluster/data/hg18/bed/vega33
+    wget --timestamping "ftp://ftp.sanger.ac.uk/pub/vega/human/*" \
+         "ftp://ftp.sanger.ac.uk/pub/vega/human/pep/*.tot.fa.gz"
+    zcat gtf_file.gz | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/" \
+        | grep "^chr" > nonHaps.gtf
+    zcat gtf_file.gz | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/" \
+        | grep -v "^chr" > haps.gtf
+    awk 'BEGIN{OFS="\t";FS="\t";}{ if ($1 == "c6_COX") { if (($4 >= 28688544) && ($5 <= 33420241)) print; } else if ($1 == "c6_QBL") { if (($4 >= 28885510) && ($5 <= 33451440)) print;}}' haps.gtf > keeptHaps.gtf
+    liftUp -type=.gtf lifted.gtf /cluster/data/hg18/jkStuff/ensGene.haplotype.lift carry keeptHaps.gtf
+    cat nonHaps.gtf lifted.gtf > all.gtf
+    gzip all.gtf
+    rm *.gtf
+    gtfToGenePred -infoOut=infoOut.txt -genePredExt all.gtf.gz stdout | gzip > all.gp.gz
+    /cluster/home/hiram/kent/src/hg/utils/automation/extractGtf.pl infoOut.txt > ensGtp.tab
+    genePredCheck -db=hg18 all.gp.gz 
+#checked: 69859 failed: 0
+    zcat all.gtf.gz | grep -i pseudo > pseudo.gtf
+    zcat all.gtf.gz | grep -v -i pseudo > not.pseudo.gtf
+    gtfToGenePred -genePredExt pseudo.gtf pseudo.gp
+    gtfToGenePred -genePredExt not.pseudo.gtf not.pseudo.gp
+    genePredCheck -db=hg18 pseudo.gp
+#checked: 6901 failed: 0
+    genePredCheck -db=hg18 not.pseudo.gp
+#checked: 62958 failed: 0
+    hgLoadGenePred -genePredExt hg18 vegaGene not.pseudo.gp
+    hgLoadGenePred -genePredExt hg18 vegaPseudoGene pseudo.gp