src/hg/makeDb/doc/makeEnsembl.txt 1.18
1.18 2010/05/27 23:22:21 hiram
bosTau4 and gasAcu1 problems taken care of for v58 update
Index: src/hg/makeDb/doc/makeEnsembl.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/makeEnsembl.txt,v
retrieving revision 1.17
retrieving revision 1.18
diff -b -B -U 4 -r1.17 -r1.18
--- src/hg/makeDb/doc/makeEnsembl.txt 27 May 2010 22:53:51 -0000 1.17
+++ src/hg/makeDb/doc/makeEnsembl.txt 27 May 2010 23:22:21 -0000 1.18
@@ -119,8 +119,79 @@
featureBits rn4 ensGene
# 46705616 bases of 2571531505 (1.816%) in intersection
############################################################################
+# bosTau4 was broken - finished manually (DONE - 2010-05-27 - Hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/bosTau4
+ cat << '_EOF_' > bosTau4.ensGene.ra
+# required db variable
+db bosTau4
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
+# cause SQL tables to be fetched to see if chrUn can be fixed up
+# geneScaffolds yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 bosTau4.ensGene.ra
+ # broken during processing, fix doProcess.csh to eliminate AAFC03011182
+ ssh hgwdev
+ cd /hive/data/genomes/bosTau4/bed/ensGene.58/process
+ mv allGenes.gtf.gz allGenes.gtf.gz.0
+ zcat ../download/Bos_taurus.Btau_4.0.58.gtf.gz \
+ | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
+ | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
+ gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
+ | gzip > bosTau4.allGenes.gp.gz
+ /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
+ genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
+ cd /hive/data/genomes/bosTau4
+ doEnsGeneUpdate.pl -ensVersion=58 -continue=load bosTau4.ensGene.ra \
+ > ens.58.load 2>&1
+ featureBits bosTau4 ensGene
+ # 42306079 bases of 2731830700 (1.549%) in intersection
+
+############################################################################
+# gasAcu1 - Stickleback - Ensembl Genes version 58 (DONE - 2010-05-27 - Hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/gasAcu1
+ cat << '_EOF_' > gasAcu1.ensGene.ra
+# required db variable
+db gasAcu1
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 gasAcu1.ensGene.ra
+
+ # requires extra attention after the attempt in the all-databases for loop
+ cd /hive/data/genomes/gasAcu1/bed/ensGene.58/process
+ mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
+ zcat gasAcu1.allGenes.gp.beforeLift.gz \
+ | liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
+ ../../../jkStuff/contigsToScaffolds.lft carry stdin
+ liftUp -extGenePred gasAcu1.allGenes.gp \
+ ../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
+ gasAcu1.scaffolds.gp
+ gzip gasAcu1.scaffolds.gp
+ gzip gasAcu1.allGenes.gp
+ # verify OK
+ genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
+ # checked: 29245 failed: 0
+
+ # then continue with the load
+ cd /hive/data/genomes/gasAcu1
+ doEnsGeneUpdate.pl -continue=load -ensVersion=58 gasAcu1.ensGene.ra \
+ > ens.58.load 2>&1
+
+ featureBits gasAcu1 ensGene
+ # 36792090 bases of 446627861 (8.238%) in intersection
+
+############################################################################
############################################################################
# ensembl 57 update (DONE - 2010-04-02 - Hiram)
############################################################################