src/hg/makeDb/doc/makeEnsembl.txt 1.19
1.19 2010/05/28 18:32:33 hiram
all v58 build sequences done
Index: src/hg/makeDb/doc/makeEnsembl.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/makeEnsembl.txt,v
retrieving revision 1.18
retrieving revision 1.19
diff -b -B -U 4 -r1.18 -r1.19
--- src/hg/makeDb/doc/makeEnsembl.txt 27 May 2010 23:22:21 -0000 1.18
+++ src/hg/makeDb/doc/makeEnsembl.txt 28 May 2010 18:32:33 -0000 1.19
@@ -8,8 +8,40 @@
############################################################################
# ensembl 58 update (WORKING - 2010-05-27 - Hiram)
############################################################################
+# susScr2 - Pig - lifted susScr1 v58 genes to susScr2 (DONE - 2010-05-28 - Hiram)
+ mkdir /hive/data/genomes/susScr2/bed/ensGene.58
+ cd /hive/data/genomes/susScr2/bed/ensGene.58
+ ln -s ../../../susScr1/bed/ensGene.58/process/susScr1.allGenes.gp.gz .
+ zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
+ ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
+ zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
+ liftOver -genePred susScr1.allGenes.genePred \
+ susScr1ToSusScr2.over.chain \
+ susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
+ gzip susScr2.allGenes.gp
+ hgLoadGenePred -genePredExt susScr2 \
+ ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1
+
+ zcat \
+../../../susScr1/bed/ensGene.58/download/Sus_scrofa.Sscrofa9.58.pep.all.fa.gz \
+ | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
+ | gzip > ensPep.txt.gz
+ zcat ensPep.txt.gz \
+ | ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
+ | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
+ hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
+ ln -s ../../../susScr1/bed/ensGene.58/process/ensGtp.tab .
+ hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
+ hgsql -e 'INSERT INTO trackVersion \
+ (db, name, who, version, updateTime, comment, source, dateReference) \
+ VALUES("susScr2", "ensGene", "hiram", "58", now(), \
+ "with peptides Sus_scrofa.Sscrofa9.58.pep.all.fa.gz", \
+ "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-58/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.58.gtf.gz", \
+ "current" );' hgFixed
+
+############################################################################
# mm9 - Mouse - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
cd /hive/data/genomes/mm9
cat << '_EOF_' > mm9.ensGene.ra
# required db variable
@@ -190,8 +222,853 @@
featureBits gasAcu1 ensGene
# 36792090 bases of 446627861 (8.238%) in intersection
############################################################################
+# anoCar1 - Lizard - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/anoCar1
+ cat << '_EOF_' > anoCar1.ensGene.ra
+# required db variable
+db anoCar1
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 anoCar1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/anoCar1/bed/ensGene.58
+ featureBits anoCar1 ensGene
+ # 26974393 bases of 1741478929 (1.549%) in intersection
+
+############################################################################
+# canFam2 - Dog - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/canFam2
+ cat << '_EOF_' > canFam2.ensGene.ra
+# required db variable
+db canFam2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 canFam2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/canFam2/bed/ensGene.58
+ featureBits canFam2 ensGene
+ # 34693517 bases of 2384996543 (1.455%) in intersection
+
+############################################################################
+# cavPor3 - Guinea pig - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/cavPor3
+ cat << '_EOF_' > cavPor3.ensGene.ra
+# required db variable
+db cavPor3
+# do we need to translate geneScaffold coordinates
+# geneScaffolds yes
+nameTranslation "s/^MT/chrM/;"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 cavPor3.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/cavPor3/bed/ensGene.58
+ featureBits cavPor3 ensGene
+ # 30971317 bases of 2663369733 (1.163%) in intersection
+
+############################################################################
+# choHof1 - Sloth - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/choHof1
+ cat << '_EOF_' > choHof1.ensGene.ra
+# required db variable
+db choHof1
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+geneScaffolds yes
+# during the loading of the gene pred, skip all invalid genes
+skipInvalid yes
+# 18938: ENSCHOT00000005046 no exonFrame on CDS exon 1
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 choHof1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/choHof1/bed/ensGene.58
+ featureBits choHof1 ensGene
+ # 18277677 bases of 2060419685 (0.887%) in intersection
+
+############################################################################
+# ci2 - C. intestinalis - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/ci2
+ cat << '_EOF_' > ci2.ensGene.ra
+# required db variable
+db ci2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 ci2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/ci2/bed/ensGene.58
+ featureBits ci2 ensGene
+ # 20114967 bases of 141233565 (14.242%) in intersection
+
+############################################################################
+# cioSav2 - C. savignyi - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/cioSav2
+ cat << '_EOF_' > cioSav2.ensGene.ra
+# required db variable
+db cioSav2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# optional haplotype lift-down from Ensembl full chrom coordinates
+# to UCSC simple haplotype coordinates
+# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 cioSav2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/cioSav2/bed/ensGene.58
+ featureBits cioSav2 ensGene
+ # 16572478 bases of 173749524 (9.538%) in intersection
+
+############################################################################
+# danRer6 - Zebrafish - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/danRer6
+ cat << '_EOF_' > danRer6.ensGene.ra
+# required db variable
+db danRer6
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 danRer6.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/danRer6/bed/ensGene.58
+ featureBits danRer6 ensGene
+ # 44621280 bases of 1506896106 (2.961%) in intersection
+
+############################################################################
+# dasNov2 - Armadillo - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/dasNov2
+ cat << '_EOF_' > dasNov2.ensGene.ra
+# required db variable
+db dasNov2
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+geneScaffolds yes
+# during the loading of the gene pred, skip all invalid genes
+skipInvalid yes
+# 13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
+# 23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
+# 30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 dasNov2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/dasNov2/bed/ensGene.58
+ featureBits dasNov2 ensGene
+ # 21968025 bases of 2371493872 (0.926%) in intersection
+
+############################################################################
+# dipOrd1 - Kangaroo rat - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/dipOrd1
+ cat << '_EOF_' > dipOrd1.ensGene.ra
+# required db variable
+db dipOrd1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 dipOrd1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/dipOrd1/bed/ensGene.58
+ featureBits dipOrd1 ensGene
+ # 25324463 bases of 1844961421 (1.373%) in intersection
+
+############################################################################
+# echTel1 - Tenrec - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/echTel1
+ cat << '_EOF_' > echTel1.ensGene.ra
+# required db variable
+db echTel1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
+# 44942: ENSETET00000018714 no exonFrame on CDS exon 1
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 echTel1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/echTel1/bed/ensGene.58
+ featureBits echTel1 ensGene
+ # 25769836 bases of 2111581369 (1.220%) in intersection
+
+############################################################################
+# equCab2 - Horse - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/equCab2
+ cat << '_EOF_' > equCab2.ensGene.ra
+# required db variable
+db equCab2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# translate Ensembl chrUnNNNN names to chrUn coordinates
+liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 equCab2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/equCab2/bed/ensGene.58
+ featureBits equCab2 ensGene
+ # 39563285 bases of 2428790173 (1.629%) in intersection
+
+############################################################################
+# eriEur1 - Hedgehog - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/eriEur1
+ cat << '_EOF_' > eriEur1.ensGene.ra
+# required db variable
+db eriEur1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
+# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
+# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 eriEur1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/eriEur1/bed/ensGene.58
+ featureBits eriEur1 ensGene
+ # 22555252 bases of 2133134836 (1.057%) in intersection
+
+############################################################################
+# felCat3 - Cat - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/felCat3
+ cat << '_EOF_' > felCat3.ensGene.ra
+# required db variable
+db felCat3
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
+# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
+# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 felCat3.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/felCat3/bed/ensGene.58
+ featureBits felCat3 ensGene
+ # 22299251 bases of 1642698377 (1.357%) in intersection
+
+############################################################################
+# fr2 - Fugu - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/fr2
+ cat << '_EOF_' > fr2.ensGene.ra
+# required db variable
+db fr2
+nameTranslation "s/^MT/chrM/;"
+# lift Ensembl scaffolds to UCSC chrUn coordinates
+liftUp /cluster/data/fr2/jkStuff/liftAll.lft
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 fr2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/fr2/bed/ensGene.58
+ featureBits fr2 ensGene
+ # 34568537 bases of 393312790 (8.789%) in intersection
+
+############################################################################
+# galGal3 - Chicken - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/galGal3
+ cat << '_EOF_' > galGal3.ensGene.ra
+# required db variable
+db galGal3
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 galGal3.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/galGal3/bed/ensGene.58
+ featureBits galGal3 ensGene
+ # 30741650 bases of 1042591351 (2.949%) in intersection
+
+############################################################################
+# loxAfr3 - Elephant - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/loxAfr3
+ cat << '_EOF_' > loxAfr3.ensGene.ra
+# required db variable
+db loxAfr3
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 loxAfr3.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/loxAfr3/bed/ensGene.58
+ featureBits loxAfr3 ensGene
+ # 32151456 bases of 3118565340 (1.031%) in intersection
+
+############################################################################
+# micMur1 - Mouse lemur - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/micMur1
+ cat << '_EOF_' > micMur1.ensGene.ra
+# required db variable
+db micMur1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 micMur1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/micMur1/bed/ensGene.58
+ featureBits micMur1 ensGene
+ # 25688743 bases of 1852394361 (1.387%) in intersection
+
+############################################################################
+# monDom5 - Opossum - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/monDom5
+ cat << '_EOF_' > monDom5.ensGene.ra
+# required db variable
+db monDom5
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 monDom5.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/monDom5/bed/ensGene.58
+ featureBits monDom5 ensGene
+ # 32982595 bases of 3501660299 (0.942%) in intersection
+
+############################################################################
+# myoLuc1 - Microbat - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/myoLuc1
+ cat << '_EOF_' > myoLuc1.ensGene.ra
+# required db variable
+db myoLuc1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
+# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
+# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 myoLuc1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/myoLuc1/bed/ensGene.58
+ featureBits myoLuc1 ensGene
+ # 24707365 bases of 1673855868 (1.476%) in intersection
+
+############################################################################
+# ochPri2 - Pika - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/ochPri2
+ cat << '_EOF_' > ochPri2.ensGene.ra
+# required db variable
+db ochPri2
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 ochPri2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/ochPri2/bed/ensGene.58
+ featureBits ochPri2 ensGene
+ # 25447435 bases of 1923624051 (1.323%) in intersection
+
+############################################################################
+# ornAna1 - Platypus - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/ornAna1
+ cat << '_EOF_' > ornAna1.ensGene.ra
+# required db variable
+db ornAna1
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly, 365 items
+skipInvalid yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 ornAna1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/ornAna1/bed/ensGene.58
+ featureBits ornAna1 ensGene
+ # 24466294 bases of 1842236818 (1.328%) in intersection
+
+############################################################################
+# oryCun2 - Rabbit - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/oryCun2
+ cat << '_EOF_' > oryCun2.ensGene.ra
+# required db variable
+db oryCun2
+# ensembl appears to still be in scaffolds ?
+liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 oryCun2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/oryCun2/bed/ensGene.58
+ featureBits oryCun2 ensGene
+ # 31785271 bases of 2604023284 (1.221%) in intersection
+
+############################################################################
+# oryLat2 - Medaka - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/oryLat2
+ cat << '_EOF_' > oryLat2.ensGene.ra
+# required db variable
+db oryLat2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
+# ignore 2,687 genes that haven't lifted properly yet
+# skipInvalid yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 oryLat2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/oryLat2/bed/ensGene.58
+ featureBits oryLat2 ensGene
+ # 32313511 bases of 700386597 (4.614%) in intersection
+
+############################################################################
+# otoGar1 - Bushbaby - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/otoGar1
+ cat << '_EOF_' > otoGar1.ensGene.ra
+# required db variable
+db otoGar1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# after geneScaffold conversions, change Ensembl chrom names to UCSC names
+liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 otoGar1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/otoGar1/bed/ensGene.58
+ featureBits otoGar1 ensGene
+ # 23692750 bases of 1969052059 (1.203%) in intersection
+
+############################################################################
+# panTro2 - Chimp - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/panTro2
+ cat << '_EOF_' > panTro2.ensGene.ra
+# required db variable
+db panTro2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 panTro2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/panTro2/bed/ensGene.58
+ featureBits panTro2 ensGene
+ # 50004270 bases of 2909485072 (1.719%) in intersection
+
+############################################################################
+# ponAbe2 - Orangutan - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/ponAbe2
+ cat << '_EOF_' > ponAbe2.ensGene.ra
+# required db variable
+db ponAbe2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# optional haplotype lift-down from Ensembl full chrom coordinates
+# to UCSC simple haplotype coordinates
+# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 ponAbe2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/ponAbe2/bed/ensGene.58
+ featureBits ponAbe2 ensGene
+ # 38120801 bases of 3093572278 (1.232%) in intersection
+
+############################################################################
+# proCap1 - Rock hyrax - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/proCap1
+ cat << '_EOF_' > proCap1.ensGene.ra
+# required db variable
+db proCap1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
+# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 proCap1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/proCap1/bed/ensGene.58
+ featureBits proCap1 ensGene
+ # 25344155 bases of 2407847681 (1.053%) in intersection
+
+############################################################################
+# pteVam1 - Megabat - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/pteVam1
+ cat << '_EOF_' > pteVam1.ensGene.ra
+# required db variable
+db pteVam1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
+# 23522: ENSPVAT00000010661 no exonFrame on CDS exon 0
+
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 pteVam1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/pteVam1/bed/ensGene.58
+ featureBits pteVam1 ensGene
+ # 28966701 bases of 1839436660 (1.575%) in intersection
+
+############################################################################
+# rheMac2 - Rhesus - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/rheMac2
+ cat << '_EOF_' > rheMac2.ensGene.ra
+# required db variable
+db rheMac2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 rheMac2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/rheMac2/bed/ensGene.58
+ featureBits rheMac2 ensGene
+ # 44562701 bases of 2646704109 (1.684%) in intersection
+
+############################################################################
+# sacCer2 - S. cerevisiae - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/sacCer2
+ cat << '_EOF_' > sacCer2.ensGene.ra
+# required db variable
+db sacCer2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^MT/chrM/; s/2-micron/2micron/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 sacCer2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/sacCer2/bed/ensGene.58
+ featureBits sacCer2 ensGene
+ # 8912793 bases of 12162995 (73.278%) in intersection
+
+############################################################################
+# sorAra1 - Shrew - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/sorAra1
+ cat << '_EOF_' > sorAra1.ensGene.ra
+# required db variable
+db sorAra1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 sorAra1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/sorAra1/bed/ensGene.58
+ featureBits sorAra1 ensGene
+ # 19690431 bases of 1832864697 (1.074%) in intersection
+
+############################################################################
+# speTri1 - Squirrel - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/speTri1
+ cat << '_EOF_' > speTri1.ensGene.ra
+# required db variable
+db speTri1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 speTri1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/speTri1/bed/ensGene.58
+ featureBits speTri1 ensGene
+ # 21594682 bases of 1913367893 (1.129%) in intersection
+
+############################################################################
+# susScr1 - Pig - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/susScr1
+ cat << '_EOF_' > susScr1.ensGene.ra
+# required db variable
+db susScr1
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 susScr1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/susScr1/bed/ensGene.58
+ featureBits susScr1 ensGene
+ # 28758401 bases of 2231332019 (1.289%) in intersection
+
+############################################################################
+# taeGut1 - Zebra finch - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/taeGut1
+ cat << '_EOF_' > taeGut1.ensGene.ra
+# required db variable
+db taeGut1
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+# geneScaffolds yes
+# during the loading of the gene pred, skip all invalid genes
+# skipInvalid yes
+# 13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
+# 23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
+# 30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 taeGut1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/taeGut1/bed/ensGene.58
+ featureBits taeGut1 ensGene
+ # 25441417 bases of 1222864691 (2.080%) in intersection
+
+############################################################################
+# tarSyr1 - Tarsier - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/tarSyr1
+ cat << '_EOF_' > tarSyr1.ensGene.ra
+# required db variable
+db tarSyr1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 tarSyr1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/tarSyr1/bed/ensGene.58
+ featureBits tarSyr1 ensGene
+ # 21327582 bases of 2768536343 (0.770%) in intersection
+
+############################################################################
+# tetNig2 - Tetraodon - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/tetNig2
+ cat << '_EOF_' > tetNig2.ensGene.ra
+# required db variable
+db tetNig2
+# optional nameTranslation, the sed command that will transform
+# Ensembl names to UCSC names. With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 tetNig2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/tetNig2/bed/ensGene.58
+ featureBits tetNig2 ensGene
+ # 31642974 bases of 302314788 (10.467%) in intersection
+
+############################################################################
+# tupBel1 - Tree shrew - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/tupBel1
+ cat << '_EOF_' > tupBel1.ensGene.ra
+# required db variable
+db tupBel1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# after geneScaffold conversions, change Ensembl chrom names to UCSC names
+liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
+# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 tupBel1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/tupBel1/bed/ensGene.58
+ featureBits tupBel1 ensGene
+ # 22848284 bases of 2137225476 (1.069%) in intersection
+
+############################################################################
+# turTru1 - Dolphin - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/turTru1
+ cat << '_EOF_' > turTru1.ensGene.ra
+# required db variable
+db turTru1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 turTru1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/turTru1/bed/ensGene.58
+ featureBits turTru1 ensGene
+ # 28614079 bases of 2298444090 (1.245%) in intersection
+
+############################################################################
+# vicPac1 - Alpaca - Ensembl Genes version 58 (DONE - 2010-05-27 - hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/vicPac1
+ cat << '_EOF_' > vicPac1.ensGene.ra
+# required db variable
+db vicPac1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+# names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that does not translate properly to UCSC coordinates
+# 4649: ENSVPAT00000009076 no exonFrame on CDS exon 0
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 vicPac1.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/vicPac1/bed/ensGene.58
+ featureBits vicPac1 ensGene
+ # 17891769 bases of 1922910435 (0.930%) in intersection
+
+############################################################################
+# xenTro2 - X. tropicalis - Ensembl Genes version 58 (DONE - 2010-05-27 -
+# hiram)
+ ssh hgwdev
+ cd /hive/data/genomes/xenTro2
+ cat << '_EOF_' > xenTro2.ensGene.ra
+# required db variable
+db xenTro2
+'_EOF_'
+# << happy emacs
+
+ doEnsGeneUpdate.pl -ensVersion=58 xenTro2.ensGene.ra
+ ssh hgwdev
+ cd /hive/data/genomes/xenTro2/bed/ensGene.58
+ featureBits xenTro2 ensGene
+ # 29181688 bases of 1359412157 (2.147%) in intersection
+
+############################################################################
############################################################################
# ensembl 57 update (DONE - 2010-04-02 - Hiram)
############################################################################