src/hg/makeDb/doc/makeEnsembl.txt 1.12

1.12 2009/07/20 18:30:02 hiram
Version 55 update done
Index: src/hg/makeDb/doc/makeEnsembl.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/makeEnsembl.txt,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 4 -r1.11 -r1.12
--- src/hg/makeDb/doc/makeEnsembl.txt	22 Jan 2009 21:30:53 -0000	1.11
+++ src/hg/makeDb/doc/makeEnsembl.txt	20 Jul 2009 18:30:02 -0000	1.12
@@ -3,8 +3,955 @@
 #  This file is a record of building the Ensembl gene track for all UCSC
 #	genome browsers.  The end of this file has a historical record of
 #	Robert's experiments with an automated process.
 #
+
+############################################################################
+# ensembl 55 updates (WORKING - 2009-07-14 - Hiram)
+    # see also: more notes about how this is done in the "ensembl 50 updates"
+    #	section below  (and in /hive/users/hiram/ensGene/)
+############################################################################
+#  macEug1 - Wallaby - (BROKEN - 2009-07-20 - Hiram)
+    # cannot get this one to work: either the GeneScaffold lift is broken
+    #	or the scaffold names differ; needs investigation
+############################################################################
+#  danRer6 - Zebrafish - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u05
+    cd /hive/data/genomes/danRer6
+    cat << '_EOF_' > danRer6.ensGene.ra
+# required db variable
+db danRer6
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 danRer6.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/danRer6/bed/ensGene.55
+    featureBits danRer6 ensGene
+    # 44586206 bases of 1506896106 (2.959%) in intersection
+############################################################################
+#  hg19 - Human - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/hg19
+    cat << '_EOF_' > hg19.ensGene.ra
+# required db variable
+db hg19
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# optionally update the knownToEnsembl table after ensGene updated
+knownToEnsembl yes
+# optional haplotype lift-down from Ensembl full chrom coordinates
+#       to UCSC simple haplotype coordinates
+haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
+# liftUp /hive/data/genomes/hg19/jkStuff/liftContigs.lft
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 hg19.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/hg19/bed/ensGene.55
+    featureBits hg19 ensGene
+    # 85295627 bases of 2897316137 (2.944%) in intersection
+############################################################################
+#  anoCar1 - Lizard - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/anoCar1
+    cat << '_EOF_' > anoCar1.ensGene.ra
+# required db variable
+db anoCar1
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 anoCar1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/anoCar1/bed/ensGene.55
+    featureBits anoCar1 ensGene
+    # 26956669 bases of 1741478929 (1.548%) in intersection
+############################################################################
+#  choHof1 - Sloth - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/choHof1
+    cat << '_EOF_' > choHof1.ensGene.ra
+# required db variable
+db choHof1
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+geneScaffolds yes
+#       during the loading of the gene pred, skip all invalid genes
+skipInvalid yes
+#       18938: ENSCHOT00000005046 no exonFrame on CDS exon 1
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 choHof1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/choHof1/bed/ensGene.55
+    featureBits choHof1 ensGene
+    # 18231244 bases of 2060419685 (0.885%) in intersection
+############################################################################
+#  dasNov2 - Armadillo - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u02
+    cd /hive/data/genomes/dasNov2
+    cat << '_EOF_' > dasNov2.ensGene.ra
+# required db variable
+db dasNov2
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+geneScaffolds yes
+#       during the loading of the gene pred, skip all invalid genes
+skipInvalid yes
+#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
+#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
+#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 dasNov2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/dasNov2/bed/ensGene.55
+    featureBits dasNov2 ensGene
+    # 21864229 bases of 2371493872 (0.922%) in intersection
+############################################################################
+#  loxAfr2 - Elephant - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/loxAfr2
+    cat << '_EOF_' > loxAfr2.ensGene.ra
+# required db variable
+db loxAfr2
+# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+geneScaffolds yes
+#       during the loading of the gene pred, skip all invalid genes
+skipInvalid yes
+#        ENSLAFT00000000586 no exonFrame on CDS exon 4
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 loxAfr2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/loxAfr2/bed/ensGene.55
+    featureBits loxAfr2 ensGene
+    # 23586871 bases of 2444975542 (0.965%) in intersection
+
+############################################################################
+#  bosTau4 - Cow - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
+    ssh kkr14u02
+    cd /hive/data/genomes/bosTau4
+    cat << '_EOF_' > bosTau4.ensGene.ra
+# required db variable
+db bosTau4
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
+# cause SQL tables to be fetched to see if chrUn can be fixed up
+# geneScaffolds yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 bosTau4.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/bosTau4/bed/ensGene.55
+    featureBits bosTau4 ensGene
+    # 42207115 bases of 2731830700 (1.545%) in intersection
+
+############################################################################
+#  canFam2 - Dog - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/canFam2
+    cat << '_EOF_' > canFam2.ensGene.ra
+# required db variable
+db canFam2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 canFam2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/canFam2/bed/ensGene.55
+    featureBits canFam2 ensGene
+    # 34634472 bases of 2384996543 (1.452%) in intersection
+
+############################################################################
+#  cavPor3 - Guinea Pig - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u01
+    cd /hive/data/genomes/cavPor3
+    cat << '_EOF_' > cavPor3.ensGene.ra
+# required db variable
+db cavPor3
+# do we need to translate geneScaffold coordinates
+# geneScaffolds yes
+nameTranslation "s/^MT/chrM/;"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 cavPor3.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/cavPor3/bed/ensGene.55
+    featureBits cavPor3 ensGene
+    # 30852014 bases of 2663369733 (1.158%) in intersection
+
+############################################################################
+#  ci2 - C. intestinalis - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/ci2
+    cat << '_EOF_' > ci2.ensGene.ra
+# required db variable
+db ci2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 ci2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/ci2/bed/ensGene.55
+    featureBits ci2 ensGene
+    # 20113161 bases of 141233565 (14.241%) in intersection
+
+############################################################################
+#  cioSav2 - C. savignyi - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/cioSav2
+    cat << '_EOF_' > cioSav2.ensGene.ra
+# required db variable
+db cioSav2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# optional haplotype lift-down from Ensembl full chrom coordinates
+#       to UCSC simple haplotype coordinates
+# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 cioSav2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/cioSav2/bed/ensGene.55
+    featureBits cioSav2 ensGene
+    # 16616680 bases of 173749524 (9.564%) in intersection
+
+############################################################################
+#  dipOrd1 - Kangaroo rat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/dipOrd1
+    cat << '_EOF_' > dipOrd1.ensGene.ra
+# required db variable
+db dipOrd1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 dipOrd1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/dipOrd1/bed/ensGene.55
+    featureBits dipOrd1 ensGene
+    # 25275613 bases of 1844961421 (1.370%) in intersection
+
+############################################################################
+#  echTel1 - Tenrec - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/echTel1
+    cat << '_EOF_' > echTel1.ensGene.ra
+# required db variable
+db echTel1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
+# 44942: ENSETET00000018714 no exonFrame on CDS exon 1
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 echTel1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/echTel1/bed/ensGene.55
+    featureBits echTel1 ensGene
+    # 25563184 bases of 2111581369 (1.211%) in intersection
+
+############################################################################
+#  equCab2 - Horse - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/equCab2
+    cat << '_EOF_' > equCab2.ensGene.ra
+# required db variable
+db equCab2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+#       translate Ensembl chrUnNNNN names to chrUn coordinates
+liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 equCab2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/equCab2/bed/ensGene.55
+    featureBits equCab2 ensGene
+    # 39506745 bases of 2428790173 (1.627%) in intersection
+
+############################################################################
+#  eriEur1 - Hedgehog - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u01
+    cd /hive/data/genomes/eriEur1
+    cat << '_EOF_' > eriEur1.ensGene.ra
+# required db variable
+db eriEur1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
+# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
+# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 eriEur1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/eriEur1/bed/ensGene.55
+    featureBits eriEur1 ensGene
+    # 22480171 bases of 2133134836 (1.054%) in intersection
+
+############################################################################
+#  felCat3 - Cat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u05
+    cd /hive/data/genomes/felCat3
+    cat << '_EOF_' > felCat3.ensGene.ra
+# required db variable
+db felCat3
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
+# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
+# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 felCat3.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/felCat3/bed/ensGene.55
+    featureBits felCat3 ensGene
+    # 22220711 bases of 1642698377 (1.353%) in intersection
+
+############################################################################
+#  fr2 - Fugu - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u06
+    cd /hive/data/genomes/fr2
+    cat << '_EOF_' > fr2.ensGene.ra
+# required db variable
+db fr2
+nameTranslation "s/^MT/chrM/;"
+# lift Ensembl scaffolds to UCSC chrUn coordinates
+liftUp /cluster/data/fr2/jkStuff/liftAll.lft
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 fr2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/fr2/bed/ensGene.55
+    featureBits fr2 ensGene
+    # 34560383 bases of 393312790 (8.787%) in intersection
+
+############################################################################
+#  galGal3 - Chicken - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/galGal3
+    cat << '_EOF_' > galGal3.ensGene.ra
+# required db variable
+db galGal3
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 galGal3.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/galGal3/bed/ensGene.55
+    featureBits galGal3 ensGene
+    # 30733557 bases of 1042591351 (2.948%) in intersection
+
+############################################################################
+#  gasAcu1 - Stickleback - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/gasAcu1
+    cat << '_EOF_' > gasAcu1.ensGene.ra
+# required db variable
+db gasAcu1
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 gasAcu1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/gasAcu1/bed/ensGene.55
+    featureBits gasAcu1 ensGene
+    # 36789271 bases of 446627861 (8.237%) in intersection
+
+############################################################################
+#  gorGor1 - Gorilla - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u04
+    cd /hive/data/genomes/gorGor1
+    cat << '_EOF_' > gorGor1.ensGene.ra
+# required db variable
+db gorGor1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# after geneScaffold conversions, change Ensembl chrom names to UCSC
+# names
+liftUp /hive/data/genomes/gorGor1/jkStuff/ensemblLiftToUcsc.lift
+# ignore the single gene that has an invalid structure from Ensembl:
+skipInvalid yes
+# 8939: ENSGGOT00000010340 no exonFrame on CDS exon 3
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 gorGor1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/gorGor1/bed/ensGene.55
+    featureBits gorGor1 ensGene
+    # 23242041 bases of 2075548667 (1.120%) in intersection
+
+############################################################################
+#  micMur1 - Mouse lemur - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/micMur1
+    cat << '_EOF_' > micMur1.ensGene.ra
+# required db variable
+db micMur1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 micMur1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/micMur1/bed/ensGene.55
+    featureBits micMur1 ensGene
+    # 25659397 bases of 1852394361 (1.385%) in intersection
+
+############################################################################
+#  mm9 - Mouse - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/mm9
+    cat << '_EOF_' > mm9.ensGene.ra
+# required db variable
+db mm9
+# optional liftRandoms yes/no or absent
+liftRandoms yes
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
+# optionally update the knownToEnsembl table after ensGene updated
+knownToEnsembl yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 mm9.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/mm9/bed/ensGene.55
+    featureBits mm9 ensGene
+    # 63272128 bases of 2620346127 (2.415%) in intersection
+
+############################################################################
+#  monDom5 - Opossum - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/monDom5
+    cat << '_EOF_' > monDom5.ensGene.ra
+# required db variable
+db monDom5
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 monDom5.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/monDom5/bed/ensGene.55
+    featureBits monDom5 ensGene
+    # 32999268 bases of 3501660299 (0.942%) in intersection
+
+############################################################################
+#  myoLuc1 - Microbat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/myoLuc1
+    cat << '_EOF_' > myoLuc1.ensGene.ra
+# required db variable
+db myoLuc1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
+# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
+# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 myoLuc1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/myoLuc1/bed/ensGene.55
+    featureBits myoLuc1 ensGene
+    # 24630744 bases of 1673855868 (1.471%) in intersection
+
+############################################################################
+#  ochPri2 - Pika - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u07
+    cd /hive/data/genomes/ochPri2
+    cat << '_EOF_' > ochPri2.ensGene.ra
+# required db variable
+db ochPri2
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 ochPri2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/ochPri2/bed/ensGene.55
+    featureBits ochPri2 ensGene
+    # 25342444 bases of 1923624051 (1.317%) in intersection
+
+############################################################################
+#  ornAna1 - Platypus - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u02
+    cd /hive/data/genomes/ornAna1
+    cat << '_EOF_' > ornAna1.ensGene.ra
+# required db variable
+db ornAna1
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly, 365 items
+skipInvalid yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 ornAna1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/ornAna1/bed/ensGene.55
+    featureBits ornAna1 ensGene
+    # 24537221 bases of 1842236818 (1.332%) in intersection
+
+############################################################################
+#  oryCun1 - Rabbit - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/oryCun1
+    cat << '_EOF_' > oryCun1.ensGene.ra
+# required db variable
+db oryCun1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the three genes that have invalid structures from Ensembl:
+# 24994: ENSOCUT00000009485 no exonFrame on CDS exon 9
+# 26897: ENSOCUT00000004627 no exonFrame on CDS exon 3
+# 32794: ENSOCUT00000014840 no exonFrame on CDS exon 3
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 oryCun1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/oryCun1/bed/ensGene.55
+    featureBits oryCun1 ensGene
+    # 22839824 bases of 2076044328 (1.100%) in intersection
+
+############################################################################
+#  oryLat2 - Medaka - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/oryLat2
+    cat << '_EOF_' > oryLat2.ensGene.ra
+# required db variable
+db oryLat2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
+# ignore 2,687 genes that haven't lifted properly yet
+# skipInvalid yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 oryLat2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/oryLat2/bed/ensGene.55
+    featureBits oryLat2 ensGene
+    # 32301732 bases of 700386597 (4.612%) in intersection
+
+############################################################################
+#  otoGar1 - Bushbaby - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kolossus
+    cd /hive/data/genomes/otoGar1
+    cat << '_EOF_' > otoGar1.ensGene.ra
+# required db variable
+db otoGar1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# after geneScaffold conversions, change Ensembl chrom names to UCSC names
+liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 otoGar1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/otoGar1/bed/ensGene.55
+    featureBits otoGar1 ensGene
+    # 23597902 bases of 1969052059 (1.198%) in intersection
+
+############################################################################
+#  panTro2 - Chimp - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/panTro2
+    cat << '_EOF_' > panTro2.ensGene.ra
+# required db variable
+db panTro2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 panTro2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/panTro2/bed/ensGene.55
+    featureBits panTro2 ensGene
+    # 49983145 bases of 2909485072 (1.718%) in intersection
+
+############################################################################
+#  ponAbe2 - Orangutan - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/ponAbe2
+    cat << '_EOF_' > ponAbe2.ensGene.ra
+# required db variable
+db ponAbe2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
+# optional haplotype lift-down from Ensembl full chrom coordinates
+#       to UCSC simple haplotype coordinates
+# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 ponAbe2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/ponAbe2/bed/ensGene.55
+    featureBits ponAbe2 ensGene
+    # 38087987 bases of 3093572278 (1.231%) in intersection
+
+############################################################################
+#  proCap1 - Rock hyrax - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/proCap1
+    cat << '_EOF_' > proCap1.ensGene.ra
+# required db variable
+db proCap1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
+# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 proCap1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/proCap1/bed/ensGene.55
+    featureBits proCap1 ensGene
+    # 25296156 bases of 2407847681 (1.051%) in intersection
+
+############################################################################
+#  pteVam1 - Megabat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u08
+    cd /hive/data/genomes/pteVam1
+    cat << '_EOF_' > pteVam1.ensGene.ra
+# required db variable
+db pteVam1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+#       6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
+#       23522: ENSPVAT00000010661 no exonFrame on CDS exon 0
+
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 pteVam1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/pteVam1/bed/ensGene.55
+    featureBits pteVam1 ensGene
+    # 28914790 bases of 1839436660 (1.572%) in intersection
+
+############################################################################
+#  rheMac2 - Rhesus - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kolossus
+    cd /hive/data/genomes/rheMac2
+    cat << '_EOF_' > rheMac2.ensGene.ra
+# required db variable
+db rheMac2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 rheMac2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/rheMac2/bed/ensGene.55
+    featureBits rheMac2 ensGene
+    # 44519581 bases of 2646704109 (1.682%) in intersection
+
+############################################################################
+#  rn4 - Rat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh swarm
+    cd /hive/data/genomes/rn4
+    cat << '_EOF_' > rn4.ensGene.ra
+# required db variable
+db rn4
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
+# optionally update the knownToEnsembl table after ensGene updated
+knownToEnsembl yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 rn4.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/rn4/bed/ensGene.55
+    featureBits rn4 ensGene
+    # 43758167 bases of 2571531505 (1.702%) in intersection
+
+############################################################################
+#  sacCer2 - S. cerevisiae - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u02
+    cd /hive/data/genomes/sacCer2
+    cat << '_EOF_' > sacCer2.ensGene.ra
+# required db variable
+db sacCer2
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^MT/chrM/; s/2-micron/2micron/"
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 sacCer2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/sacCer2/bed/ensGene.55
+    featureBits sacCer2 ensGene
+    # 8912793 bases of 12162995 (73.278%) in intersection
+
+############################################################################
+#  sorAra1 - Shrew - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u04
+    cd /hive/data/genomes/sorAra1
+    cat << '_EOF_' > sorAra1.ensGene.ra
+# required db variable
+db sorAra1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 sorAra1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/sorAra1/bed/ensGene.55
+    featureBits sorAra1 ensGene
+    # 19509213 bases of 1832864697 (1.064%) in intersection
+
+############################################################################
+#  speTri1 - Squirrel - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kolossus
+    cd /hive/data/genomes/speTri1
+    cat << '_EOF_' > speTri1.ensGene.ra
+# required db variable
+db speTri1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the single gene that has an invalid structure from Ensembl:
+# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 speTri1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/speTri1/bed/ensGene.55
+    featureBits speTri1 ensGene
+    # 21590338 bases of 1913367893 (1.128%) in intersection
+
+############################################################################
+#  taeGut1 - Zebra finch - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
+    ssh kkr14u03
+    cd /hive/data/genomes/taeGut1
+    cat << '_EOF_' > taeGut1.ensGene.ra
+# required db variable
+db taeGut1
+# optional nameTranslation, the sed command that will transform
+#       Ensembl names to UCSC names.  With quotes just to make sure.
+nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
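+# The two options below are commented out (disabled) in this .ra file:
+#   need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
+# geneScaffolds yes
+#       during the loading of the gene pred, skip all invalid genes
+# skipInvalid yes
+#   NOTE: the ENSDNOT* transcript IDs listed below do not look like
+#       zebra finch identifiers; presumably left over from another
+#       genome's .ra file - verify before re-enabling skipInvalid
+#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
+#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
+#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3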
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 taeGut1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/taeGut1/bed/ensGene.55
+    featureBits taeGut1 ensGene
+    #   25428670 bases of 1222864691 (2.079%) in intersection
+
+############################################################################
+#  tarSyr1 - Tarsier - Ensembl Genes version 55  (DONE - 2009-07-20 - hiram)
+    ssh kkr14u01
+    cd /hive/data/genomes/tarSyr1
+    cat << '_EOF_' > tarSyr1.ensGene.ra
+# required db variable
+db tarSyr1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 tarSyr1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/tarSyr1/bed/ensGene.55
+    featureBits tarSyr1 ensGene
+    # 21282560 bases of 2768536343 (0.769%) in intersection
+
+############################################################################
+#  tupBel1 - TreeShrew - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u05
+    cd /hive/data/genomes/tupBel1
+    cat << '_EOF_' > tupBel1.ensGene.ra
+# required db variable
+db tupBel1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# after geneScaffold conversions, change Ensembl chrom names to UCSC names
+liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the two genes that have invalid structures from Ensembl:
+# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
+# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 tupBel1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/tupBel1/bed/ensGene.55
+    featureBits tupBel1 ensGene
+    # 22808448 bases of 2137225476 (1.067%) in intersection
+
+############################################################################
+#  turTru1 - Dolphin - Ensembl Genes version 55  (DONE - 2009-07-20 - hiram)
+    ssh kkr14u06
+    cd /hive/data/genomes/turTru1
+    cat << '_EOF_' > turTru1.ensGene.ra
+# required db variable
+db turTru1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 turTru1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/turTru1/bed/ensGene.55
+    featureBits turTru1 ensGene
+    # 28534327 bases of 2298444090 (1.241%) in intersection
+
+############################################################################
+#  vicPac1 - Alpaca - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kkr14u06
+    cd /hive/data/genomes/vicPac1
+    cat << '_EOF_' > vicPac1.ensGene.ra
+# required db variable
+db vicPac1
+# do we need to translate geneScaffold coordinates
+geneScaffolds yes
+# ignore genes that do not properly convert to a gene pred, and contig
+#       names that are not in the UCSC assembly
+skipInvalid yes
+# ignore the 53 genes that do not translate properly to UCSC coordinates
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 vicPac1.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/vicPac1/bed/ensGene.55
+    featureBits vicPac1 ensGene
+    # 17833823 bases of 1922910435 (0.927%) in intersection
+
+############################################################################
+#  xenTro2 - X. tropicalis - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
+    ssh kolossus
+    cd /hive/data/genomes/xenTro2
+    cat << '_EOF_' > xenTro2.ensGene.ra
+# required db variable
+db xenTro2
+'_EOF_'
+#  << happy emacs
+
+    doEnsGeneUpdate.pl -ensVersion=55 xenTro2.ensGene.ra
+    ssh hgwdev
+    cd /hive/data/genomes/xenTro2/bed/ensGene.55
+    featureBits xenTro2 ensGene
+    # 29158032 bases of 1359412157 (2.145%) in intersection
+
+############################################################################
+
 ############################################################################
 # ensembl 52 updates (DONE - 2009-01-21,22 - Hiram)
     # see also: more notes about how this is done in the "ensembl 50 updates"
     #	section below
@@ -13,9 +960,9 @@
     ssh swarm
     cd /hive/data/genomes/bosTau4
     cat << '_EOF_' > bosTau4.ensGene.ra
 # required db variable
-db bosTau4
+    db bosTau4
 # optional nameTranslation, the sed command that will transform
 #       Ensemble names to UCSC names.  With quotes just to make sure.
 nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
 # cause SQL tables to be fetched to see if chrUn can be fixed up