src/hg/makeDb/doc/mm9.txt 1.95

1.95 2009/06/10 19:16:49 hartera
Updated the miRNA track to miRBase data release 13.0.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.94
retrieving revision 1.95
diff -b -B -U 4 -r1.94 -r1.95
--- src/hg/makeDb/doc/mm9.txt	22 May 2009 21:32:06 -0000	1.94
+++ src/hg/makeDb/doc/mm9.txt	10 Jun 2009 19:16:49 -0000	1.95
@@ -9363,4 +9363,51 @@
     cat fb.mm9.chainHg19Link.txt 
     #	1013880568 bases of 2620346127 (38.693%) in intersection
 
 #############################################################################
+#  RE-BUILD miRNA TRACK (DONE, 2009-06-09 - 2009-06-10, hartera)
+    # The miRNA track from miRBase is out of date so update the track. 
+    mkdir -p /hive/data/genomes/mm9/bed/miRNA-2009-06-09
+    cd /hive/data/genomes/mm9/bed/miRNA-2009-06-09
+    # Download GFF file of latest miRNA annotations from miRBase at the
+    # Wellcome Trust Sanger Institute (WTSI). This is Release 13.0.
+    wget --timestamping \
+ftp://ftp.sanger.ac.uk/pub/mirbase/sequences/CURRENT/genomes/mmu.gff
+    # The previous version that is currently on the Genome Browser has 493 
+    # annotations. This version has 470 miRNAs.
+    # Re-format, need to add "chr" to the beginning of each line.
+    sed -e 's/^/chr/' mmu.gff > mmMirBaseFormat.gff
+    # Remove extra "chr" in comment lines
+    perl -pi.bak -e 's/chr#/#/' mmMirBaseFormat.gff
+    # Change chrMT to chrM
+    perl -pi.bak -e 's/chrMT/chrM/' mmMirBaseFormat.gff
+    # Remove all but ID name in last field
+    sed -e 's/\";//g' mmMirBaseFormat.gff | sed -e 's/ID=\"/transcript_id=/g' \
+       | sed -e 's/ACC=\"MI[0-9]*\s//' > mmMirBaseFormatIdOnly.gff
+
+    # Load into database. 
+    ldHgGene -exon=miRNA mm9 miRNARel13 mmMirBaseFormatIdOnly.gff
+    # ldHgGene does not load this as mmu-mir-692-2 is on two chroms, chr4 and
+    # chr13. These are alignments, not genePreds, so convert to BED for
+    # loading into the database.
+    sed -e 's/\";//g' mmMirBaseFormat.gff | sed -e 's/ID=\"//g' \
+       | sed -e 's/ACC=\"MI[0-9]*\s//' > mmMirBaseFormatIdOnly.gff
+    # e.g. chr1 . miRNA 20669091 20669163 . + . mmu-mir-206
+    # NOTE(review): GFF starts are 1-based, BED 0-based; awk keeps $4 - confirm.
+    awk 'BEGIN {FS="\t"} {OFS="\t"} \
+        {if ($0 !~ /#/) print $1, $4, $5, $9, "0", $7}' \
+        mmMirBaseFormatIdOnly.gff > mmMirBaseFormatIdOnly.bed
+    # Remove previous table. NOTE(review): log below names miRNARel13 - confirm.
+    hgsql -e 'drop table miRNA' mm9
+    hgLoadBed mm9 miRNA mmMirBaseFormatIdOnly.bed
+# Reading mmMirBaseFormatIdOnly.bed
+# Loaded 568 elements of size 6
+# Sorted
+# Creating table definition for miRNARel13
+# Saving bed.tab
+# Loading mm9
+    hgsql -e 'select count(*) from miRNARel13;' mm9 
+# 568
+# The previous version had 493 miRNAs.
+hgsql -e 'select count(distinct name) from miRNARel13;' mm9
+# 541
+# The previous version had 466 unique miRNAs.