src/hg/makeDb/doc/hg18.txt 1.365
1.365 2009/06/13 20:39:58 hartera
Updated the wgRna track with latest miRBase and snoRNABase data.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.364
retrieving revision 1.365
diff -b -B -U 4 -r1.364 -r1.365
--- src/hg/makeDb/doc/hg18.txt 2 Jun 2009 23:49:01 -0000 1.364
+++ src/hg/makeDb/doc/hg18.txt 13 Jun 2009 20:39:58 -0000 1.365
@@ -27972,4 +27972,83 @@
fox2ClipClusters.bed unmapped.bed
hgLoadBed hg18 fox2ClipClusters{,.bed}
##############################################################################
+# RE-BUILD sno/miRNA TRACK (DONE, 2009-06-11 - 2009-06-13, hartera)
+ # The data in this track is out of date so update the track.
+ mkdir -p /hive/data/genomes/hg18/bed/wgRna-2009-06-11
+ cd /hive/data/genomes/hg18/bed/wgRna-2009-06-11
+ # Download GFF file of latest miRNA annotations from miRBase at the
+ # Wellcome Trust Sanger Institute (WTSI). This is Release 13.0 (March
+ # 2009)
+ wget --timestamping \
+ftp://ftp.sanger.ac.uk/pub/mirbase/sequences/CURRENT/genomes/hsa.gff
+ # Re-format, need to add "chr" to the beginning of each line.
+ sed -e 's/^/chr/' hsa.gff > hsMirBaseFormat.gff
+ # Remove extra "chr" in comment lines
+ perl -pi.bak -e 's/chr#/#/' hsMirBaseFormat.gff
+ # Change chrMT to chrM
+ perl -pi.bak -e 's/chrMT/chrM/' hsMirBaseFormat.gff
+ # Remove all but ID name in last field
+ sed -e 's/\";//g' hsMirBaseFormat.gff | sed -e 's/ID=\"//g' \
+ | sed -e 's/ACC=\"MI[0-9]*\s//' > hsMirBaseFormatIdOnly.gff
+
+ # use score 960 for + strand and 480 for - strand. This will show
+ # up black on the track for + strand and grey for - strand.
+ # Starts appear to be 1-based when compared to miRNAs in current track
+ # and those in Ensembl.
+ # Confirmed with Sam Griffiths-Jones (one of the authors of miRBase,
+ # sam.griffiths-jones@manchester.ac.uk) that these GFF coordinates
+ # are 1-based.
+ # Also add thickStart and thickEnd columns and "miRNA" for type.
+ awk 'BEGIN {FS="\t"} {OFS="\t"} \
+ {if ($0 !~ /#/ && $7 == "+") \
+ print $1, $4-1, $5, $9, 960, $7, 0, 0, "miRNA"; \
+ else if ($0 !~ /#/ && $7 == "-") \
+ print $1, $4-1, $5, $9, 480, $7, 0, 0, "miRNA";}' \
+ hsMirBaseFormatIdOnly.gff > hsMirBaseFormatIdOnly.bed
+ # 2009-06-12
+ # snoRNAs are from snoRNABase at http://www-snorna.biotoul.fr/
+ # Download coordinates for hg18 from
+ # http://www-snorna.biotoul.fr/coordinates.php
+ # This is version 3 of the database.
+ # Save as a tab-separated file, snoRNABaseVersion3Coords.txt, and remove
+ # the first and last lines.
+ perl -pi.bak -e 's/\"//g' snoRNABaseVersion3Coords.txt
+ # Reformat to BED (0-based starts) with thickStart and thickEnd set to 0.
+ awk 'BEGIN {FS="\t"} {OFS="\t"} \
+ {if ($4 == "+") \
+ print $1, $2-1, $3, $5, 960, $4, 0, 0,$6; \
+ else if ($4 == "-") \
+ print $1, $2-1, $3, $5, 480, $4, 0, 0,$6;}' \
+ snoRNABaseVersion3Coords.txt > snoRNABaseVersion3Coords.bed
+ # Merge the miRNA and snoRNA files together
+ cat hsMirBaseFormatIdOnly.bed snoRNABaseVersion3Coords.bed \
+ > wgRna20090611.bed
+ # Load into separate table rather than overwriting wgRna
+ cp -p /cluster/home/hartera/src/hg/lib/wgRna.sql wgRnaJun09.sql
+ perl -pi.bak -e 's/TABLE wgRna/TABLE wgRnaJun09/' wgRnaJun09.sql
+ hgLoadBed -sqlTable=wgRnaJun09.sql hg18 wgRnaJun09 wgRna20090611.bed
+# Reading wgRna20090611.bed
+# Loaded 1120 elements of size 9
+# Sorted
+# Creating table definition for wgRnaJun09
+# Saving bed.tab
+# Loading hg18
+
+ # Clean up
+ rm *.bak
+
+hgsql -e 'select count(*) from wgRna;' hg18
+# 1059
+# for miRNAs: 685 (676 unique names)
+# and others: 374 including 21 scaRNA
+hgsql -e 'select count(*) from wgRnaJun09;' hg18
+# 1120
+# for miRNAs: 718 (705 unique names)
+# and others: 402 including 21 scaRNA
+ # 2009-06-13
+ # Renamed the old wgRna track to wgRnaOld and renamed the new wgRnaJun09
+ # track to wgRna. Will keep the old track around for a while until
+ # the new track has been checked and QA'd.
+ hgsql -e 'alter table wgRna rename wgRnaOld;' hg18
+ hgsql -e 'alter table wgRnaJun09 rename wgRna;' hg18