src/hg/makeDb/doc/mm9.txt 1.123

1.123 2010/02/09 19:05:39 hartera
Documented adding mapability track.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.122
retrieving revision 1.123
diff -b -B -U 4 -r1.122 -r1.123
--- src/hg/makeDb/doc/mm9.txt	6 Feb 2010 00:17:33 -0000	1.122
+++ src/hg/makeDb/doc/mm9.txt	9 Feb 2010 19:05:39 -0000	1.123
@@ -10083,4 +10083,58 @@
     cat fb.susScr1.chainMm9Link.txt 
     #	656445475 bases of 2231332019 (29.419%) in intersection
 
 #########################################################################
+# CRG MAPABILITY (2010-02-05 - 2010-02-09, hartera, DONE)
+# Data was provided by Thomas Derrien (thomas.derrien.crg.es) and Paolo Ribeca
+# from the Guigo lab at the Center for Genomic Regulation (CRG) in Barcelona
+# on 2010-02-04.
+# Data was produced using their GEM mapper aligner, taking sliding k-mer
+# windows of the mouse genome that were mapped back onto the genome with up
+# to 2 mismatches. For each window, a mappability score is computed as
+# S = 1/(number of matches found) and the BigWig index was created according
+# to this score.
+# 2010-02-09. Loaded database and added data to /gbdb/
+# Added trackDb entry for the Mapability track.
+ 
+     mkdir -p /hive/data/genomes/mm9/bed/crgMapability
+     cd /hive/data/genomes/mm9/bed/crgMapability
+cat << 'EOF' > temp
+#!/bin/tcsh -ef
+http://genome.crg.es/~tderrien/UCSC_Tracks/M.musculus.genome.mm9.mappability-36_mm9.bw.bz2
+http://genome.crg.es/~tderrien/UCSC_Tracks/M.musculus.genome.mm9.mappability-50_mm9.bw.bz2
+http://genome.crg.es/~tderrien/UCSC_Tracks/M.musculus.genome.mm9.mappability-75_mm9.bw.bz2
+http://genome.crg.es/~tderrien/UCSC_Tracks/M.musculus.genome.mm9.mappability-100_mm9.bw.bz2
+http://genome.crg.es/~tderrien/UCSC_Tracks/M.musculus.genome.mm9.mappability-40_mm9.bw.bz2
+'EOF'
+
+     awk '{if ($0 ~ /#/) print; else print "wget --timestamping \"" $0 "\"";}' \
+         temp > download.csh
+     rm temp
+     chmod +x download.csh
+     ./download.csh >& download.log &
+     
+     # Add the data to /gbdb/ and load the file names into tables (2010-02-09)
+     cd /hive/data/genomes/mm9/bed/crgMapability
+     bunzip2 *.bz2
+     # Add data to gbdb
+     mkdir -p /gbdb/mm9/bbi/
+     # Symlink files with names as crgMapabilityAlignXmer.bw to /gbdb/mm9/bbi
+     # and load file name into a table - one per dataset. Each table 
+     # represents a subtrack.
+     foreach f (`ls *.bw`)
+        echo $f
+        set g=`echo $f | cut -d "-" -f2`
+        set num=`echo $g | cut -d "_" -f1`
+        set mer=`echo "${num}mer"`
+        set nf=`echo "crgMapabilityAlign${mer}.bw"`
+        echo $nf
+        ln -s `pwd`/${f} /gbdb/mm9/bbi/${nf}
+        hgsql mm9 -e "drop table if exists crgMapabilityAlign${mer}; \
+     create table crgMapabilityAlign${mer} (fileName varchar(255) not null); \
+     insert into crgMapabilityAlign${mer} values ('/gbdb/mm9/bbi/${nf}');"
+     end
+
+     # Added a trackDb entry for this mapability track in
+     # kent/src/hg/makeDb/trackDb/mouse/mm9/trackDb.ra
+     # use bigWigInfo to check min and max values. Created a mapability.html
+     # description page.