src/hg/makeDb/doc/hg18.txt 1.395

1.395 2010/01/26 19:39:35 hartera
Adding a mapability data from CRG.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.394
retrieving revision 1.395
diff -b -B -U 4 -r1.394 -r1.395
--- src/hg/makeDb/doc/hg18.txt	19 Jan 2010 21:44:30 -0000	1.394
+++ src/hg/makeDb/doc/hg18.txt	26 Jan 2010 19:39:35 -0000	1.395
@@ -29279,6 +29278,51 @@
 
     hgLoadBed hg18 gwasCatalog gwasCatalog.bed \
       -tab -sqlTable=$HOME/kent/src/hg/lib/gwasCatalog.sql -notItemRgb -allowStartEqualEnd
 
-
 #############################################################################
+# CRG MAPABILITY (2010-01-19, hartera, in progress)
+# Data was provided by Thomas Derrien (thomas.derrien.crg.es) from the Guigo 
+# lab at the Center for Genomic Regulation (CRG) in Barcelona. Data was
+# produced using their GEM mapper aligner, taking sliding k-mer windows of the
+# human genome that were mapped back onto the genome with up to 2 mismatches.
+# For each window, a mappability score is computed as S = 1/(number of matches
+# found), and the BigWig index was created according to this score.
+    
+     mkdir -p /hive/data/genomes/hg18/bed/crgMapability
+     cd /hive/data/genomes/hg18/bed/crgMapability
+cat << 'EOF' > temp
+#!/bin/tcsh -ef
+http://genome.imim.es/~tderrien/UCSC_Tracks/ALL_mappablity_hg18_H.sapiens.genome.hg18.main.mappability-36.bw.bz2
+http://genome.imim.es/~tderrien/UCSC_Tracks/ALL_mappablity_hg18_H.sapiens.genome.hg18.main.mappability-40.bw.bz2
+http://genome.imim.es/~tderrien/UCSC_Tracks/ALL_mappablity_hg18_H.sapiens.genome.hg18.main.mappability-50.bw.bz2
+http://genome.imim.es/~tderrien/UCSC_Tracks/ALL_mappablity_hg18_H.sapiens.genome.hg18.main.mappability-75.bw.bz2
+http://genome.imim.es/~tderrien/UCSC_Tracks/ALL_mappablity_hg18_H.sapiens.genome.hg18.main.mappability-100.bw.bz2
+'EOF'
+
+     awk '{if ($0 ~ /#/) print; else print "wget --timestamping \"" $0 "\"";}' \
+         temp > download.csh
+     rm temp
+     chmod +x download.csh
+     ./download.csh >& download.log &
+     
+     # Add the data to /gbdb/ and load the file names into tables (2010-01-26)
+     cd /hive/data/genomes/hg18/bed/crgMapability
+     bunzip2 *.bz2
+
+     # Add data to gbdb
+     mkdir -p /gbdb/hg18/bbi/
+     # Symlink each file into /gbdb/hg18/bbi as crgMapabilityAlignXmer.bw (X is
+     # the k-mer size taken from the file name) and load the symlink path into
+     # a table - one per dataset. Each table represents a subtrack.
+     foreach f (`ls *.bw`)
+        echo $f
+        set g=`echo $f | cut -d "-" -f2`
+        set num=`echo $g | cut -d "." -f1`
+        set mer=`echo "${num}mer"`
+        set nf=`echo "crgMapabilityAlign${mer}.bw"`
+        echo $nf
+        ln -s `pwd`/${f} /gbdb/hg18/bbi/${nf}
+        hgsql hg18 -e "drop table if exists crgMapabilityAlign${mer}; \
+     create table crgMapabilityAlign${mer} (fileName varchar(255) not null); \
+     insert into crgMapabilityAlign${mer} values ('/gbdb/hg18/bbi/${nf}');"
+     end