src/hg/makeDb/doc/hg18.txt 1.391
1.391 2009/12/12 04:39:22 hartera
Added documentation for making track of Burge lab RNA-seq reads mapped by GEM mapper.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.390
retrieving revision 1.391
diff -b -B -U 4 -r1.390 -r1.391
--- src/hg/makeDb/doc/hg18.txt 9 Dec 2009 20:54:34 -0000 1.390
+++ src/hg/makeDb/doc/hg18.txt 12 Dec 2009 04:39:22 -0000 1.391
@@ -28893,8 +28893,70 @@
cd /cluster/data/genbank
./bin/lsSnpPdbDbLoadStep hg18
# once this has been QAed, will auto-update from genbank scripts
#############################################################################
+# BURGE LAB DATA MAPPED WITH GEMMAPPER. PROVIDED BY THOMAS DERRIEN FROM RODERIC
+# GUIGO'S LAB AT CRG. (E-MAIL: thomas.derrien@crg.es)
+# (hartera, 2009-09-13 - 2009-09-16, DONE)
+ mkdir /hive/groups/gencode/browser/hg18/burgeRnaSeqGemMapperAlign
+ cd /hive/groups/gencode/browser/hg18/burgeRnaSeqGemMapperAlign
+
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325476_brain_HCT168_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325477_liver_HCT169_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325478_heart_HCT170_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325479_skelMuscle_HCT171_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325480_colon_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325481_adipose_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325482_testes_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325483_lymphNode_hg18.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325484_HCT204_bt474_.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325485_HCT205_HME_.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325486_HCT202_s2468_.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325487_HCT203_s2468.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325488_HCT206_s2468_.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+ wget --timestamping "http://genome.imim.es/~tderrien/Gencode/For_Rachel/GFF_Burge_32bp_Gencode2b/GSM325489_HCT207_s2468_.fa.GEM_index.hg18.gencode_data.rel2b.length_32.gff.gz"
+
+ cd /hive/groups/gencode/browser/hg18/burgeRnaSeqGemMapperAlign
+ # Load this data into tables for hg18.
+ # Unzip the files:
+ gunzip *.gff.gz
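+ # Create a file with the list of file names and tissues. The GSM lines
+ # that follow the ls command are sample-to-tissue/cell line notes, not
+ # commands.
+ ls *.gff > burgeDataFiles.txt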
+ GSM325486_HCT202_s2468 breast
+ GSM325487_HCT203_s2468 MCF-7
+ GSM325488_HCT206_s2468 MB435
+ GSM325489_HCT207_s2468 T47D
+ # Did not map these two as they are not 32 bp.
+ GSM325490_brain_s1368 MAQC mixed human brain tissue/cell lines
+ GSM325491_UHR_s247 MAQC_UHR mixed human cell lines
+ # Edit the file above to add a tab separation between file name and tissue
+ # name. Then remove the "read_name: " from the last field in each file,
+ # otherwise it gets included in the name, and load the data into hg18.
+ # Write a script to do this:
+cat << '_EOF_' > formatAndLoadData
+#!/bin/bash
+# formatAndLoadData - strip the "read_name: " label from each GFF file and
+# load the cleaned file into its own table with ldHgGene.
+#
+# Usage: formatAndLoadData dataFiles trackPrefix database
+#   dataFiles   - file with one "fileName<TAB>tissue" pair per line
+#   trackPrefix - track name used as prefix for the subtrack/table names
+#   database    - database to load the tables into
+#
+# For each input line this writes <trackPrefix><tissue>.gff in the current
+# directory and prints the subtrack name to stdout.
+
+# Stop at the first failing command. Set here rather than in the shebang so
+# that it still applies when the script is run as "bash formatAndLoadData".
+set -e
+
+if [ $# -ne 3 ]; then
+    echo "usage: $0 dataFiles trackPrefix database" >&2
+    exit 1
+fi
+
+# Assign variables
+# Tab-separated file of file names and tissue/cell line names
+DATAFILES=$1
+# track name used as prefix for subtracks
+TRACK=$2
+# database
+DATABASE=$3
+
+# The first field is the file name; everything after it is the tissue name.
+# -r keeps any backslashes in the input literal.
+while read -r file tissue; do
+    subTrack="${TRACK}${tissue}"
+    echo "$subTrack"
+    # Remove the label so that only the read name itself is left in the
+    # last field of each GFF line.
+    sed -e 's/read_name:\s//' "$file" > "${subTrack}.gff"
+    ldHgGene -exon=read "$DATABASE" "${subTrack}" "${subTrack}.gff"
+done < "$DATAFILES"
+'_EOF_'
+ chmod +x formatAndLoadData
+ ./formatAndLoadData burgeDataFiles.txt burgeRnaSeqGemMapperAlign hg18 \
+ > load.log &
+ # Added a trackDb entry in
+ # ccds/trunk/gencode/browser/trackDb/human/hg18/trackDb.ra
+
############################################################################
# TRANSMAP vertebrate.2009-09-13 build (2009-09-20 markd)
vertebrate-wide transMap alignments were built. Tracks are created and loaded