src/hg/encode/DAFs/WoldRnaSeq.daf 1.3

1.3 2009/03/06 00:52:10 mikep
updated DAF to handle more raw data files
Index: src/hg/encode/DAFs/WoldRnaSeq.daf
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/DAFs/WoldRnaSeq.daf,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/encode/DAFs/WoldRnaSeq.daf	14 Feb 2009 08:11:52 -0000	1.2
+++ src/hg/encode/DAFs/WoldRnaSeq.daf	6 Mar 2009 00:52:10 -0000	1.3
@@ -1,64 +1,97 @@
 # Data Agreement File for RNA-seq project $Revision$
 
 # This file specifies the data agreement between your lab and 
 # the DCC.  It should not be changed unilaterally.
 
 # Lab and general info
 grant             Wold
 lab               Caltech
 dataType          RnaSeq
-variables         cell, localization, rnaExtract, mapAlgorithm
+variables         cell, localization, rnaExtract, mapAlgorithm, readType
 assembly          hg18
 dafVersion        0.2.2
 
 # Track/view definition
 view             Signal
 longLabelPrefix  Caltech RNA-Seq Map Algorithm Signal
 type             wig
 hasReplicates    no
 required         no
 
 view             RawSignal
 longLabelPrefix  Caltech RNA-Seq Raw Signal
 type             wig
 hasReplicates    yes
 required         no
 
 view             Splices
 longLabelPrefix  Caltech RNA-Seq Map Algorithm Splice Alignments
 type             bed 12
 hasReplicates    yes 
 required         no
 
-view             Alignments
+view             Aligns
 longLabelPrefix  Caltech RNA-Seq Map Algorithm Alignments
 type             bed 12
 hasReplicates    yes
 required         no
 
-view             PairedAligns
+view             Paired
 longLabelPrefix  Caltech RNA-Seq Map Algorithm Paired Alignments
 type             bed 12
 hasReplicates    yes
 required         no
 
+# For 2x75 datasets, raw Illumina reads (RawData, fasta format, query75.txt files) are run through bowtie 0.9.8.1 with up to 2 mismatches 
+# and the resulting mappings are stored (RawData2, bowtie format, hg18sp75spike.bowtie.txt files) for up to ten matches per read to the genome,
+# spiked controls and UCSC knownGene splice junctions. Reads that were not mapped by bowtie (RawData3, fasta format, unmapped.fa files) 
+# are then mapped onto the genome using blat and filtered using pslReps (RawData4, psl format, bowtierej.hg18.blatbetter files). 
+# The bowtie and blat mappings are then analyzed by ERANGE3.0.2 to generate wiggles (RawSignal, wiggle format, .graph files), 
+# bed files of all reads and splices (Aligns, Paired, bed format, all.bed files),
+# all bowtie and blat splices (Splices, bed format, allsplices.bed files), 
+# and blat-only splices (Splices, bed format, blatsplices.bed files),
+# as well as RPKM expression level measurements at the gene-level (RawData5, rpkm format, final.rpkm), 
+# exon-level (RawData6, rpkm format, gencode_exon.rpkm), and candidate novel exons (RawData7, rpkm format, accepted.rpkm).
+
+# query75.txt 
 view             RawData
 type             fasta
 hasReplicates    yes
 required         no
 
+# hg18sp75spike.bowtie.txt
 view             RawData2
-type             rpkm
+type             bowtie
 hasReplicates    yes
 required         no
 
+# unmapped.fa
 view             RawData3
-type             bowtie
+type             fasta
 hasReplicates    yes
 required         no
 
+# bowtierej.hg18.blatbetter
 view             RawData4
 type             psl
 hasReplicates    yes
 required         no
 
+# final.rpkm
+view             RawData5
+type             rpkm
+hasReplicates    yes
+required         no
+
+# gencode_exon.rpkm
+view             RawData6
+type             rpkm
+hasReplicates    yes
+required         no
+
+# accepted.rpkm
+view             RawData7
+type             rpkm
+hasReplicates    yes
+required         no
+