src/hg/tcga/scripts/processData.py 1.9

1.9 2009/08/12 05:05:24 jsanborn
Added other TCGA ovarian datasets (uncOVG4502, harvardOVCGH415, uncOVMiRNA, jhuMethylOV).
Index: src/hg/tcga/scripts/processData.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/scripts/processData.py,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/tcga/scripts/processData.py	27 Apr 2009 23:49:23 -0000	1.8
+++ src/hg/tcga/scripts/processData.py	12 Aug 2009 05:05:24 -0000	1.9
@@ -21,8 +21,12 @@
     print "\t\tbaylorABI -- Baylor ABI SNP data"
     print "\t\twustlABI -- WUSTL ABI SNP Data"
     print "\t\tsnp6 -- Broad SNP 6.0"
     print "\t\tbroadAffyOV -- Affy U133A Ovarian"
+    print "\t\tuncOVG4502  -- Agilent G4502 Ovarian"
+    print "\t\tharvardOVCGH415 -- Agilent G4124A 415K CGH Ovarian"
+    print "\t\tuncOVMiRNA -- Agilent MiRNA Ovarian"
+    print "\t\tjhuMethylOV -- Methylation27 Ovarian"
     print ""
     
 def handleOpts(argv):
     opts, args = getopt.getopt(argv, "ht:", ["help", "type="])
@@ -75,8 +79,19 @@
         prefix = "jhuUscMethyl3Beta"
 
         Data = MethylBetaData(baseDir, patterns, bed12file, bed15file, prefix)
 
+    elif type == "jhuMethylOV":
+        baseDir = "/data/TCGA/O_jhuMethylOV/working/"
+        patterns = ["*lvl-2*"]
+        bed12file = baseDir + "methylIllumina.bed"
+        bed15file = baseDir + "jhuMethylOV.bed"
+        prefix = "jhuMethyOV"
+
+        Data = Methyl27Data(baseDir, patterns, bed12file, bed15file, prefix)
+        Data.dataStartIndex = 1
+        Data.medianNormalize = 1
+        
     elif type == "harvard":
         baseDir = "/data/TCGA/O_harvardCGH/working/"
         patterns = ["*data.txt"]
         bed12file = baseDir + "agilentCGH244A.bed"
@@ -86,23 +101,43 @@
         Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
 
     elif type == "harvardOV":
         baseDir = "/data/TCGA/O_harvardOVCGH/working/"
-        patterns = ["*normalized", "*.data.txt"]
+        patterns = ["*.data.txt"]
         bed12file = baseDir + "agilentCGH244A.bed"
         bed15file = baseDir + "harvardOVCGH244A_bed15.bed"
         prefix = "harvardOVCGH"
         
         Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
         
+    elif type == "harvardOVCGH415":
+        baseDir = "/data/TCGA/O_harvardOVCGH415/working/"
+        patterns = ["*.data.txt"]
+        bed12file = baseDir + "agilentG4124A.bed"
+        bed15file = baseDir + "harvardOVCGH415_bed15.bed"
+        prefix = "harvardOVCGH415"
+        
+        Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+
     elif type == "broadAffyOV":
         baseDir = "/data/TCGA/O_broadAffyOV/working/"
-        patterns = ["*_rma"]
+        patterns = ["*data.txt"]
         bed12file = baseDir + "affyU133A.bed"
         bed15file = baseDir + "broadAffyOV_bed15.bed"
         prefix = "broadAffyOV"
         
         Data = AffyU133A(baseDir, patterns, bed12file, bed15file, prefix)
+        Data.medianNormalize = 1
+        
+    elif type == "uncOVG4502":
+        baseDir = "/data/TCGA/O_uncOVG4502/working/"
+        patterns = ["*data.txt"]
+        bed12file = baseDir + "agilentG4502A.bed"
+        bed15file = baseDir + "uncOVG4502_bed15.bed"
+        prefix = "uncOVG4502"
+        
+        Data = AffyU133A(baseDir, patterns, bed12file, bed15file, prefix)
+        Data.medianNormalize = 1
         
     elif type == "mskcc":
         baseDir = "/data/TCGA/O_mskccCGH/working/"
         patterns = ["*data.txt"]
@@ -124,14 +159,25 @@
         Data.dataStartIndex = 2
 
     elif type == "mskccOV1x1M":
         baseDir = "/data/TCGA/O_mskccOV1x1M/working/"
-        patterns = ["*transformation"]
+        patterns = ["*data.txt"]
         bed12file = baseDir + "agilent1x1M.bed"
         bed15file = baseDir + "mskccOV1x1M_bed15.bed"
         prefix = "mskccOV1x1M"
                 
         Data = CGH1x1M(baseDir, patterns, bed12file, bed15file, prefix)
+        Data.dataStartIndex = 3
+
+    elif type == "uncOVMiRNA":
+        baseDir = "/data/TCGA/O_uncOVMiRNA/working/"
+        patterns = ["*data.txt"]
+        bed12file = baseDir + "miRNA_8x15K.bed"
+        bed15file = baseDir + "uncOVHmiRNA_bed15.bed"
+        prefix = "uncOVHmiRNA"
+                
+        Data = MiRNA(baseDir, patterns, bed12file, bed15file, prefix)
+        Data.medianNormalize = 1
         
     elif type == "huEx":
         baseDir = "/data/TCGA/C_lblHuEx/working/"
         patterns = ["*data.txt"]