src/hg/tcga/scripts/processData.py 1.9
1.9 2009/08/12 05:05:24 jsanborn
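Added other TCGA ovarian datasets (uncOVG4502, harvardOVCGH415, uncOVMiRNA, jhuMethylOV) and updated the input file patterns for the existing harvardOV, broadAffyOV, and mskccOV1x1M datasets.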
Index: src/hg/tcga/scripts/processData.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/scripts/processData.py,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/tcga/scripts/processData.py 27 Apr 2009 23:49:23 -0000 1.8
+++ src/hg/tcga/scripts/processData.py 12 Aug 2009 05:05:24 -0000 1.9
@@ -21,8 +21,12 @@
print "\t\tbaylorABI -- Baylor ABI SNP data"
print "\t\twustlABI -- WUSTL ABI SNP Data"
print "\t\tsnp6 -- Broad SNP 6.0"
print "\t\tbroadAffyOV -- Affy U133A Ovarian"
+ print "\t\tuncOVG4502 -- Agilent G4502 Ovarian"
+ print "\t\tharvardOVCGH415 -- Agilent G4124A 415K CGH Ovarian"
+ print "\t\tuncOVMiRNA -- Agilent MiRNA Ovarian"
+ print "\t\tjhuMethylOV -- Methylation27 Ovarian"
print ""
def handleOpts(argv):
opts, args = getopt.getopt(argv, "ht:", ["help", "type="])
@@ -75,8 +79,19 @@
prefix = "jhuUscMethyl3Beta"
Data = MethylBetaData(baseDir, patterns, bed12file, bed15file, prefix)
+ elif type == "jhuMethylOV":
+ baseDir = "/data/TCGA/O_jhuMethylOV/working/"
+ patterns = ["*lvl-2*"]
+ bed12file = baseDir + "methylIllumina.bed"
+ bed15file = baseDir + "jhuMethylOV.bed"
+ prefix = "jhuMethyOV"
+
+ Data = Methyl27Data(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.dataStartIndex = 1
+ Data.medianNormalize = 1
+
elif type == "harvard":
baseDir = "/data/TCGA/O_harvardCGH/working/"
patterns = ["*data.txt"]
bed12file = baseDir + "agilentCGH244A.bed"
@@ -86,23 +101,43 @@
Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
elif type == "harvardOV":
baseDir = "/data/TCGA/O_harvardOVCGH/working/"
- patterns = ["*normalized", "*.data.txt"]
+ patterns = ["*.data.txt"]
bed12file = baseDir + "agilentCGH244A.bed"
bed15file = baseDir + "harvardOVCGH244A_bed15.bed"
prefix = "harvardOVCGH"
Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+ elif type == "harvardOVCGH415":
+ baseDir = "/data/TCGA/O_harvardOVCGH415/working/"
+ patterns = ["*.data.txt"]
+ bed12file = baseDir + "agilentG4124A.bed"
+ bed15file = baseDir + "harvardOVCGH415_bed15.bed"
+ prefix = "harvardOVCGH415"
+
+ Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+
elif type == "broadAffyOV":
baseDir = "/data/TCGA/O_broadAffyOV/working/"
- patterns = ["*_rma"]
+ patterns = ["*data.txt"]
bed12file = baseDir + "affyU133A.bed"
bed15file = baseDir + "broadAffyOV_bed15.bed"
prefix = "broadAffyOV"
Data = AffyU133A(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.medianNormalize = 1
+
+ elif type == "uncOVG4502":
+ baseDir = "/data/TCGA/O_uncOVG4502/working/"
+ patterns = ["*data.txt"]
+ bed12file = baseDir + "agilentG4502A.bed"
+ bed15file = baseDir + "uncOVG4502_bed15.bed"
+ prefix = "uncOVG4502"
+
+ Data = AffyU133A(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.medianNormalize = 1
elif type == "mskcc":
baseDir = "/data/TCGA/O_mskccCGH/working/"
patterns = ["*data.txt"]
@@ -124,14 +159,25 @@
Data.dataStartIndex = 2
elif type == "mskccOV1x1M":
baseDir = "/data/TCGA/O_mskccOV1x1M/working/"
- patterns = ["*transformation"]
+ patterns = ["*data.txt"]
bed12file = baseDir + "agilent1x1M.bed"
bed15file = baseDir + "mskccOV1x1M_bed15.bed"
prefix = "mskccOV1x1M"
Data = CGH1x1M(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.dataStartIndex = 3
+
+ elif type == "uncOVMiRNA":
+ baseDir = "/data/TCGA/O_uncOVMiRNA/working/"
+ patterns = ["*data.txt"]
+ bed12file = baseDir + "miRNA_8x15K.bed"
+ bed15file = baseDir + "uncOVHmiRNA_bed15.bed"
+ prefix = "uncOVHmiRNA"
+
+ Data = MiRNA(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.medianNormalize = 1
elif type == "huEx":
baseDir = "/data/TCGA/C_lblHuEx/working/"
patterns = ["*data.txt"]