src/hg/tcga/scripts/processData.py 1.8
1.8 2009/04/27 23:49:23 jsanborn
added U133A (new broadAffyOV data type: Affy U133A Ovarian); also added the mskccOV and mskccOV1x1M data types
Index: src/hg/tcga/scripts/processData.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/scripts/processData.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 4 -r1.7 -r1.8
--- src/hg/tcga/scripts/processData.py 25 Apr 2009 02:17:38 -0000 1.7
+++ src/hg/tcga/scripts/processData.py 27 Apr 2009 23:49:23 -0000 1.8
@@ -12,14 +12,17 @@
print "\t\tmethyl3Beta -- JHU-USC Methylation - OMA003: Avg Beta Values"
print "\t\tharvard -- Harvard CGH 244A GBM"
print "\t\tharvardOV -- Harvard CGH 244A Ovarian"
print "\t\tmskcc -- MSKCC 244A"
+ print "\t\tmskccOV -- MSKCC Ovarian 244A"
+ print "\t\tmskccOV1x1M - MSKCC Ovarian 1x1M"
print "\t\thuEx -- human exon array"
print "\t\tbroadABI -- Broad ABI SNP data -- GBM"
print "\t\tbroadABI -- Broad ABI SNP data -- Ovarian"
print "\t\tbaylorABI -- Baylor ABI SNP data"
print "\t\twustlABI -- WUSTL ABI SNP Data"
print "\t\tsnp6 -- Broad SNP 6.0"
+ print "\t\tbroadAffyOV -- Affy U133A Ovarian"
print ""
def handleOpts(argv):
opts, args = getopt.getopt(argv, "ht:", ["help", "type="])
@@ -83,23 +86,53 @@
Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
elif type == "harvardOV":
baseDir = "/data/TCGA/O_harvardOVCGH/working/"
- patterns = ["*data.txt"]
+ patterns = ["*normalized", "*.data.txt"]
bed12file = baseDir + "agilentCGH244A.bed"
bed15file = baseDir + "harvardOVCGH244A_bed15.bed"
prefix = "harvardOVCGH"
Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+ elif type == "broadAffyOV":
+ baseDir = "/data/TCGA/O_broadAffyOV/working/"
+ patterns = ["*_rma"]
+ bed12file = baseDir + "affyU133A.bed"
+ bed15file = baseDir + "broadAffyOV_bed15.bed"
+ prefix = "broadAffyOV"
+
+ Data = AffyU133A(baseDir, patterns, bed12file, bed15file, prefix)
+
elif type == "mskcc":
baseDir = "/data/TCGA/O_mskccCGH/working/"
patterns = ["*data.txt"]
bed12file = baseDir + "agilentCGH244A.bed"
bed15file = baseDir + "mskccCGH244A_bed15.bed"
prefix = "mskccCGH"
Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.dataStartIndex = 3
+
+ elif type == "mskccOV":
+ baseDir = "/data/TCGA/O_mskccOVCGH/working/"
+ patterns = ["*transformation"]
+ bed12file = baseDir + "agilentCGH244A.bed"
+ bed15file = baseDir + "mskccOVCGH244A_bed15.bed"
+ prefix = "mskccOVCGH"
+
+ Data = CGH244A(baseDir, patterns, bed12file, bed15file, prefix)
+ Data.dataStartIndex = 2
+
+ elif type == "mskccOV1x1M":
+ baseDir = "/data/TCGA/O_mskccOV1x1M/working/"
+ patterns = ["*transformation"]
+ bed12file = baseDir + "agilent1x1M.bed"
+ bed15file = baseDir + "mskccOV1x1M_bed15.bed"
+ prefix = "mskccOV1x1M"
+
+ Data = CGH1x1M(baseDir, patterns, bed12file, bed15file, prefix)
+
elif type == "huEx":
baseDir = "/data/TCGA/C_lblHuEx/working/"
patterns = ["*data.txt"]
bed12file = baseDir + "affyHuEx1_bed12.bed"
@@ -163,12 +197,12 @@
Data.sampleIdField = "Tumor_Sample_Barcode"
Data.collapseStatus = {}
Data.collapseStatus["Somatic"] = ["Somatic"]
- Data.collapseStatus["Germline"] = ["Germline", "LOH", "Unknown"]
+ Data.collapseStatus["Germline"] = ["Germline"] #, "LOH", "Unknown"]
Data.collapseClass = {}
- Data.collapseClass["Quiet"] = ["Silent", "Targeted_Region"]
+ Data.collapseClass["Quiet"] = ["Silent"]
Data.collapseClass["Loud"] = ["Frame_Shift_Del", "Frame_Shift_Ins",
"In_Frame_Del", "In_Frame_Ins",
"Missense_Mutation", "Nonsense_Mutation",
"Splice_Site_Indel", "Splice_Site_SNP"]
@@ -286,8 +320,11 @@
print "Writing entry for microarrayGroups.ra file..."
dataObject.writeRaFile()
+ print "Add txLength data to score field, only works on SNP data..."
+ dataObject.addTxLength()
+
if __name__ == "__main__":
main(sys.argv[1:])