src/hg/tcga/scripts/createLabTrackTCGA.py 1.1

1.1 2009/04/29 21:57:36 jsanborn
added script to generate tcga labTrack file
Index: src/hg/tcga/scripts/createLabTrackTCGA.py
===================================================================
RCS file: src/hg/tcga/scripts/createLabTrackTCGA.py
diff -N src/hg/tcga/scripts/createLabTrackTCGA.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/scripts/createLabTrackTCGA.py	29 Apr 2009 21:57:36 -0000	1.1
@@ -0,0 +1,55 @@
+import sys, string, re, getopt
+
+# Build tcga_labTrack.tab from the TCGA names listed in microarrayGroups.ra.
+# Each output row is "<sampleId>\t<name>": name is a '-'-delimited prefix of
+# a TCGA name that is at least SAMPLE_ID_LEN characters long, and sampleId
+# is the first SAMPLE_ID_LEN characters of that TCGA name.
+SAMPLE_ID_LEN = 16
+
+inFile = open("../../makeDb/hgCgiData/Human/microarrayGroups.ra", 'r')
+try:
+    # skip the first line (NOTE(review): presumably a header line; confirm)
+    inFile.readline()
+    lines = inFile.readlines()
+finally:
+    inFile.close()
+
+sampleIds = {}    # name prefix -> sample id; the first occurrence wins
+for line in lines:
+    # strip only the line terminator, so a final line without a trailing
+    # newline keeps its last character
+    line = line.rstrip("\r\n")
+
+    # ignore all non-'names' lines
+    if not line.startswith("names"):
+        continue
+
+    # ignore 'names' lines with no value, and lines of non-TCGA ids
+    fields = line.split(' ')
+    if len(fields) < 2 or not fields[1].startswith("TCGA"):
+        continue
+
+    for n in fields[1].split(','):
+        if not n.startswith("TCGA"):
+            continue
+
+        sampleId = n[:SAMPLE_ID_LEN]
+        parts = n.split('-')
+        prefix = parts[0]
+        for part in parts[1:]:
+            prefix += "-" + part
+            if len(prefix) >= SAMPLE_ID_LEN:
+                sampleIds.setdefault(prefix, sampleId)
+
+# sort by sample id, then by name (this form runs on Python 2 and Python 3)
+rows = sorted(sampleIds.items(), key=lambda item: (item[1], item[0]))
+
+outFile = open("tcga_labTrack.tab", 'w')
+try:
+    for name, sampleId in rows:
+        sys.stdout.write("%s %s\n" % (sampleId, name))
+        outFile.write(sampleId + "\t" + name + "\n")
+finally:
+    outFile.close()
+
+