src/hg/tcga/scripts/createLabTrackTCGA.py 1.1
1.1 2009/04/29 21:57:36 jsanborn
Added script to generate the TCGA labTrack file.
Index: src/hg/tcga/scripts/createLabTrackTCGA.py
===================================================================
RCS file: src/hg/tcga/scripts/createLabTrackTCGA.py
diff -N src/hg/tcga/scripts/createLabTrackTCGA.py
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/scripts/createLabTrackTCGA.py 29 Apr 2009 21:57:36 -0000 1.1
@@ -0,0 +1,65 @@
+import sys, string, re, getopt
+
+# Generate tcga_labTrack.tab from the 'names' lines of microarrayGroups.ra.
+# Each output row is "<sampleId>\t<name>": <name> is a TCGA name (or one of its
+# dash-delimited prefixes) and <sampleId> is that name's first sampleIdLen
+# characters.  Rows are sorted by sample id, then by name.
+
+inPath = "../../makeDb/hgCgiData/Human/microarrayGroups.ra"
+outPath = "tcga_labTrack.tab"
+sampleIdLen = 16
+
+# name -> sample id; the first mapping seen for a name is kept
+sampleIdByName = {}
+
+with open(inPath, 'r') as inFile:
+    # The first line of the input is never examined.
+    # NOTE(review): presumably a header line -- confirm against the .ra file.
+    inFile.readline()
+
+    for line in inFile:
+        # Strip only the newline, so a last line that lacks one keeps its
+        # final character.
+        line = line.rstrip('\n')
+
+        # ignore all non-'names' lines
+        if not line.startswith("names"):
+            continue
+
+        fields = line.split(' ')
+        if len(fields) < 2:
+            # a 'names' key with no value: nothing to record
+            continue
+
+        # ignore lines whose id list does not begin with a TCGA id
+        if not fields[1].startswith("TCGA"):
+            continue
+
+        for n in fields[1].split(','):
+            if not n.startswith("TCGA"):
+                continue
+
+            # n[:sampleIdLen] is n itself when n is no longer than that
+            sampleId = n[:sampleIdLen]
+
+            # Record every dash-delimited prefix of n that is at least
+            # sampleIdLen characters long, up to and including n itself.
+            # NOTE(review): confirm that prefixes, not just the full name,
+            # are meant to be recorded.
+            nameParts = n.split('-')
+            prefix = nameParts[0]
+            for part in nameParts[1:]:
+                prefix += "-" + part
+                if len(prefix) >= sampleIdLen:
+                    sampleIdByName.setdefault(prefix, sampleId)
+
+rows = sorted(sampleIdByName.items(), key=lambda item: (item[1], item[0]))
+
+with open(outPath, 'w') as outFile:
+    for name, sampleId in rows:
+        # A single pre-formatted argument prints the same text whether print
+        # is a statement (Python 2) or a function (Python 3).
+        print("%s %s" % (sampleId, name))
+        outFile.write(sampleId + "\t" + name + "\n")
+
+