8aea88625794a4942add9bf0b2ed9660f98404bf
mmaddren
  Tue Jul 19 15:40:41 2011 -0700
temporary commit for adding additional bigWigs to cshllong
diff --git python/programs/mkGeoPkg/mkGeoPkg python/programs/mkGeoPkg/mkGeoPkg
index 2930f66..ee808d4 100755
--- python/programs/mkGeoPkg/mkGeoPkg
+++ python/programs/mkGeoPkg/mkGeoPkg
@@ -146,38 +146,38 @@
 	expVars = None
 	series = None
 	datatype = None
 	
 	for stanza in mdb.itervalues():
 		
 		if 'objType' in stanza and stanza['objType'] == 'composite':
 			series = stanza
 			expVars = stanza['expVars'].split(',')
 			continue
 
 		if 'expId' not in stanza:
 			print stanza.name + ': no expId'
 			continue
 
-		if 'geoSampleAccession' not in stanza:
+		#if 'geoSampleAccession' not in stanza:
 			# if this hasn't been submitted to GEO yet, we'll add it to the submission list
 			if stanza['expId'] not in expIds:
 				expIds[stanza['expId']] = list()
 				
 			expIds[stanza['expId']].append(stanza)
 		
-		else:
+		if 'geoSampleAccession' in stanza:
 			# otherwise we keep track of the geo number for partially submitted samples
 			if stanza['expId'] not in geoMapping:
 				geoMapping[stanza['expId']] = stanza['geoSampleAccession']
 			elif geoMapping[stanza['expId']] != 'Inconsistent' and geoMapping[stanza['expId']] != stanza['geoSampleAccession']:
 				geoMapping[stanza['expId']] = 'Inconsistent'
 				print stanza.name + ': inconsistent geo mapping'
 		
 		if datatype == None and 'dataType' in stanza:
 			datatype = stanza['dataType']
 		elif datatype != None and 'dataType' in stanza and datatype != stanza['dataType']:
 			raise KeyError(stanza.name + ': inconsistent data type') 
 
 		
 	
 	datatype = datatypes[datatype]
@@ -209,31 +209,31 @@
 	#stanza['!Series_repeats_sample_list_1'] = 'replist1' # ^
 	
 	seriesStanza['!Series_sample_id'] = list()
 	
 	for idNum in expIds.iterkeys():
 		if idNum in geoMapping and geoMapping[idNum] != 'Inconsistent':
 			seriesStanza['!Series_sample_id'].append(geoMapping[idNum])
 		else:
 			seriesStanza['!Series_sample_id'].append(expIds[idNum][0]['metaObject'])
 	
 	if 'geoAccession' in series:
 		seriesStanza['!Series_geo_accession'] = series['geoAccession']
 	
 	softfile[series['composite']] = seriesStanza
 	
-def createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype, copyDirectory):
+def createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
 	
 	print 'Creating HighThroughput soft file'
 
 	softfile = SoftFile()
 	fileList = list()
 	
 	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
 		
 	for idNum in expIds.iterkeys():
 		
 		expId = expIds[idNum]
 		firstStanza = expId[0]
 		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
 		sample = HighThroughputSampleStanza()
 		sample['^SAMPLE'] = firstStanza['metaObject']
@@ -333,31 +333,31 @@
 		sample['!Sample_instrument_model'] = '[REPLACE]'
 		for stanza in expId:	
 			if 'seqPlatform' in stanza:
 				sample['!Sample_instrument_model'] = instrumentModels[stanza['seqPlatform']]
 			
 		sample['!Sample_data_processing'] = compositeUrl
 
 		if idNum in geoMapping and geoMapping[idNum] != 'Inconsistent':
 			sample['!Sample_geo_accession'] = geoMapping[idNum]
 		
 		softfile[firstStanza['metaObject']] = sample
 		
 	return softfile, fileList
 		
 		
-def createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype, copyDirectory):
+def createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
 	
 	print 'Creating MicroArray soft file'
 
 	softfile = SoftFile()
 	fileList = list()
 	
 	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
 	
 	for idNum in expIds.iterkeys():
 		
 		expId = expIds[idNum]
 		firstStanza = expId[0]
 		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
 		sample = MicroArraySampleStanza()
 		sample['^SAMPLE'] = firstStanza['accession']
@@ -428,75 +428,125 @@
 		# platform['!Platform_manufacture_protocol'] = KeyOnePlus
 		# platform['!Platform_catalog_number'] = KeyZeroPlus
 		# platform['!Platform_web_link'] = KeyZeroPlus
 		# platform['!Platform_support'] = KeyOptional
 		# platform['!Platform_coating'] = KeyOptional
 		# platform['!Platform_description'] = KeyZeroPlus
 		# platform['!Platform_contributor'] = KeyZeroPlus
 		# platform['!Platform_pubmed_id'] = KeyZeroPlus
 		# platform['!Platform_geo_accession'] = KeyOptional
 		# platform['!Platform_table_begin'] = KeyRequired
 		# platform['!Platform_table_end'] = KeyRequired
 		
 	return softfile, fileList
 	
 		
+def createSpecialSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
+	softfile = SoftFile()
+	fileList = list()
+	# Build a soft file that lists only the bigWig files of each experiment in expIds; createSeries is called first on the same softfile.
+	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
+	# NOTE(review): database, organism, mdb, cv, expVars and datatype are accepted but not used in this body.
+	for idNum in expIds.iterkeys():
+		# expIds[idNum] is indexed and iterated as a sequence of stanzas; the first stanza supplies the sample-level fields.
+		expId = expIds[idNum]
+		firstStanza = expId[0]
+		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
+		sample = HighThroughputSampleStanza()
+		# First pass: skip this experiment entirely when none of its stanzas is a bigWig file.
+		hasbigwig = 0
+		for stanza in expId:
+		
+			if getFileType(stanza['fileName']) == 'bigWig':
+				hasbigwig = 1
+				
+		if hasbigwig == 0:
+			continue
+		# The sample is named by its GEO accession; NOTE(review): no check that the first stanza has geoSampleAccession - confirm callers guarantee it.
+		sample['^SAMPLE'] = firstStanza['geoSampleAccession']
+		
+		if 'geoSeriesAccession' in series:
+			sample['!Sample_series_id'] = series['geoSeriesAccession']
+			
+		sample['!Sample_geo_accession'] = firstStanza['geoSampleAccession']
+		
+		count = 1
+			# Second pass: record each bigWig as a numbered supplementary file (numbering starts at 1).
+		for stanza in expId:
+		
+			if getFileType(stanza['fileName']) == 'bigWig':
+				sample['!Sample_supplementary_file_' + str(count)] = stanza['fileName']
+				# Prefer the checksum stored on the stanza; otherwise fall back to the md5sums lookup by file name (no checksum key is set if neither exists).
+				if 'checksum' in stanza:
+					sample['!Sample_supplementary_file_checksum_' + str(count)] = stanza['checksum']
+				elif md5sums != None and stanza['fileName'] in md5sums:
+					sample['!Sample_supplementary_file_checksum_' + str(count)] = md5sums[stanza['fileName']]
+				
+				# sample['!Sample_supplementary_file_build_' + str(count)] = database
+				
+				fileList.append(stanza['fileName'])
+				count = count + 1
+				
+		softfile[firstStanza['geoSampleAccession']] = sample
+		# Result: the populated softfile plus the fileName of every bigWig that was attached to a sample.
+	return softfile, fileList
+		
 def main():
 	database = sys.argv[1]
 	composite = sys.argv[2]
 	
 	wholeComposite = 1
 	if len(sys.argv) == 5:
 		submitStart = sys.argv[3]
 		submitSize = int(sys.argv[4])
 		wholeComposite = 0
 		
 	organism = organisms[database]
 
 	mdbPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/' + organism + '/' + database + '/metaDb/alpha/' + composite + '.ra' #CHANGE
 	cvPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/cv/alpha/cv.ra' #CHANGE
 	trackPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/' + organism + '/' + database + '/' + composite + '.ra'
 	md5path = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + composite + '/md5sum.txt'
 
 	downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + composite + '/'
 	
 	compositeUrl = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + composite
 	
 	mdb = RaFile(mdbPath)
 	cv = CvFile(cvPath)
 	track = RaFile(trackPath)
 	md5sums = readMd5sums(md5path)
-	print md5sums
 	expIds, expVars, geoMapping, series, datatype = createMappings(mdb)
 	
 	submission = dict()
 	sortedIds = expIds.keys()
 	sortedIds.sort()
+	print sortedIds
 	
 	if wholeComposite == 0:
 		sortedIds = sortedIds[sortedIds.index(submitStart):sortedIds.index(submitStart) + submitSize]
 		
 	minId = min(sortedIds)
 	maxId = max(sortedIds)
 	print 'Generating soft using expIds ' + minId + ' to ' + maxId
 	for expId in sortedIds:
 		submission[expId] = expIds[expId]
 
 	if datatype.soft == HighThroughputSoftFile:
-		softfile, fileList = createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype, copyDirectory)
+		softfile, fileList = createSpecialSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype)
 	elif datatype.soft == MicroArraySoftFile:
-		softfile, fileList = createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype, copyDirectory)
+		softfile, fileList = createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype)
 	else:
 		raise KeyError('unsupported type')
 		
 	print 'Writing soft file'
 	outfileName = os.path.dirname(sys.argv[0]) + composite + '.soft'
 	outfile = open(outfileName, 'w')
 	outfile.write(str(softfile))
 	
 	fileString = outfileName
 	for file in fileList:
 		fileString = fileString + ' ' + downloadsDirectory + file
 		
 	fileString.strip()
 	callString = '/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk -QTr -l300m ' + fileString + ' asp-geo@upload.ncbi.nlm.nih.gov:ENCODE/' + composite
 	outscript = open(composite + minId + '-' + maxId + '.sh', 'w')