91cb10cf4bacaac09c32b12cbbe30a5f1e47da2b
mmaddren
  Mon Jul 25 14:14:24 2011 -0700
slight reworking in preparation for new module integration
diff --git python/programs/mkGeoPkg/mkGeoPkg python/programs/mkGeoPkg/mkGeoPkg
index ee808d4..c7732ac 100755
--- python/programs/mkGeoPkg/mkGeoPkg
+++ python/programs/mkGeoPkg/mkGeoPkg
@@ -146,38 +146,38 @@
 	expVars = None
 	series = None
 	datatype = None
 	
 	for stanza in mdb.itervalues():
 		
 		if 'objType' in stanza and stanza['objType'] == 'composite':
 			series = stanza
 			expVars = stanza['expVars'].split(',')
 			continue
 
 		if 'expId' not in stanza:
 			print stanza.name + ': no expId'
 			continue
 
-		#if 'geoSampleAccession' not in stanza:
+		if 'geoSampleAccession' not in stanza:
 			# if this hasn't been submitted to GEO yet, we'll add it to the submission list
 		if stanza['expId'] not in expIds:
 			expIds[stanza['expId']] = list()
 			
 		expIds[stanza['expId']].append(stanza)
 		
-		if 'geoSampleAccession' in stanza:
+		else:
 			# otherwise we keep track of the geo number for partially submitted samples
 			if stanza['expId'] not in geoMapping:
 				geoMapping[stanza['expId']] = stanza['geoSampleAccession']
 			elif geoMapping[stanza['expId']] != 'Inconsistent' and geoMapping[stanza['expId']] != stanza['geoSampleAccession']:
 				geoMapping[stanza['expId']] = 'Inconsistent'
 				print stanza.name + ': inconsistent geo mapping'
 		
 		if datatype == None and 'dataType' in stanza:
 			datatype = stanza['dataType']
 		elif datatype != None and 'dataType' in stanza and datatype != stanza['dataType']:
 			raise KeyError(stanza.name + ': inconsistent data type') 
 
 		
 	
 	datatype = datatypes[datatype]
@@ -224,36 +224,37 @@
 def createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
 	
 	print 'Creating HighThroughput soft file'
 
 	softfile = SoftFile()
 	fileList = list()
 	
 	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
 		
 	for idNum in expIds.iterkeys():
 		
 		expId = expIds[idNum]
 		firstStanza = expId[0]
 		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
 		sample = HighThroughputSampleStanza()
-		sample['^SAMPLE'] = firstStanza['metaObject']
-		sample['!Sample_type'] = 'SRA'
 		
 		concat = expVars[0]
 		for expVar in expVars[1:len(expVars)]:
 			concat += '_' + firstStanza[expVar]
+			
+		sample['^SAMPLE'] = concat
+		sample['!Sample_type'] = 'SRA'
 		sample['!Sample_title'] = concat
 		
 		if 'geoSeriesAccession' in series:
 			sample['!Sample_series_id'] = series['geoSeriesAccession']
 			
 		count = 1
 		
 		for stanza in expId:
 		
 			if isRawFile(stanza['fileName']):
 				sample['!Sample_raw_file_' + str(count)] = stanza['fileName']
 				sample['!Sample_raw_file_type_' + str(count)] = getFileType(stanza['fileName'])
 				
 				if 'checksum' in stanza:
 					sample['!Sample_raw_file_checksum_' + str(count)] = stanza['checksum']
@@ -348,106 +349,82 @@
 def createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
 	
 	print 'Creating MicroArray soft file'
 
 	softfile = SoftFile()
 	fileList = list()
 	
 	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
 	
 	for idNum in expIds.iterkeys():
 		
 		expId = expIds[idNum]
 		firstStanza = expId[0]
 		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
 		sample = MicroArraySampleStanza()
-		sample['^SAMPLE'] = firstStanza['accession']
+		
+		concat = expVars[0]
+		for expVar in expVars[1:len(expVars)]:
+			concat += '_' + firstStanza[expVar]
+			
+		sample['^SAMPLE'] = concat
 		
 		if 'geoSeriesAccession' in series:
 			sample['!Sample_series_id'] = series['geoSeriesAccession']
 
-		#concat = expVars[0]
-		#for expVar in expVars[1:len(expVars)]:
-		#	concat += '_' + firstStanza[expVar]
-		#sample['!Sample_title'] = concat
-		
-		sample['!Sample_geo_accession'] = firstStanza['accession']
+		sample['!Sample_title'] = concat
 		
 		count = 1
 			
 		for stanza in expId:
 		
 			if isSupplimentaryFile(stanza['fileName']):
 				sample['!Sample_supplementary_file_' + str(count)] = stanza['fileName']
 				
 				if 'checksum' in stanza:
 					sample['!Sample_supplementary_file_checksum_' + str(count)] = stanza['checksum']
 				elif md5sums != None and stanza['fileName'] in md5sums:
 					sample['!Sample_supplementary_file_checksum_' + str(count)] = md5sums[stanza['fileName']]
 				
-				# sample['!Sample_supplementary_file_build_' + str(count)] = database
-				
 				fileList.append(stanza['fileName'])
 				count = count + 1
 				
-		softfile[firstStanza['accession']] = sample
-
 		# sample['!Sample_table'] = KeyOptional # CEL file
-		# sample['!Sample_source_name_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_organism_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_characteristics_ch'] = KeyOnePlusNumbered
+		sample['!Sample_source_name_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_organism_ch'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_characteristics_ch'] = '[REPLACE]' #KeyOnePlusNumbered
 		# sample['!Sample_biomaterial_provider_ch'] = KeyZeroPlusNumbered
 		# sample['!Sample_treatment_protocol_ch'] = KeyZeroPlusNumbered
 		# sample['!Sample_growth_protocol_ch'] = KeyZeroPlusNumbered
-		# sample['!Sample_molecule_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_extract_protocol_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_label_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_label_protocol_ch'] = KeyOnePlusNumbered
-		# sample['!Sample_hyb_protocol'] = KeyOnePlus
-		# sample['!Sample_scan_protocol'] = KeyOnePlus
-		# sample['!Sample_data_processing'] = KeyOnePlus
-		# sample['!Sample_description'] = KeyZeroPlus
-		# sample['!Sample_platform_id'] = KeyRequired
+		sample['!Sample_molecule_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_extract_protocol_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_label_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_label_protocol_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
+		sample['!Sample_hyb_protocol'] = '[REPLACE]' #KeyOnePlus
+		sample['!Sample_scan_protocol'] = '[REPLACE]' #KeyOnePlus
+		sample['!Sample_data_processing'] = '[REPLACE]' #KeyOnePlus
+		sample['!Sample_description'] = '[REPLACE]' #KeyZeroPlus
+		sample['!Sample_platform_id'] = '[REPLACE]'
 		# sample['!Sample_geo_accession'] = KeyOptional
-		# sample['!Sample_anchor'] = KeyRequired
-		# sample['!Sample_type'] = KeyRequired
-		# sample['!Sample_tag_count'] = KeyRequired
-		# sample['!Sample_tag_length'] = KeyRequired
-		# sample['!Sample_table_begin'] = KeyRequired
-		# sample['!Sample_table_end'] = KeyRequired
-	
-	# for idk:
-	
-		# platform = PlatformStanza()
-		
-		# platform['^PLATFORM'] = KeyRequired
-		# platform['!Platform_title'] = KeyRequired
-		# platform['!Platform_distribution'] = KeyRequired
-		# platform['!Platform_technology'] = KeyRequired
-		# platform['!Platform_organism'] = KeyOnePlus
-		# platform['!Platform_manufacturer'] = KeyRequired
-		# platform['!Platform_manufacture_protocol'] = KeyOnePlus
-		# platform['!Platform_catalog_number'] = KeyZeroPlus
-		# platform['!Platform_web_link'] = KeyZeroPlus
-		# platform['!Platform_support'] = KeyOptional
-		# platform['!Platform_coating'] = KeyOptional
-		# platform['!Platform_description'] = KeyZeroPlus
-		# platform['!Platform_contributor'] = KeyZeroPlus
-		# platform['!Platform_pubmed_id'] = KeyZeroPlus
-		# platform['!Platform_geo_accession'] = KeyOptional
-		# platform['!Platform_table_begin'] = KeyRequired
-		# platform['!Platform_table_end'] = KeyRequired
+		# sample['!Sample_anchor'] = KeyRequired SAGE ONLY
+		# sample['!Sample_type'] = KeyRequired SAGE ONLY
+		# sample['!Sample_tag_count'] = KeyRequired SAGE ONLY
+		# sample['!Sample_tag_length'] = KeyRequired SAGE ONLY
+		sample['!Sample_table_begin'] = ''
+		sample['!Sample_table_end'] = ''
+	
+		softfile[firstStanza['accession']] = sample
 		
 	return softfile, fileList
 	
 	
 def createSpecialSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
 	softfile = SoftFile()
 	fileList = list()
 	
 	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
 	
 	for idNum in expIds.iterkeys():
 		
 		expId = expIds[idNum]
 		firstStanza = expId[0]
 		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
@@ -520,43 +497,45 @@
 	submission = dict()
 	sortedIds = expIds.keys()
 	sortedIds.sort()
 	print sortedIds
 	
 	if wholeComposite == 0:
 		sortedIds = sortedIds[sortedIds.index(submitStart):sortedIds.index(submitStart) + submitSize]
 		
 	minId = min(sortedIds)
 	maxId = max(sortedIds)
 	print 'Generating soft using expIds ' + minId + ' to ' + maxId
 	for expId in sortedIds:
 		submission[expId] = expIds[expId]
 
 	if datatype.soft == HighThroughputSoftFile:
-		softfile, fileList = createSpecialSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype)
+		softfile, fileList = createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype)
 	elif datatype.soft == MicroArraySoftFile:
 		softfile, fileList = createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype)
 	else:
 		raise KeyError('unsupported type')
 		
 	print 'Writing soft file'
 	outfileName = os.path.dirname(sys.argv[0]) + composite + '.soft'
 	outfile = open(outfileName, 'w')
 	outfile.write(str(softfile))
 	
 	fileString = outfileName
 	for file in fileList:
+		if not os.path.exists(file):
+			raise IOError(file)
 		fileString = fileString + ' ' + downloadsDirectory + file
 		
 	fileString.strip()
 	callString = '/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk -QTr -l300m ' + fileString + ' asp-geo@upload.ncbi.nlm.nih.gov:ENCODE/' + composite
 	outscript = open(composite + minId + '-' + maxId + '.sh', 'w')
 	outscript.write('#!/bin/sh\n\n')
 	outscript.write('/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk -QTr -l300m \\\n')
 	
 	outscript.write(os.path.dirname(sys.argv[0]) + composite + '.soft' + ' \\\n')
 	
 	for file in fileList:
 		if not os.path.exists(downloadsDirectory + file):
 			raise FileError(downloadsDirectory + file + ' does not exist')
 		outscript.write(downloadsDirectory + file + ' \\\n')