python/programs/mkGeoPkg/mkGeoPkg fa1fd1e6225697d08803d99446cab7d4e0d549e9

fa1fd1e6225697d08803d99446cab7d4e0d549e9
vsmalladi
  Fri Oct 21 14:38:24 2011 -0700
Moved all python scripts from /python/programs to live in sr/hg/encode as per Kate's request for all encode programs in a common place.
diff --git python/programs/mkGeoPkg/mkGeoPkg python/programs/mkGeoPkg/mkGeoPkg
deleted file mode 100755
index e85c252..0000000
--- python/programs/mkGeoPkg/mkGeoPkg
+++ /dev/null
@@ -1,619 +0,0 @@
-#!/hive/groups/encode/dcc/bin/python
-import sys, os, shutil, stat, argparse, datetime
-from ucscgenomics import track, ra, soft, cv
-
-class DataType(object):
-
-	def __init__(self, molecule, strategy, source, selection, soft):
-		self.molecule = molecule
-		self.strategy = strategy
-		self.source = source
-		self.selection = selection
-		self.soft = soft
-
-datatypes = {
-	'Cage': DataType('OVERRIDE RNA', 'OTHER', 'transcriptomic', 'CAGE', soft.HighThroughputSoftFile),
-	'ChipSeq': DataType('genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', soft.HighThroughputSoftFile),
-	'DnaPet': DataType('genomic DNA', 'OTHER', 'genomic', 'size fractionation', soft.HighThroughputSoftFile),
-	'DnaseDgf': DataType('genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', soft.HighThroughputSoftFile),
-	'DnaseSeq': DataType('genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', soft.HighThroughputSoftFile),
-	'FaireSeq': DataType('genomic DNA', 'OTHER', 'genomic', 'other', soft.HighThroughputSoftFile),
-	'MethylSeq': DataType('genomic DNA', 'MRE-Seq', 'genomic', 'Restriction Digest', soft.HighThroughputSoftFile),
-	'MethylRrbs': DataType('genomic DNA', 'Bisulfite-Seq', 'genomic', 'Reduced Representation', soft.HighThroughputSoftFile),
-	'Orchid': DataType('genomic DNA', 'OTHER', 'genomic', 'other', soft.HighThroughputSoftFile),
-	'Proteogenomics': DataType('protein', 'mass spectrometry-based proteogenomic mapping', 'protein', 'chromatographically fractionated peptides', soft.HighThroughputSoftFile),
-	'RnaPet': DataType('OVERRIDE RNA', 'OTHER', 'transcriptomic', 'other', soft.HighThroughputSoftFile),
-	'RnaSeq': DataType('OVERRIDE RNA', 'RNA-Seq', 'transcriptomic', 'cDNA', soft.HighThroughputSoftFile),
-	
-	#these need to be curated
-	'5C': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'AffyExonArray': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', soft.MicroArraySoftFile),
-	'Bip': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Cluster': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Cnv': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Combined': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Genotype': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Gencode': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'ChiaPet': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Mapability': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'MethylArray': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'NRE': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Nucleosome': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'RnaChip': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'RipGeneSt': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'RipTiling': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'RipChip': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'RipSeq': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'Switchgear': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-	'TfbsValid': DataType('REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None)
-}
-
-#compare this to the source in datatype, give GP ids depending on the type
-gpIds = {
-	'human genomic': '63443',
-	'human transcriptomic': '30709',
-	'human protein': '63447',
-	
-	'mouse genomic': '63471',
-	'mouse transcriptomic': '66167',
-	'mouse protein': '63475'
-}
-
-cvDetails = {
-	'cell':	[ 'organism', 'description', 'karyotype', 'lineage', 'sex' ],
-	'antibody': [ 'antibodyDescription', 'targetDescription', 'vendorName', 'vendorId' ]
-}
-
-#if the term appears in the mdb and must overriding the value in the cv
-cvOverride = [ 'sex' ]
-
-#talk to Venkat lol
-cvPretend = { 'antibody Input': 'control' }
-
-#if its not in cvDetails, which things should we check by default
-cvDefaults = [ 'description' ]
-
-mdbWhitelist = [
-	'age',
-	'bioRep',
-	'control',
-	'controlId',
-	'fragSize',
-	'labExpId',
-	'labVersion',
-	'mapAlgorithm',
-	'obtainedBy',
-	'phase',
-	'readType',
-	'region',
-	'replicate',
-	'restrictionEnzyme',
-	'run',
-	'softwareVersion',
-	'spikeInPool',
-	'strain'
-]
-
-# if the molecule is RNA, we need to map our data into !Sample_molecule, which only takes certain fields
-# first we check rnaExtractMapping. If its not there, we use the localization. This is because (at current)
-# polyA is the most important trait, otherwise its going to be nonPolyA which GEO doesn't accept that. 
-rnaExtractMapping = {
-	'shortPolyA': 'polyA RNA', 
-	'longPolyA': 'polyA RNA', 
-	'polyA': 'polyA RNA'
-}
-
-localizationMapping = {
-	'cytosol': 'cytoplasmic RNA', 
-	'polysome': 'cytoplasmic RNA',
-	'membraneFraction': 'cytoplasmic RNA',
-	'mitochondria': 'cytoplasmic RNA',
-	'nucleus': 'nuclear RNA', 
-	'nucleolus': 'nuclear RNA', 
-	'nucleoplasm': 'nuclear RNA', 
-	'nuclearMatrix': 'nuclear RNA', 
-	'chromatin': 'nuclear RNA',
-	'cell': 'total RNA'
-}
-
-# map our instrument names to GEO's names
-instrumentModels = {
-	'Illumina_GA2x': 'Illumina Genome Analyzer II',
-	'Illumina_GA2': 'Illumina Genome Analyzer II',
-	'Illumina_HiSeq_2000': 'Illumina HiSeq 2000'
-}
-
-	
-def isRawFile(file):
-	return (file.extension == 'fastq' or file.extension == 'fasta')
-	
-def isSupplimentaryFile(file):
-	return not isRawFile(file)
-	
-def sampleTitle(stanza, expVars, warn=False):
-	concat = stanza[expVars[0]].replace('-m', '')
-	for expVar in expVars[1:len(expVars)]:
-		if expVar in stanza and stanza[expVar] != 'None':
-			concat += '_' + stanza[expVar]
-		elif warn:
-			print 'warning: %s is None or not in %s' % (expVar, stanza.name)
-	return concat
-	
-def createMappings(mdb):
-	expIds = dict()
-	geoMapping = dict()
-	expVars = None
-	series = None
-	datatype = None
-	
-	for stanza in mdb.itervalues():
-		
-		if 'objType' in stanza and stanza['objType'] == 'composite':
-			series = stanza
-			expVars = stanza['expVars'].split(',')
-			continue
-
-		if 'expId' not in stanza:
-			print stanza.name + ': no expId'
-			continue
-
-		if 'geoSampleAccession' not in stanza:
-			# if this hasn't been submitted to GEO yet, we'll add it to the submission list
-			if stanza['expId'] not in expIds:
-				expIds[stanza['expId']] = list()
-				
-			expIds[stanza['expId']].append(stanza)
-		
-		else:
-			# otherwise we keep track of the geo number for partially submitted samples
-			if stanza['expId'] not in geoMapping:
-				geoMapping[stanza['expId']] = stanza['geoSampleAccession']
-			elif geoMapping[stanza['expId']] != 'Inconsistent' and geoMapping[stanza['expId']] != stanza['geoSampleAccession']:
-				geoMapping[stanza['expId']] = 'Inconsistent'
-				print stanza.name + ': inconsistent geo mapping'
-		
-		if datatype == None and 'dataType' in stanza:
-			datatype = stanza['dataType']
-		elif datatype != None and 'dataType' in stanza and datatype != stanza['dataType']:
-			raise KeyError(stanza.name + ': inconsistent data type') 
-
-	datatype = datatypes[datatype]
-	
-	return expIds, expVars, geoMapping, series, datatype
-	
-	
-def createSeries(softfile, compositeTrack, expIds, expVars, geoMapping, series, datatype, replace, audit):
-	
-	if 'geoSeriesAccession' in series:
-		print 'Existing series ' + series['composite'] + ' using geoSeriesAccession ' + series['geoSeriesAccession']
-		return
-		
-	print 'Writing series ' + series['composite']
-	
-	seriesStanza = soft.SeriesStanza()
-	seriesStanza['^SERIES'] = series['composite']
-	seriesStanza['!Series_title'] = compositeTrack.trackDb[compositeTrack.name]['longLabel'] #STILL INCORRECT
-	
-	if '!Series_summary' in replace:
-		seriesStanza['!Series_summary'] = replace['!Series_summary']
-	else:
-		print 'warning: no series summary found. Please include in replace file.'
-		seriesStanza['!Series_summary'] = '[REPLACE]'
-		if audit:
-			print seriesStanza.name + ': no summary'
-		
-	if '!Series_overall_design' in replace:
-		seriesStanza['!Series_overall_design'] = replace['!Series_overall_design']
-	else:
-		print 'no series overall design found. Please include in replace file.'
-		seriesStanza['!Series_overall_design'] = '[REPLACE]'
-		if audit:
-			print seriesStanza.name + ': no overall design'
-			
-	seriesStanza['!Series_web_link'] = [ compositeTrack.url, 'http://www.ncbi.nlm.nih.gov/geo/info/ENCODE.html' ]
-	
-	if '!Series_contributor' in replace:
-		seriesStanza['!Series_contributor'] = replace['!Series_contributor']
-	else:
-		seriesStanza['!Series_contributor'] = '[REPLACE]'
-		if audit:
-			print seriesStanza.name + ': no contributor'
-		
-	seriesStanza['!Series_gp_id'] = gpIds[compositeTrack.organism + ' ' + datatype.source]
-	
-	# could use !Series_variable_* and !Series_repeats_*
-	
-	seriesStanza['!Series_sample_id'] = list()
-	
-	for idNum in expIds.iterkeys():
-		if idNum in geoMapping and geoMapping[idNum] != 'Inconsistent':
-			seriesStanza['!Series_sample_id'].append(geoMapping[idNum])
-		else:
-			seriesStanza['!Series_sample_id'].append(sampleTitle(expIds[idNum][0], expVars))
-	
-	softfile[series['composite']] = seriesStanza
-	
-def createHighThroughputSoftFile(compositeTrack, cv, expIds, expVars, geoMapping, series, datatype, replace, audit):
-	
-	print 'Creating HighThroughput soft file'
-
-	softfile = soft.HighThroughputSoftFile()
-	fileList = list()
-	
-	createSeries(softfile, compositeTrack, expIds, expVars, geoMapping, series, datatype, replace, audit)
-		
-	for idNum in expIds.iterkeys():
-		
-		expId = expIds[idNum]
-		firstStanza = expId[0]
-		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
-		sample = soft.HighThroughputSampleStanza()
-
-		sample['^SAMPLE'] = sampleTitle(firstStanza, expVars, 1)
-		sample['!Sample_type'] = 'SRA'
-		sample['!Sample_title'] = sample['^SAMPLE']
-		
-		if 'geoSeriesAccession' in series:
-			sample['!Sample_series_id'] = series['geoSeriesAccession']
-			
-		count = 1
-		
-		#figure out if the instrument model is consistent across the entire sample
-		instrumentModel = None
-		for stanza in expId:	
-			if 'seqPlatform' in stanza:
-				if instrumentModel == None:
-					instrumentModel = stanza['seqPlatform']
-				else:
-					if instrumentModel != stanza['seqPlatform']:
-						instrumentModel = None
-						if audit:
-							print 'expId' + str(expId) + ': inconsistent instrument model'
-						break
-		
-		for stanza in expId:
-		
-			file = compositeTrack.files[stanza['fileName']]
-			
-			if isRawFile(file):
-				sample['!Sample_raw_file_' + str(count)] = file.name
-				sample['!Sample_raw_file_type_' + str(count)] = file.extension
-				
-				if file.md5sum != None:
-					sample['!Sample_raw_file_checksum_' + str(count)] = file.md5sum
-
-				if instrumentModel == None and 'seqPlatform' in stanza:
-					sample['!Sample_raw_file_instrument_model_' + str(count)] = stanza['seqPlatform']
-					
-				fileList.append(file)	
-				count = count + 1
-			
-		count = 1
-			
-		for stanza in expId:
-		
-			file = compositeTrack.files[stanza['fileName']]
-		
-			if isSupplimentaryFile(file):
-				sample['!Sample_supplementary_file_' + str(count)] = file.name
-				
-				if file.md5sum != None:
-					sample['!Sample_supplementary_file_checksum_' + str(count)] = file.md5sum
-				
-				sample['!Sample_supplementary_file_build_' + str(count)] = compositeTrack.database
-				
-				if instrumentModel == None and 'seqPlatform' in stanza:
-					sample['!Sample_supplementary_file_instrument_model_' + str(count)] = stanza['seqPlatform']
-				
-				fileList.append(file)
-				count = count + 1
-			
-		sample['!Sample_source_name'] = firstStanza['cell']
-		sample['!Sample_organism'] = compositeTrack.organism
-		
-		sample['!Sample_characteristics'] = list()
-		allVars = expVars + mdbWhitelist
-		
-		for var in allVars:
-			if var in firstStanza:
-				foobar = var
-				sample['!Sample_characteristics'].append(var + ': ' + firstStanza[var])
-				for pretend in cvPretend.iterkeys():
-					if var + ' ' + firstStanza[var] == pretend:
-						foobar = cvPretend[pretend]
-				if foobar in cvDetails:
-					for cvVar in cvDetails[foobar]:
-						if cvVar in cvOverride and cvVar in firstStanza:
-							sample['!Sample_characteristics'].append(var + ' ' + cvVar + ': ' + firstStanza[cvVar])
-						elif cvVar in cv[firstStanza[var]]:
-							sample['!Sample_characteristics'].append(var + ' ' + cvVar + ': ' + cv[firstStanza[var]][cvVar])
-				else:
-					for cvVar in cvDefaults:
-						if firstStanza[var] in cv and cvVar in cv[firstStanza[var]]:
-							sample['!Sample_characteristics'].append(var + ' ' +  cvVar + ': ' + cv[firstStanza[var]][cvVar])
-				
-		sample['!Sample_biomaterial_provider'] = cv[firstStanza['cell']]['vendorName']
-		
-		if 'treatment' in firstStanza:
-			sample['!Sample_treatment_protocol'] = firstStanza['treatment']
-		
-		if 'protocol' in cv[firstStanza['cell']]:
-			for protocol in cv[firstStanza['cell']]['protocol'].split(' '):
-					key, val = protocol.split(':')
-					if key == 'ENCODE' or key == cv[firstStanza['lab']]['labPi']:
-						sample['!Sample_growth_protocol'] = val
-		
-		if datatype.molecule == 'OVERRIDE RNA':
-			if firstStanza['rnaExtract'] in rnaExtractMapping:
-				sample['!Sample_molecule'] = rnaExtractMapping[firstStanza['rnaExtract']]
-			elif firstStanza['localization'] in localizationMapping:
-				sample['!Sample_molecule'] = localizationMapping[firstStanza['localization']]
-				
-		else:
-			sample['!Sample_molecule'] = datatype.molecule
-			
-		sample['!Sample_extract_protocol'] = compositeTrack.url
-		sample['!Sample_library_strategy'] = datatype.strategy
-		sample['!Sample_library_source'] = datatype.source
-		sample['!Sample_library_selection'] = datatype.selection
-		
-		# if the instrumentModel is consistent, just use that
-		# otherwise take the first seqPlatform value from metadata
-		# if that still fails, check the replacement file
-		# finally just make it say [REPLACE]
-		if instrumentModel != None:
-			sample['!Sample_instrument_model'] = instrumentModel
-		else:
-			for stanza in expId:	
-				if 'seqPlatform' in stanza:
-					sample['!Sample_instrument_model'] = instrumentModels[stanza['seqPlatform']]
-					break
-			if '!Sample_instrument_model' not in sample:
-				if '!Sample_instrument_model' in replace:
-					sample['!Sample_instrument_model'] = instrumentModels[replace['!Sample_instrument_model'][0]]
-			if '!Sample_instrument_model' not in sample:
-				sample['!Sample_instrument_model'] = '[REPLACE]'
-				if audit:
-					print stanza.name + ': no instrument'
-				
-		sample['!Sample_data_processing'] = compositeTrack.url
-
-		if idNum in geoMapping and geoMapping[idNum] != 'Inconsistent':
-			sample['!Sample_geo_accession'] = geoMapping[idNum]
-		
-		softfile[firstStanza['metaObject']] = sample
-		
-	return softfile, fileList
-		
-		
-def createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
-	
-	raise KeyError('microarray')
-	
-	print 'Creating MicroArray soft file'
-
-	softfile = SoftFile()
-	fileList = list()
-	
-	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
-	
-	for idNum in expIds.iterkeys():
-		
-		expId = expIds[idNum]
-		firstStanza = expId[0]
-		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
-		sample = MicroArraySampleStanza()
-			
-		sample['^SAMPLE'] = sampleTitle(firstStanza, expVars)
-		
-		if 'geoSeriesAccession' in series:
-			sample['!Sample_series_id'] = series['geoSeriesAccession']	
-			
-		sample['!Sample_title'] = concat
-		
-		count = 1
-			
-		for stanza in expId:
-		
-			if isSupplimentaryFile(stanza['fileName']):
-				sample['!Sample_supplementary_file_' + str(count)] = stanza['fileName']
-				
-				if 'checksum' in stanza:
-					sample['!Sample_supplementary_file_checksum_' + str(count)] = stanza['checksum']
-				elif md5sums != None and stanza['fileName'] in md5sums:
-					sample['!Sample_supplementary_file_checksum_' + str(count)] = md5sums[stanza['fileName']]
-				
-				fileList.append(stanza['fileName'])
-				count = count + 1
-
-		# sample['!Sample_table'] = KeyOptional # CEL file
-		sample['!Sample_source_name_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_organism_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_characteristics_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		# sample['!Sample_biomaterial_provider_ch'] = KeyZeroPlusNumbered
-		# sample['!Sample_treatment_protocol_ch'] = KeyZeroPlusNumbered
-		# sample['!Sample_growth_protocol_ch'] = KeyZeroPlusNumbered
-		sample['!Sample_molecule_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_extract_protocol_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_label_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_label_protocol_ch_1'] = '[REPLACE]' #KeyOnePlusNumbered
-		sample['!Sample_hyb_protocol'] = '[REPLACE]' #KeyOnePlus
-		sample['!Sample_scan_protocol'] = '[REPLACE]' #KeyOnePlus
-		sample['!Sample_data_processing'] = '[REPLACE]' #KeyOnePlus
-		sample['!Sample_description'] = '[REPLACE]' #KeyZeroPlus
-		sample['!Sample_platform_id'] = '[REPLACE]'
-		# sample['!Sample_geo_accession'] = KeyOptional
-		# sample['!Sample_anchor'] = KeyRequired SAGE ONLY
-		# sample['!Sample_type'] = KeyRequired SAGE ONLY
-		# sample['!Sample_tag_count'] = KeyRequired SAGE ONLY
-		# sample['!Sample_tag_length'] = KeyRequired SAGE ONLY
-		sample['!Sample_table_begin'] = ''
-		sample['!Sample_table_end'] = ''
-	
-		softfile[firstStanza['accession']] = sample
-		
-	return softfile, fileList
-	
-	
-def createSpecialSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype):
-	softfile = SoftFile()
-	fileList = list()
-	
-	createSeries(softfile, composite, compositeUrl, track, expIds, geoMapping, series)
-	
-	for idNum in expIds.iterkeys():
-		
-		expId = expIds[idNum]
-		firstStanza = expId[0]
-		print 'Writing sample ' + firstStanza['metaObject'] + ' (' + idNum + ')'
-		sample = HighThroughputSampleStanza()
-		
-		hasbigwig = 0
-		for stanza in expId:
-		
-			if getFileType(stanza['fileName']) == 'bigWig':
-				hasbigwig = 1
-				
-		if hasbigwig == 0:
-			continue
-		
-		sample['^SAMPLE'] = firstStanza['geoSampleAccession']
-		
-		if 'geoSeriesAccession' in series:
-			sample['!Sample_series_id'] = series['geoSeriesAccession']
-			
-		sample['!Sample_geo_accession'] = firstStanza['geoSampleAccession']
-		
-		count = 1
-			
-		for stanza in expId:
-		
-			if getFileType(stanza['fileName']) == 'bigWig':
-				sample['!Sample_supplementary_file_' + str(count)] = stanza['fileName']
-				
-				if 'checksum' in stanza:
-					sample['!Sample_supplementary_file_checksum_' + str(count)] = stanza['checksum']
-				elif md5sums != None and stanza['fileName'] in md5sums:
-					sample['!Sample_supplementary_file_checksum_' + str(count)] = md5sums[stanza['fileName']]
-				
-				# sample['!Sample_supplementary_file_build_' + str(count)] = database
-				
-				fileList.append(stanza['fileName'])
-				count = count + 1
-				
-		softfile[firstStanza['geoSampleAccession']] = sample
-		
-	return softfile, fileList
-		
-def main():
-
-	parser = argparse.ArgumentParser(description = 'Prepares a submission to GEO. Creates a soft file and shell script with the correct call to aspera.')
-	parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
-	parser.add_argument('-r', '--replace', help='Give the name of a file that has contents to be used to replace unspecified tags in metadata (description, contributers, etc) and instrument model')
-	parser.add_argument('-a', '--audit', action='store_true', default=False, help='Instead of building the files, will just give you a list of errors')
-	parser.add_argument('database', help='The database, typically hg19 or mm9')
-	parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
-	parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')
-	
-	if len(sys.argv) == 1:
-		parser.print_usage()
-		return
-	
-	args = parser.parse_args(sys.argv[1:])
-		
-	compositeTrack = track.CompositeTrack(args.database, args.composite, args.trackPath)
-
-	cvPath = compositeTrack.trackPath + 'cv/alpha/cv.ra'
-	controlledVocab = cv.CvFile(cvPath)
-	
-	replace = dict()
-	if args.replace != None:
-		for line in open(args.replace):
-			if line == '':
-				continue
-			key, val = map(str.strip, line.split('=', 1))
-			if key not in replace:
-				replace[key] = list()
-			replace[key].append(val)
-		
-	
-	ids = list()
-	
-	for id in args.expIds:
-		if '-' in id:
-			start, end = id.split('-', 1)
-			ids.extend(range(int(start), int(end) + 1))
-		else:
-			ids.append(int(id))
-	
-	expIds, expVars, geoMapping, series, datatype = createMappings(compositeTrack.alphaMetaDb)
-	
-	submission = dict()
-	if len(ids) == 0:
-		submission = expIds
-	else:
-		for expId in ids:
-			submission[str(expId)] = expIds[str(expId)]
-	
-	expIdStr = ' '
-	for id in args.expIds:
-		expIdStr = expIdStr + id + ',' 
-	expIdStr = expIdStr[:len(expIdStr) - 1]
-	print 'Generating soft using expIds ' + expIdStr
-	
-	if datatype.soft == soft.HighThroughputSoftFile:
-		softfile, fileList = createHighThroughputSoftFile(compositeTrack, controlledVocab, submission, expVars, geoMapping, series, datatype, replace, args.audit)
-	elif datatype.soft == soft.MicroArraySoftFile:
-		softfile, fileList = createMicroArraySoftFile(compositeTrack, controlledVocab, submission, expVars, geoMapping, series, datatype)
-	else:
-		raise KeyError('unsupported type')
-	
-	if not args.audit:
-		print 'Creating directory'
-	
-		d = datetime.datetime.today()
-		datestring = '%4d-%02d-%02d' % (d.year, d.month, d.day)
-		
-		dirname = '%s_%s_%s/' % (compositeTrack.database, compositeTrack.name, datestring)
-		asperadirname = '%s_%s/' % (compositeTrack.database, compositeTrack.name)
-		linkdirname = '%s_%s/' % (compositeTrack.database, compositeTrack.name)
-	
-		os.mkdir(dirname)
-		os.mkdir(dirname + linkdirname)
-	
-		print 'Writing file'
-		
-		outfileName = '%s%s_%s.soft' % (dirname, compositeTrack.database, compositeTrack.name)
-		outfile = open(outfileName, 'w')
-		outfile.write(str(softfile))
-		fileslistname = '%sfiles.txt' % dirname
-		fileslist = open(fileslistname, 'w')
-		scriptname = '%supload.sh' % dirname
-		outscript = open(scriptname, 'w')
-		
-		outscript.write('#!/bin/sh\n\n')
-		outscript.write('/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk --symbolic-links=follow -QTdr -l300m %s asp-geo@upload.ncbi.nlm.nih.gov:ENCODE\n' % linkdirname)
-		outscript.close()
-		
-	for file in fileList:
-		if not os.path.exists(file.path):
-			print IOError(file.path + ' does not exist')
-		elif not args.audit:
-			linkname = '%s_%s' % (compositeTrack.database, file.name)
-			linkpath = linkdirname + linkname
-			os.symlink(file.fullname, dirname + linkpath)
-		
-			#outscript.write(linkpath + ' \\\n')
-			fileslist.write(linkname + '\n')
-	
-	if not args.audit:
-		#outscript.write()
-		
-		fileslist.close()
-
-		os.system('chmod +x ' + scriptname)
-		
-		print 'Finished!'
-	
-if __name__ == '__main__':
-	main()
\ No newline at end of file