261b8674b99bfd1007e110b06c70973e6b347f67 mmaddren Mon Jul 11 13:31:45 2011 -0700 cleaner version of mkGeoPkg diff --git python/programs/mkGeoPkg/mkGeoPkg python/programs/mkGeoPkg/mkGeoPkg index 53a1d40..23a3e84 100755 --- python/programs/mkGeoPkg/mkGeoPkg +++ python/programs/mkGeoPkg/mkGeoPkg @@ -318,102 +318,84 @@ sample['!Sample_instrument_model'] = instrumentModels[stanza['seqPlatform']] sample['!Sample_data_processing'] = compositeUrl if idNum in geoMapping and geoMapping[idNum] != 'Inconsistent': sample['!Sample_geo_accession'] = geoMapping[idNum] softfile[firstStanza['metaObject']] = sample return softfile, fileList def createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, expIds, expVars, geoMapping, series, datatype, copyDirectory): pass - -def copyFiles(fileList, downloadsDirectory, copyDirectory): - - print 'Copying files:' - for filename in fileList: - print 'Copying file ' + filename + ' (' + count + '/' + len(fileList) + ' ...' - shutil.copy2(downloadsDirectory + filename, copyDirectory) - def main(): database = sys.argv[1] composite = sys.argv[2] submitStart = sys.argv[3] submitSize = int(sys.argv[4]) organism = organisms[database] mdbPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/' + organism + '/' + database + '/metaDb/alpha/' + composite + '.ra' #CHANGE cvPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/cv/alpha/cv.ra' #CHANGE trackPath = '/cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/' + organism + '/' + database + '/' + composite + '.ra' md5path = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + composite + '/md5sum.txt' downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + composite + '/' copyDirectory = '/cluster/home/mmaddren/kent/python/ucscgenomics/mkGeoPkg/' + composite compositeUrl = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + composite mdb = RaFile(mdbPath) cv = CvFile(cvPath) track = RaFile(trackPath) md5sums = readMd5sums(md5path) expIds, expVars, geoMapping, series, datatype = createMappings(mdb) submission = dict() sortedIds = expIds.keys() sortedIds.sort() sortedIds = sortedIds[sortedIds.index(submitStart):sortedIds.index(submitStart) + submitSize] - print 'Generating soft using expIds ' + min(sortedIds) + ' to ' + max(sortedIds) + minId = min(sortedIds) + maxId = max(sortedIds) + print 'Generating soft using expIds ' + minId + ' to ' + maxId for expId in sortedIds: submission[expId] = expIds[expId] if datatype.soft == HighThroughputSoftFile: softfile, fileList = createHighThroughputSoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype, copyDirectory) elif datatype.soft == MicroArraySoftFile: softfile, fileList = createMicroArraySoftFile(database, composite, organism, compositeUrl, mdb, cv, track, md5sums, submission, expVars, geoMapping, series, datatype, copyDirectory) else: raise Error('unsupported type') print 'Writing soft file' outfileName = os.path.dirname(sys.argv[0]) + composite + '.soft' outfile = open(outfileName, 'w') outfile.write(str(softfile)) - #print 'Copying files' - fileString = outfileName for file in fileList: fileString = fileString + ' ' + downloadsDirectory + file -#callList = [ '/opt/aspera/connect/bin/ascp asp-geo@upload.ncbi.nlm.nih.gov:ENCODE/' + composite, '-i', '~/encode_geo_key/encode_geo_key.ppk', '-QTr', '-l300m' ] -#callList.extend(fileList) -#callList.append(') - fileString.strip() callString = '/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk -QTr -l300m ' + fileString + ' asp-geo@upload.ncbi.nlm.nih.gov:ENCODE/' + composite -#subprocess.call(callList, shell=True) - outscript = open(composite + '_' + submitStart + '-' + str(int(submitStart) + submitSize) + '.sh', 'w') + outscript = open(composite + minId + '-' + maxId + '.sh', 'w') outscript.write('#!/bin/sh\n\n') outscript.write('/opt/aspera/connect/bin/ascp -i ~/encode_geo_key/encode_geo_key.ppk -QTr -l300m \\\n') outscript.write(os.path.dirname(sys.argv[0]) + composite + '.soft' + ' \\\n') for file in fileList: outscript.write(downloadsDirectory + file + ' \\\n') outscript.write('asp-geo@upload.ncbi.nlm.nih.gov:ENCODE\n') outscript.close() - #os.fchmod(composite + '_' + submitStart + '-' + str(int(submitStart) + submitSize) + '.sh', stat.S_IXUSR) - os.system('chmod +x ' + composite + '_' + submitStart + '-' + str(int(submitStart) + submitSize) + '.sh') -# outscript.write(callString) - #print callString - #os.system(callString) - #copyFiles(fileList, downloadsDirectory, copyDirectory) + os.system('chmod +x ' + composite + minId + '-' + maxId + '.sh') print 'Finished!' if __name__ == '__main__': main() \ No newline at end of file