79213bd1e25c9761a4e82e1dbf00cadb9aea3672 lrnassar Tue Nov 12 10:28:42 2019 -0800 Adding additional support for different python versions refs #24432 diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc index 0a2d2fa..fc271bc 100755 --- src/utils/chromToUcsc/chromToUcsc +++ src/utils/chromToUcsc/chromToUcsc @@ -1,41 +1,41 @@ #!/usr/bin/env python import logging, optparse, gzip -from sys import stdin, stdout, stderr, exit +from sys import stdin, stdout, stderr, exit, modules try: from urllib.request import urlopen # py2 except ImportError: from urllib2 import urlopen # py3 try: from cStringIO import StringIO # py2 except ImportError: - from io import StringIO # py3 + from io import BytesIO # py3 # ==== functions ===== def parseArgs(): " setup logging, parse command line arguments and options. -h shows auto-generated help page " parser = optparse.OptionParser("""usage: %prog [options] filename - change NCBI or Ensembl chromosome names to UCSC names using the chromAlias table of the genome browser. Examples: %prog -i test.bed -o test.ucsc.bed -g hg19 %prog -g mm10 --get %prog -i test2.bed -o test2.ucsc.bed -a mm10.chromAlias.tsv cat test.bed | %prog -a mm10.chromAlias.tsv > test.ucsc.bed """) parser.add_option("-g", "--genomeDb", dest="db", action="store", help="a UCSC assembly ID, like hg19, hg38 or similar. Not required if -a is used. ") - parser.add_option("-a", "--chromAlias", dest="aliasFname", action="store", help="a UCSC chromAlias table in tab-sep format. The alias tables for hg19 or hg38 are hardcoded in the script, they do not require a chromAlias table..") + parser.add_option("-a", "--chromAlias", dest="aliasFname", action="store", help="a UCSC chromAlias table in tab-sep format. 
The alias tables for hg19 or hg38 are hardcoded in the script, they do not require a chromAlias table.") parser.add_option("-i", "--in", dest="inFname", action="store", help="input filename, default: /dev/stdin") parser.add_option("-o", "--out", dest="outFname", action="store", help="output filename, default: /dev/stdout") parser.add_option("", "--get", dest="doDownload", action="store_true", help="download a chrom alias table from UCSC for --genomeDb into the current directory and exit") parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages") #parser.add_option("", "--test", dest="test", action="store_true", help="do something") (options, args) = parser.parse_args() if options.db is None and options.aliasFname is None: parser.print_help() exit(1) if options.debug: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) @@ -106,31 +106,37 @@ db = fname.split(".")[0] print("Reading alias table for %s: %s" % (db, fname)) aliasData[db] = parseAlias(fname) myFname = __file__ oldScript = open(myFname).read() assert("#ALIASDATA#" in oldScript) newScript = oldScript.replace("#ALIASDATA#", "aliasData = "+repr(aliasData)) open(outFname, "w").write(newScript) print("Patched script written to "+outFname) exit(0) def download(db): url = "http://hgdownload.soe.ucsc.edu/goldenPath/%s/database/chromAlias.txt.gz" % db gzData = urlopen(url).read() - data = gzip.GzipFile(fileobj=StringIO.StringIO(gzData)).read() + + if 'cStringIO' in modules: + data = StringIO(gzData) + else: + data = BytesIO(gzData) + + data = gzip.GzipFile(fileobj=data).read().decode() outFname = db+".chromAlias.tsv" open(outFname, "w").write(data) print("Wrote %s" % outFname) exit(0) def main(): args, options = parseArgs() db = options.db aliasFname = options.aliasFname inFname = options.inFname outFname = options.outFname if db=="build": patchScript(aliasFname, outFname)