2567ad8ce3a3617f4d2ad81440d47d85246a0f0a max Mon Feb 27 02:51:25 2023 -0800 fixing chromToUcsc bug on py3 with uncompressed chromAlias files, no redmine, ran into this myself diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc index ad7b1e6..982f328 100755 --- src/utils/chromToUcsc/chromToUcsc +++ src/utils/chromToUcsc/chromToUcsc @@ -188,56 +188,66 @@ if ucscChrom is None: handledUnmappedChrom(chrom, skipUnknown, skipWarned, "line %d: chrom name %s is not in chromAlias table" % (lineNo, repr(chrom))) continue if isSam: mateChrom = row[6] if mateChrom not in ("=", "*"): row[6] = toUcsc[mateChrom] row[fieldIdx] = ucscChrom line = sep.join(row) ofh.write(line) ofh.write("\n") +def downloadUrl(url): + """ download URL and return as string. gzip OK. + Supporting both py2 and py3 here makes this method more complicated than one would expect. + """ + data = urlopen(url).read() + + if url.endswith(".gz"): + if "decompress" in dir(gzip): # py3 + data = gzip.decompress(data) + else: + data = gzip.GzipFile(fileobj=StringIO(data)).read() # py2 + + if isinstance(data, bytes): # urlopen returns 'bytes' on py3 + data = data.decode("latin1") + + return data + def download(db): " download chromAlias file from UCSC " # Genark assemblies are in a different directory of the download server if "_" in db: p1 = db[0:3] p2 = db[4:7] p3 = db[7:10] p4 = db[10:13] url = "https://hgdownload.soe.ucsc.edu/hubs/%s/%s/%s/%s/%s/%s.chromAlias.txt" % (p1, p2, p3, p4, db, db) elif db in ["hg38"]: # hg38 has been patched a few times, assume that the user wants the latest chromAlias file url="https://hgdownload.soe.ucsc.edu/goldenPath/%s/bigZips/latest/%s.chromAlias.txt" % (db, db) else: url = "https://hgdownload.soe.ucsc.edu/goldenPath/%s/database/chromAlias.txt.gz" % db - data = urlopen(url).read() - - if url.endswith(".gz"): - if 'cStringIO' in modules: - data = StringIO(data) - else: - data = BytesIO(data) - - data = gzip.GzipFile(fileobj=data).read().decode() + data = 
downloadUrl(url) outFname = db+".chromAlias.tsv" + open(outFname, "w").write(data) print("Wrote %s to %s" % (url, outFname)) print("You can now convert a file with 'chromToUcsc -a %s -i infile.bed -o outfile.bed'" % outFname) exit(0) def main(): args, options = parseArgs() aliasFname = options.aliasFname inFname = options.inFname outFname = options.outFname skipUnknown = options.skipUnknown if options.downloadDb: download(options.downloadDb)