2567ad8ce3a3617f4d2ad81440d47d85246a0f0a
max
  Mon Feb 27 02:51:25 2023 -0800
fixing chromToUcsc bug on py3 with uncompressed chromAlias files, no redmine, ran into this myself

diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc
index ad7b1e6..982f328 100755
--- src/utils/chromToUcsc/chromToUcsc
+++ src/utils/chromToUcsc/chromToUcsc
@@ -188,56 +188,66 @@
                 if ucscChrom is None:
                     handledUnmappedChrom(chrom, skipUnknown, skipWarned,
                                          "line %d: chrom name %s is not in chromAlias table" % (lineNo, repr(chrom)))
                     continue
             if isSam:
                 mateChrom = row[6]
                 if mateChrom not in ("=", "*"):
                     row[6] = toUcsc[mateChrom]
 
             row[fieldIdx] = ucscChrom
 
         line = sep.join(row)
         ofh.write(line)
         ofh.write("\n")
 
+def downloadUrl(url):
+    """ download URL and return as string. gzip OK.
+    Supporting both py2 and py3 here makes this method more complicated than one would expect.
+    """
+    data = urlopen(url).read()
+
+    if url.endswith(".gz"):
+        if "decompress" in dir(gzip): # py3
+            data = gzip.decompress(data)
+        else:
+            data = gzip.GzipFile(fileobj=StringIO(data)).read() # py2
+
+    if isinstance(data, bytes): # urlopen returns 'bytes' on py3
+        data = data.decode("latin1")
+
+    return data
+
 def download(db):
     " download chromAlias file from UCSC "
     # Genark assemblies are in a different directory of the download server
     if "_" in db:
         p1 = db[0:3]
         p2 = db[4:7]
         p3 = db[7:10]
         p4 = db[10:13]
         url = "https://hgdownload.soe.ucsc.edu/hubs/%s/%s/%s/%s/%s/%s.chromAlias.txt" % (p1, p2, p3, p4, db, db)
     elif db in ["hg38"]:
         # hg38 has been patched a few times, assume that the user wants the latest chromAlias file
         url="https://hgdownload.soe.ucsc.edu/goldenPath/%s/bigZips/latest/%s.chromAlias.txt" % (db, db)
     else:
         url = "https://hgdownload.soe.ucsc.edu/goldenPath/%s/database/chromAlias.txt.gz" % db
 
-    data = urlopen(url).read()
-
-    if url.endswith(".gz"):
-        if 'cStringIO' in modules:
-            data = StringIO(data)
-        else:
-            data = BytesIO(data)
-
-        data = gzip.GzipFile(fileobj=data).read().decode()
+    data = downloadUrl(url)
 
     outFname = db+".chromAlias.tsv"
+
     open(outFname, "w").write(data)
     print("Wrote %s to %s" % (url, outFname))
     print("You can now convert a file with 'chromToUcsc -a %s -i infile.bed -o outfile.bed'" % outFname)
     exit(0)
 
 def main():
     args, options = parseArgs()
 
     aliasFname = options.aliasFname
     inFname = options.inFname
     outFname = options.outFname
     skipUnknown = options.skipUnknown
 
     if options.downloadDb:
         download(options.downloadDb)