71fb2df68ba2d6c8164e8b5dcecd87a65934f0b5
max
  Thu Feb 23 12:10:13 2023 +0100
adding docs, refs #30454

diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc
index b2c44cb..bc1f2d7 100755
--- src/utils/chromToUcsc/chromToUcsc
+++ src/utils/chromToUcsc/chromToUcsc
@@ -11,30 +11,32 @@
     from cStringIO import StringIO # py2
 except ImportError:
     from io import BytesIO # py3
 
 # ==== functions =====
 def parseArgs():
     " setup logging, parse command line arguments and options. -h shows auto-generated help page "
     parser = optparse.OptionParser("""usage: %prog [options] filename - change NCBI or Ensembl chromosome names to UCSC names in tabular or wiggle files, using a chromAlias table.
 
     Supports these UCSC file formats:
     BED, genePred, PSL, wiggle (all formats), bedGraph, VCF, SAM, GTF, Chain
     ... or any other csv or tsv format where the sequence (chromosome) name is a separate field.
 
     Requires a <genome>.chromAlias.tsv file which can be downloaded like this:
         %prog --get hg19              # download the file hg19.chromAlias.tsv into current directory
+    Which also works for GenArk assemblies:
+        %prog --get GCF_000001735.3   # for GenArk assemblies, will translate to NCBI sequence names (accessions)
 
     If you do not want to use the --get option to retrieve the mapping tables, you can also download the alias mapping
     files yourself, e.g. for mm10 with 'wget https://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromAlias.txt.gz'
 
     Then the script can be run like this:
         %prog -i in.bed -o out.bed -a hg19.chromAlias.tsv
         %prog -i in.bed -o out.bed -a https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromAlias.txt.gz
     Or in pipes, like this:
         cat test.bed | %prog -a mm10.chromAlias.tsv > test.ucsc.bed
     For BAM files use this program in a pipe with samtools:
         samtools view -h in.bam | ./chromToUcsc -a mm10.chromAlias.tsv | samtools -bS > out.bam
 
     By default, this script expects the chromosome name in the first field.
     The default works for BED, bedGraph, GTF, wiggle, VCF.
     For the following file formats, you will need to set the -k option to these values manually:
@@ -198,38 +200,40 @@
         ofh.write(line)
         ofh.write("\n")
 
 def download(db):
     " download chromAlias file from UCSC "
     # Genark assemblies are in a different directory of the download server
     if "_" in db:
         p1 = db[0:3]
         p2 = db[4:7]
         p3 = db[7:10]
         p4 = db[10:13]
         url = "https://hgdownload.soe.ucsc.edu/hubs/%s/%s/%s/%s/%s/%s.chromAlias.txt" % (p1, p2, p3, p4, db, db)
     else:
         url = "http://hgdownload.soe.ucsc.edu/goldenPath/%s/database/chromAlias.txt.gz" % db
 
-    gzData = urlopen(url).read()
+    data = urlopen(url).read()
 
+    if url.endswith(".gz"):
         if 'cStringIO' in modules:
-        data = StringIO(gzData)
+            data = StringIO(data)
         else:
-        data = BytesIO(gzData)
+            data = BytesIO(data)
 
         data = gzip.GzipFile(fileobj=data).read().decode()
+
     outFname = db+".chromAlias.tsv"
     open(outFname, "w").write(data)
     print("Wrote %s to %s" % (url, outFname))
     print("You can now convert a file with 'chromToUcsc -a %s -i infile.bed -o outfile.bed'" % outFname)
     exit(0)
 
 def main():
     args, options = parseArgs()
 
     aliasFname = options.aliasFname
     inFname = options.inFname
     outFname = options.outFname
     skipUnknown = options.skipUnknown
 
     if options.downloadDb: