src/utils/chromToUcsc/chromToUcsc dc808927b55b21c72db0cd528f76624fcba339dd

dc808927b55b21c72db0cd528f76624fcba339dd
max
  Mon Jan 24 05:15:33 2022 -0800
adding igv-style chrom alias support to chromToUcsc, refs #28722

diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc
index 31ce38d..c1f884f 100755
--- src/utils/chromToUcsc/chromToUcsc
+++ src/utils/chromToUcsc/chromToUcsc
@@ -78,48 +78,64 @@
                 yield lineNo, " ", row, line # *step lines are always space-separated
             else:
                 if sep==-1:
                     # why test first for space? Because after fixedStep, the lines are just single numbers
                     if "\t" in line:
                         sep = "\t"
                     elif " " in line:
                         sep = None # = split on any whitespace, consec. whitespc counts as one
                     else:
                         sep = "\t" # default is to split on tab
                 row = line.rstrip("\n\r").split(sep)
                 if sep is None:
                     sep = " "
                 yield lineNo, sep, row, line
 
+def parseNewAlias(ifh):
+    " IGV-compatible format: first column is the UCSC name, all other columns are aliases. Returns dict alias -> UCSC name "
+    toUcsc = {}
+    for line in ifh:
+        if line.startswith("#"): # header and comment lines carry no mappings
+            continue
+        row = line.rstrip("\n\r").split("\t") # strip \r too, otherwise CRLF files leave it on the last alias
+        for alias in filter(None, row[1:]): # skip empty cells, so "" never becomes an alias
+            toUcsc[alias] = row[0]
+    return toUcsc
+
 def parseAlias(fname):
     " parse tsv file with at least two columns, orig chrom name and new chrom name "
     toUcsc = {}
     if fname.startswith("http://") or fname.startswith("https://"):
         ifh = urlopen(fname)
         if fname.endswith(".gz"):
             data = gzip.GzipFile(fileobj=ifh).read().decode()
             ifh = data.splitlines()
     elif fname.endswith(".gz"):
         ifh = gzip.open(fname, "rt")
     else:
         ifh = open(fname)
 
+    firstLine = True # only the very first line decides which format the file is in
     for line in ifh:
+        if line.startswith("#") and firstLine:
+            logging.debug("alias file is in IGV-format") # logged only when the IGV parser really takes over
+            return parseNewAlias(ifh) # parseNewAlias skips "#" lines itself, so the header is never read as data
+        firstLine = False # cleared before the "alias" header check, so a later "#" line cannot switch formats
         if line.startswith("alias"):
             continue
         row = line.rstrip("\n").split("\t")
         toUcsc[row[0]] = row[1]
     return toUcsc
 
 
 def handledUnmappedChrom(chrom, skipUnknown, skipWarned, message):
     "with skipUnknown: log message as a warning, once per chrom; otherwise log it as an error and exit with status 1."
     if skipUnknown:
         if chrom not in skipWarned: # skipWarned holds the chroms that were already warned about
             logging.warning(message)
             skipWarned.add(chrom)
     else:
         logging.error(message)
         exit(1)
 
 def chromToUcsc(aliasFname, fieldIdx, skipUnknown, ifh, ofh):
     " convert column number fieldIdx to UCSC-style chrom names "