f790971244c8ed7efc550cf5c86228ca5b242e4c
max
  Fri May 23 07:50:30 2025 -0700
updating tabUniq

diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py
index 3c77bf4..45f0084 100755
--- src/cbPyLib/cellbrowser/cellbrowser.py
+++ src/cbPyLib/cellbrowser/cellbrowser.py
@@ -3565,32 +3565,35 @@
     for line in openFile(fname):
         if line.startswith("#"):
             continue
         line = removeBom(line)
         line = line.rstrip("\r\n")
         if len(line)==0:
             continue
 
         hasDesc = False
         hasPmid = False
         if line.startswith("symbol"):
             continue
         row = line.split(sep)
         geneOrSym = row[0]
 
-        # case 1: user provides both geneId and symbol. Rare.
-        # Necessary when symbol <-> geneId is not unique
+        # The following looks overly complicated, but that's due to the number of combinations
+        # that we allow and because datasets come in different shapes.
+
+        # case 1: user provides both geneId and symbol in the quickgenes file. Rare.
+        # Necessary when symbol <-> geneId is not unique and wrangler wants a particular gene
         if "|" in geneOrSym:
             geneId, sym = geneOrSym.split("|")
             if geneId not in matrixGeneIds:
                 logging.info("case 1: geneId %s in quickgenes file is not in expression matrix" % repr(geneId))
                 continue
             geneStr = geneOrSym
 
         # case 2: matrix has only symbols and user provides symbol. This is our legacy format for old datasets.
         # store only the symbol. We could look up the geneId but that's data inference, 
         # which we try not to do. The lookup could be wrong.
         elif matrixSyms is not None and geneOrSym in matrixSyms:
             geneStr = geneOrSym
             if geneStr not in matrixGeneIds:
                 logging.info("case 2: geneId %s in quickgenes file is not in expression matrix" % repr(geneStr))
                 continue