f77821fea322cfbd8e982a7cc584e81f629f20e7 max Thu Jul 3 08:25:35 2025 -0700 prefer syms from matrix diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py index 877ce8f..a62a548 100755 --- src/cbPyLib/cellbrowser/cellbrowser.py +++ src/cbPyLib/cellbrowser/cellbrowser.py @@ -3606,35 +3606,39 @@ row = line.split(sep) geneOrSym = row[0] # The following looks overly complicated but that's due to the complexity of combinations # the we allow and because datasets come in different shapes # case 1: user provides both geneId and symbol in the quickgenes file. Rare. # Necessary when symbol <-> geneId is not unique and wrangler wants a particular gene if "|" in geneOrSym: geneId, sym = geneOrSym.split("|") if geneId not in matrixGeneIds: logging.info("case 1: geneId %s in quickgenes file is not in expression matrix" % repr(geneId)) continue geneStr = geneOrSym - # case 2: matrix has only symbols and user provides symbol. This is our legacy format for old datasets. + # case 2: matrix has geneId+symbol or just symbols and user provides symbol in quickgenes. + # (symbol-only is our legacy format for old datasets.) # store only the symbol. We could look up the geneId but that's data inference, # which we try not to do. The lookup could be wrong. elif matrixSyms is not None and geneOrSym in matrixSyms: geneStr = geneOrSym + if symToGene: + geneStr = symToGene[geneStr]+"|"+geneStr + else: if geneStr not in matrixGeneIds: logging.info("case 2: geneId %s in quickgenes file is not in expression matrix" % repr(geneStr)) continue # case 3: matrix has geneIds and user provides a geneId. add the symbol from our mapping # that's data inference, but that should be OK elif geneOrSym in matrixGeneIds: geneId = geneOrSym if not geneToSym: #logging.info("Quick gene %s but we have no geneId/symbol table. 
You can use " #"the format geneId|symbol in the quick genes file to manually assign a label" % repr(geneId)) # when geneToSym is None, that means that we have symbols in the matrix. So just pass through the # symbol. example dataset: ams-supercluster geneStr = geneId else: