f790971244c8ed7efc550cf5c86228ca5b242e4c max Fri May 23 07:50:30 2025 -0700 updating tabUniq diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py index 3c77bf4..45f0084 100755 --- src/cbPyLib/cellbrowser/cellbrowser.py +++ src/cbPyLib/cellbrowser/cellbrowser.py @@ -3565,32 +3565,35 @@ for line in openFile(fname): if line.startswith("#"): continue line = removeBom(line) line = line.rstrip("\r\n") if len(line)==0: continue hasDesc = False hasPmid = False if line.startswith("symbol"): continue row = line.split(sep) geneOrSym = row[0] - # case 1: user provides both geneId and symbol. Rare. - # Necessary when symbol <-> geneId is not unique + # The following looks overly complicated but that's due to the complexity of combinations + # that we allow and because datasets come in different shapes + + # case 1: user provides both geneId and symbol in the quickgenes file. Rare. + # Necessary when symbol <-> geneId is not unique and wrangler wants a particular gene if "|" in geneOrSym: geneId, sym = geneOrSym.split("|") if geneId not in matrixGeneIds: logging.info("case 1: geneId %s in quickgenes file is not in expression matrix" % repr(geneId)) continue geneStr = geneOrSym # case 2: matrix has only symbols and user provides symbol. This is our legacy format for old datasets. # store only the symbol. We could look up the geneId but that's data inference, # which we try not to do. The lookup could be wrong. elif matrixSyms is not None and geneOrSym in matrixSyms: geneStr = geneOrSym if geneStr not in matrixGeneIds: logging.info("case 2: geneId %s in quickgenes file is not in expression matrix" % repr(geneStr)) continue