2d251af86fb82dbd404bea6f26525429afb7358b max Thu Jun 19 10:16:13 2025 -0700 add featDesc to conf file for PSI dataset diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py index 780f447..09b85a4 100755 --- src/cbPyLib/cellbrowser/cellbrowser.py +++ src/cbPyLib/cellbrowser/cellbrowser.py @@ -1566,30 +1566,31 @@ def iterRows(self): " yield (geneId, symbol, array) tuples from gene expression file. " mat = self.mat genes = self.genes skipIds = 0 geneToSym = self.geneToSym for i in range(0, len(self.genes)): geneId = genes[i] geneId, geneSym, skipIds = resolveGene(geneId, geneToSym, skipIds) logging.debug("geneId %s, geneSym %s", geneId, geneSym) if i%1000==0: logging.info("%d genes written..." % i) arr = mat.getrow(i).toarray() + #arr = arr[arr==numpy.nan]=numpy.nan if arr.ndim==2: # scipy sparse arrays have changed their entire data model and now all operations # return 2D matrices. So need to unpack it to get the array. Grrr. arr = arr[0] yield (geneId, geneSym, arr) def resolveGene(gene, geneToSym, skipIds): if "|" in gene: gene, symbol = gene.split("|") else: if geneToSym is None: symbol = gene else: if gene.startswith("EN"): gene = gene.split(".")[0] @@ -3527,32 +3528,50 @@ start1|end1, start2|end2, etc all are within start-end. Returns an empty string if nothing found. """ chrom, pos = posStr.split(":") start, end = pos.split("-") start = int(start) end = int(end) # this would be a lot faster with binary search but also a lot more complicated... if chrom not in atacByChrom: errAbort("The quickGenes file contains %s, but this chromosome has no features in the matrix." % repr(posStr)) chromRanges = atacByChrom[chrom] foundRanges = [] for rangeStart, rangeEnd, offset, dataLen in chromRanges: if start <= rangeStart and rangeEnd <= end: - foundRanges.append( "|".join( [chrom, str(rangeStart), str(rangeEnd)] ) ) - return "+".join(foundRanges) + foundRanges.append( (chrom, rangeStart, rangeEnd) ) + + # if we have one exact match, get rid of all the others + # this was added for cortex-dev-splincing/psi, where quick ranges can include others + # since ranges overlap each other, but the quick ranges should only be one exact matching range + exactRanges = [] + for ft in foundRanges: + chrom, rangeStart, rangeEnd = ft + if start == rangeStart and rangeEnd == end: + exactRanges.append( (chrom, rangeStart, rangeEnd) ) + + if len(exactRanges)==1: + foundRanges = exactRanges + + foundStrRanges = [] + for ft in foundRanges: + chrom, rangeStart, rangeEnd = ft + foundStrRanges.append( "|".join( [chrom, str(rangeStart), str(rangeEnd)] ) ) + + return "+".join(foundStrRanges) def parseGeneInfo(geneToSym, fname, matrixSyms, matrixGeneIds): """ parse quick genes file with three columns: symbol or geneId, desc (optional), pmid (optional). Return as a dict geneId|symbol -> description """ if fname is None: return {} logging.debug("Parsing %s" % fname) symToGene = None nonUniqueSyms = defaultdict(set) if geneToSym is not None: symToGene = dict() for gene, sym in iterItems(geneToSym): if sym in symToGene: #logging.warning("Symbol %s is not unique, geneID %s" % (sym, gene)) nonUniqueSyms[sym].add(gene) @@ -4675,31 +4694,31 @@ foundConf = writeDatasetDesc(inConf["inDir"], outConf, datasetDir, coordFiles, outMatrixFname) if geneToSym==-1: geneToSym = readGeneSymbols(inConf.get("geneIdType"), inMatrixFname) matrixSyms, matrixGeneIds, geneToSymFromMatrix = readValidGenes(datasetDir, inConf) convertMarkers(inConf, outConf, geneToSym, clusterLabels, matrixGeneIds, datasetDir) readQuickGenes(inConf, geneToSym, matrixSyms, matrixGeneIds, geneToSymFromMatrix, datasetDir, outConf) copyBackgroundImages(inDir, inConf, outConf, datasetDir) # a few settings are passed through to the Javascript as they are - for tag in ["shortLabel", "radius", "alpha", "priority", "tags", "sampleDesc", "geneLabel", + for tag in ["shortLabel", "radius", "alpha", "priority", "tags", "sampleDesc", "featDesc", "geneLabel", "clusterField", "defColorField", "xenaPhenoId", "xenaId", "hubUrl", "showLabels", "ucscDb", "unit", "violinField", "visibility", "coordLabel", "lineWidth", "hideDataset", "hideDownload", "metaBarWidth", "supplFiles", "defQuantPal", "defCatPal", "clusterPngDir", "wrangler", "shepherd", "binStrategy", "split", "lineAlpha", "lineWidth", "lineColor", # the following are there only for old datasets, they are now nested under "facets" # they are just here for backwards-compatibility and will eventually get removed "body_parts", "organisms", "diseases", "projects", "life_stages", "domains", "sources", "assays", # facets are taking their place now "facets", "multiModal"]: copyConf(inConf, outConf, tag) if "name" not in outConf: copyConf(inConf, outConf, "name") @@ -4772,35 +4791,40 @@ coordDf.columns=['x','y'] coordDf.to_csv(fname,sep='\t') desc.append( {'file':fileBase, 'shortLabel': fullName} ) return coordFields def writeCellbrowserConf(name, coordsList, fname, addMarkers=True, args={}): checkDsName(name) metaFname = args.get("meta", "meta.tsv") clusterField = args.get("clusterField", "Louvain Cluster") coordStr = json.dumps(coordsList, indent=4) matrixFname = args.get("exprMatrix", "exprMatrix.tsv.gz") + cmdLine = " ".join(sys.argv) + dateStr = datetime.now().isoformat() + conf = """ # This is a bare-bones, auto-generated Cell Browser config file. # Look at https://github.com/maximilianh/cellBrowser/blob/master/src/cbPyLib/cellbrowser/sampleConfig/cellbrowser.conf # for a list of possible options # You can also write a default template into the current directory with cbBuild --init +# Command was: %(cmdLine)s +# Time: %(dateStr)s name='%(name)s' shortLabel='%(name)s' exprMatrix='%(matrixFname)s' #tags = ["10x", 'smartseq2'] meta='%(metaFname)s' geneIdType='auto' defColorField='%(clusterField)s' labelField='%(clusterField)s' enumFields=['%(clusterField)s'] coords=%(coordStr)s #alpha=0.3 #radius=2 """ % locals() if addMarkers: