src/cbPyLib/cellbrowser/cellbrowser.py 2d251af86fb82dbd404bea6f26525429afb7358b

2d251af86fb82dbd404bea6f26525429afb7358b
max
  Thu Jun 19 10:16:13 2025 -0700
add featDesc to conf file for PSI dataset

diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py
index 780f447..09b85a4 100755
--- src/cbPyLib/cellbrowser/cellbrowser.py
+++ src/cbPyLib/cellbrowser/cellbrowser.py
@@ -1566,30 +1566,31 @@
     def iterRows(self):
         " Yield one (geneId, symbol, array) tuple per matrix row; a 2D row result is unpacked to its first row. "
         mat = self.mat
         genes = self.genes
         skipIds = 0  # passed into resolveGene() and replaced by the value it returns on every iteration
         geneToSym = self.geneToSym
         for i in range(0, len(self.genes)):
 
             geneId = genes[i]
             geneId, geneSym, skipIds  = resolveGene(geneId, geneToSym, skipIds)
             logging.debug("geneId %s, geneSym %s", geneId, geneSym)
 
             if i%1000==0:  # progress message every 1000 rows, including row 0
                 logging.info("%d genes written..." % i)
             arr = mat.getrow(i).toarray()
+            #arr = arr[arr==numpy.nan]=numpy.nan  # NOTE(review): disabled; arr==numpy.nan is always False (NaN != NaN), a NaN mask needs numpy.isnan(arr)
             if arr.ndim==2:
                 # scipy sparse arrays have changed their entire data model and now all operations
                 # return 2D matrices. So need to unpack it to get the array. Grrr.
                 arr = arr[0]
             yield (geneId, geneSym, arr)
 
 def resolveGene(gene, geneToSym, skipIds):
     if "|" in gene:
         gene, symbol = gene.split("|")
     else:
         if geneToSym is None:
             symbol = gene
         else:
             if gene.startswith("EN"):
                 gene = gene.split(".")[0]
@@ -3527,32 +3528,50 @@
     start1|end1, start2|end2, etc all are within start-end. Returns an empty string if nothing found. """
 
     chrom, pos = posStr.split(":")
     start, end = pos.split("-")
     start = int(start)
     end = int(end)
 
     # this would be a lot faster with binary search but also a lot more complicated...
     if chrom not in atacByChrom:
         errAbort("The quickGenes file contains %s, but this chromosome has no features in the matrix." % repr(posStr))
 
     chromRanges = atacByChrom[chrom]
     foundRanges = []
     for rangeStart, rangeEnd, offset, dataLen in chromRanges:
         if start <= rangeStart  and rangeEnd <= end:
-            foundRanges.append( "|".join( [chrom, str(rangeStart), str(rangeEnd)] ) )
-    return "+".join(foundRanges)
+            foundRanges.append( (chrom, rangeStart, rangeEnd) )
+
+    # If exactly one found range matches start-end exactly, keep only that range and drop the others.
+    # This was added for cortex-dev-splincing/psi, where ranges overlap each other, so a quick range
+    # can contain other ranges, but each quick range should resolve to just its one exact match.
+    exactRanges = []
+    for ft in foundRanges:
+        chrom, rangeStart, rangeEnd = ft
+        if start == rangeStart and rangeEnd == end:
+            exactRanges.append( (chrom, rangeStart, rangeEnd) )
+
+    if len(exactRanges)==1:
+        foundRanges = exactRanges
+
+    foundStrRanges = []
+    for ft in foundRanges:
+        chrom, rangeStart, rangeEnd = ft
+        foundStrRanges.append( "|".join( [chrom, str(rangeStart), str(rangeEnd)] ) )
+
+    return "+".join(foundStrRanges)
 
 def parseGeneInfo(geneToSym, fname, matrixSyms, matrixGeneIds):
     """ parse quick genes file with three columns: symbol or geneId, desc (optional), pmid (optional).
     Return as a dict geneId|symbol -> description """
     if fname is None:
         return {}
     logging.debug("Parsing %s" % fname)
     symToGene = None
     nonUniqueSyms = defaultdict(set)
     if geneToSym is not None:
         symToGene = dict()
         for gene, sym in iterItems(geneToSym):
             if sym in symToGene:
                 #logging.warning("Symbol %s is not unique, geneID %s" % (sym, gene))
                 nonUniqueSyms[sym].add(gene)
@@ -4675,31 +4694,31 @@
 
     foundConf = writeDatasetDesc(inConf["inDir"], outConf, datasetDir, coordFiles, outMatrixFname)
 
     if geneToSym==-1:
         geneToSym = readGeneSymbols(inConf.get("geneIdType"), inMatrixFname)
 
     matrixSyms, matrixGeneIds, geneToSymFromMatrix = readValidGenes(datasetDir, inConf)
 
     convertMarkers(inConf, outConf, geneToSym, clusterLabels, matrixGeneIds, datasetDir)
 
     readQuickGenes(inConf, geneToSym, matrixSyms, matrixGeneIds, geneToSymFromMatrix, datasetDir, outConf)
 
     copyBackgroundImages(inDir, inConf, outConf, datasetDir)
 
     # a few settings are passed through to the Javascript as they are
-    for tag in ["shortLabel", "radius", "alpha", "priority", "tags", "sampleDesc", "geneLabel",
+    for tag in ["shortLabel", "radius", "alpha", "priority", "tags", "sampleDesc", "featDesc", "geneLabel",
         "clusterField", "defColorField", "xenaPhenoId", "xenaId", "hubUrl", "showLabels", "ucscDb",
         "unit", "violinField", "visibility", "coordLabel", "lineWidth", "hideDataset", "hideDownload",
         "metaBarWidth", "supplFiles", "defQuantPal", "defCatPal", "clusterPngDir", "wrangler", "shepherd",
         "binStrategy", "split",
         "lineAlpha", "lineWidth", "lineColor",
         # the following are there only for old datasets, they are now nested under "facets"
         # they are just here for backwards-compatibility and will eventually get removed
         "body_parts", "organisms", "diseases", "projects", "life_stages", "domains", "sources", "assays", 
         # facets are taking their place now
         "facets", "multiModal"]:
         copyConf(inConf, outConf, tag)
 
     if "name" not in outConf:
         copyConf(inConf, outConf, "name")
 
@@ -4772,35 +4791,40 @@
             coordDf.columns=['x','y']
 
         coordDf.to_csv(fname,sep='\t')
         desc.append( {'file':fileBase, 'shortLabel': fullName} )
 
     return coordFields
 
 def writeCellbrowserConf(name, coordsList, fname, addMarkers=True, args={}):
     checkDsName(name)
 
     metaFname = args.get("meta", "meta.tsv")
     clusterField = args.get("clusterField", "Louvain Cluster")
     coordStr = json.dumps(coordsList, indent=4)
     matrixFname = args.get("exprMatrix", "exprMatrix.tsv.gz")
 
+    cmdLine = " ".join(sys.argv)
+    dateStr = datetime.now().isoformat()
+
     conf = """
 # This is a bare-bones, auto-generated Cell Browser config file.
 # Look at https://github.com/maximilianh/cellBrowser/blob/master/src/cbPyLib/cellbrowser/sampleConfig/cellbrowser.conf
 # for a list of possible options
 # You can also write a default template into the current directory with cbBuild --init
+# Command was: %(cmdLine)s
+# Time: %(dateStr)s
 name='%(name)s'
 shortLabel='%(name)s'
 exprMatrix='%(matrixFname)s'
 #tags = ["10x", 'smartseq2']
 meta='%(metaFname)s'
 geneIdType='auto'
 defColorField='%(clusterField)s'
 labelField='%(clusterField)s'
 enumFields=['%(clusterField)s']
 coords=%(coordStr)s
 #alpha=0.3
 #radius=2
 """ % locals()
 
     if addMarkers: