20d541d28092f1324307a0d34a1e65d145be6446
max
  Thu Jun 26 04:46:08 2025 -0700
removing config filename checks, refs #32013

diff --git src/cbPyLib/cellbrowser/cellbrowser.py src/cbPyLib/cellbrowser/cellbrowser.py
index 512a4f5..cfec862 100755
--- src/cbPyLib/cellbrowser/cellbrowser.py
+++ src/cbPyLib/cellbrowser/cellbrowser.py
@@ -1569,31 +1569,30 @@
     def iterRows(self):
         " yield (geneId, symbol, array) tuples from gene expression file. "
         mat = self.mat
         genes = self.genes
         skipIds = 0
         geneToSym = self.geneToSym
         for i in range(0, len(self.genes)):
 
             geneId = genes[i]
             geneId, geneSym, skipIds  = resolveGene(geneId, geneToSym, skipIds)
             logging.debug("geneId %s, geneSym %s", geneId, geneSym)
 
             if i%1000==0:
                 logging.info("%d genes written..." % i)
             arr = mat.getrow(i).toarray()
-            #arr = arr[arr==numpy.nan]=numpy.nan
             if arr.ndim==2:
                 # scipy sparse arrays have changed their entire data model and now all operations
                 # return 2D matrices. So need to unpack it to get the array. Grrr.
                 arr = arr[0]
             yield (geneId, geneSym, arr)
 
 def resolveGene(gene, geneToSym, skipIds):
     if "|" in gene:
         gene, symbol = gene.split("|")
     else:
         if geneToSym is None:
             symbol = gene
         else:
             if gene.startswith("EN"):
                 gene = gene.split(".")[0]
@@ -2061,50 +2060,55 @@
     - len(descStr) bytes: the descriptive string descStr
     - array of n 4-byte floats (n = number of cells) or 4-byte unsigned ints
     """
     geneDesc = str(geneDesc) # make sure no unicode
     geneIdLen = struct.pack("<H", len(geneDesc))
 
     # on cortex-dev, numpy was around 30% faster. Not a huge difference.
     if numpyLoaded:
         if matType=="float":
             exprArr = exprArr.astype("float32")
         elif matType=="int":
             exprArr = exprArr.astype("uint32")
         else:
             assert(False) # internal error
         exprStr = exprArr.tobytes()
+        #minVal = np.amin(exprArr[~np.isneginf(exprArr)])
+        exprArr[np.isnan(exprArr)] = FLOATNAN
         minVal = np.amin(exprArr)
+        # cortex-dev-splicing/psi has NAN values in the mtx file
     else:
         if matType=="float":
             arrType = "f"
         elif matType=="int" or matType=="forceInt":
             arrType = "L"
         else:
             assert(False) # internal error
 
         # if as too-old numpy version is loaded isNumpy is false, but the type may
         # still be a numpy array if we loaded from MTX -> force to a list
         if str(type(exprArr))=="<type 'numpy.ndarray'>":
             exprArr = exprArr.tolist()[0]
 
+        exprArr = [FLOATNAN if math.isnan(x) else x for x in exprArr]
         # Python 3.9 removed tostring()
         if sys.version_info >= (3, 2):
             exprStr = array.array(arrType, exprArr).tobytes()
         else:
             exprStr = array.array(arrType, exprArr).tostring()
 
+        #minVal = min([x for x in exprArr if not (math.isinf(x) and x < 0)]) # this is super slow...
         minVal = min(exprArr)
 
     if isPy3:
         geneStr = geneIdLen+bytes(geneDesc, encoding="ascii")+exprStr
     else:
         geneStr = geneIdLen+geneDesc+exprStr
 
     geneCompr = zlib.compress(geneStr)
 
     fact = float(len(geneCompr)) / len(geneStr)
     logging.debug("raw - compression factor of %s: %f, before %d, after %d"% (geneDesc, fact, len(geneStr), len(geneCompr)))
     return geneCompr, minVal
 
 def indexAtacOffsetsByChrom(exprIndex):
     """ given a dict with name -> (offset, len) and name being a string of chrom:start-end,
@@ -2242,31 +2246,31 @@
     else:
         if atacChromCount > 100:
             errAbort("There are more than 100 genes that look like a chrom_start_end range but the atacSearch cellbrowser.conf"
                     " is not set. Please add this option or contact us if you are confused about this error.")
 
     if highCount==0:
         logging.warn("No single value in the matrix is > 100. It looks like this "
         "matrix has been log'ed. Our recommendation for visual inspection is to not transform matrices")
 
     if len(exprIndex)==0:
         errAbort("No genes from the expression matrix could be mapped to symbols."
             "Are you sure these are Ensembl IDs? Adapt geneIdType in cellbrowser.conf.")
 
     # keep a flag so the client later can figure out if the expression matrix contains any negative values
     # this is important for handling the 0-value
-    exprIndex["_range"] = (int(allMin),0)
+    exprIndex["_range"] = (float(allMin),0) # float() in case it is -inf as a np.float32, must be native Python float, not numpy
     logging.info("Global minimum in matrix is: %f" % allMin)
 
     jsonOfh = open(jsonFname, "w")
     json.dump(exprIndex, jsonOfh)
     jsonOfh.close()
 
     jsonOfh = open(discretJsonFname, "w")
     json.dump(discretIndex, jsonOfh)
     jsonOfh.close()
 
     renameFile(tmpFname, binFname)
     renameFile(discretTmp, discretBinFname)
 
     return matType
 
@@ -5736,31 +5740,30 @@
     collInfo["abstract"] = """This is a local Cell Browser installation, without dataset hierarchies.
         Please select a dataset from the list on the left.<p>
         <p>
         See the documentation on <a target=_blank href="https://cellbrowser.readthedocs.io/collections.html">
         dataset hierarchies</a>.
 
     """
     summInfo = summarizeDatasets(datasets)
     collInfo["datasets"] = summInfo
     outFname = join(outDir, "dataset.json")
     writeJson(collInfo, outFname)
 
 
 def checkDsCase(inConfFname, relPath, inConfig):
     """ relPath should not be uppercase for top-level datasets at UCSC, as we use the hostname part """
-    if not inConfFname.endswith(".cellbrowser.conf") and not inConfFname.endswith(".cellbrowser"):
     if not "/" in relPath and getConfig("onlyLower", False) and \
             "name" in inConfig and inConfig["name"].isupper():
         errAbort("dataset name or directory name should not contain uppercase characters, as these do not work "
                 "if the dataset name is specified in the URL hostname itself (e.g. cortex-dev.cells.ucsc.edu)")
 
 def build(confFnames, outDir, port=None, doDebug=False, devMode=False, redo=None):
     " build browser from config files confFnames into directory outDir and serve on port "
     outDir = resolveOutDir(outDir)
 
     if outDir=="" or outDir==None:
         outDir = defOutDir
     outDir = expanduser(outDir)
 
     if not isdir(outDir):
         logging.warn("The directory %s does not exist. Making a new directory now." % (outDir))