b31323ed684da8a9f11edf81aba1e4ed6f20037a max Thu Oct 23 09:16:04 2025 -0700 handle NAN values betterthan before, important for the cortex-psi dataset diff --git src/cbPyLib/cellbrowser/cbWeb/js/cbData.js src/cbPyLib/cellbrowser/cbWeb/js/cbData.js index aaca592..7cc7589 100644 --- src/cbPyLib/cellbrowser/cbWeb/js/cbData.js +++ src/cbPyLib/cellbrowser/cbWeb/js/cbData.js @@ -94,37 +94,38 @@ chunks.push(chunk); return chunks; } my.loadJson = function(url, onSuccess, silent) { /* load json file from url and run onSuccess when done. Alert if it doesn't work. */ var req = jQuery.ajax({ "url" : url, type : "GET", dataType : "json", //mimeType : 'text/plain; charset=x-user-defined', // for local files, avoids errors success: function(data) { onSuccess(data); }, error : function() { - if (!silent) + if (!silent) { if (url.search("dataset.json")>-1) alert("Could not find a dataset at "+url+". If you are sure that the link is correct, please "+ "contact the administrator of this server, "+ "or cells@ucsc.edu if this is running at UCSC. "); else alert("Could not load "+url); + } onSuccess(null); } }); }; my.loadFile = function(url, arrType, onDone, onProgress, otherInfo, start, end) { /* load text or binary file with HTTP GET into fileData variable and call function * onDone(fileData, otherInfo) when done. * convert the data to arrType. arrType can be either a class like * Uint8Array or the value 'string', for normal text or 'comprText' * for gzip'ed string. To switch off type casting, set arrType=null * * optional: byte range request for start-end (0-based, inclusive). * */ var oReq = new XMLHttpRequest(); @@ -428,43 +429,48 @@ function gotOneFile() { doneCount++; if (doneCount===2) { if (self.conf.atacSearch) { self.peakOffsets = matrixIndex; self.indexGenesAtac(); } else { self.geneOffsets = matrixIndex; self.indexGenes(); } onDone(self.name); } } + function gotMatrix(data) { + matrixIndex = data; + gotOneFile(); + } + // load config and call onDone var dsUrl = cbUtil.joinPaths([this.url, "dataset.json"]); // deactivate the cache - this is a small file that users typically change often if (!md5) dsUrl = dsUrl+"?"+Math.floor(Math.random()*100000000); else dsUrl = dsUrl+"?"+md5; cbUtil.loadJson(dsUrl, function(data) { self.conf = data; gotOneFile();}); if (self.name!='') { // start loading gene offsets in the background now, because this takes a while var osUrl = cbUtil.joinPaths([this.url, "exprMatrix.json"]); - cbUtil.loadJson(osUrl, function(data) { matrixIndex = data; gotOneFile();}, true); + cbUtil.loadJson(osUrl, gotMatrix, true); } else { gotOneFile(); } }; this.loadBackgroundImage = function(url, onDone, imgIdx, imgCount, onProgress) { /* load the background image from URL, then call onDone() */ var image = new Image(); image.onload = function() { console.log("Done loading image "+url); onDone(image) if (imgIdx==imgCount-1) { var pe = new ProgressEvent("loadEnd"); pe.text = ""; onProgress(pe); @@ -753,91 +759,99 @@ binCounts[binIdx]++; } return {"dArr":dArr, "binCounts":binCounts}; } function discretizeArray(arr, maxBinCount, bin0Val) { /* This is the default for most users: discretize numeric values to * deciles. return an obj with dArr and binInfo */ /* bin0Val is the value that is treated differently, it is kept in its * own bin */ /* is bin0Val is null, switch off special bin0Value handling /* Code ported from Python cbAdd:discretizeArray */ /* supports NaN special values */ var breaks = []; + // remove the Nan values - XX REVISIT + for (let i = 0; i < arr.length; i++) { + if (Number.isNaN(arr[i])) { + arr[i] = -Infinity; + } + } + // sort expression values into a new array var arrSorted = arr.slice(); // slice() = "make copy" arrSorted.sort(); - var pos = 0; - if (arrSorted[0] == bin0Val) { // skip all bin0Val and remember position - var zeros = 0; + var minPos = 0; + if (arrSorted[0] == bin0Val) { // do we have any values in bin0? -> skip them and keep their position for (var i = 0, I = arrSorted.length; i < I; i++) { if (arrSorted[i] > bin0Val) { - pos = i; + minPos = i; break; } - zeros += 1; } } - var minVal = arrSorted[pos]; + var minVal = arrSorted[minPos]; // calculate optimal bin size in numbers of cells - var desiredBinSize = Math.floor((arrSorted.length - pos) / (maxBinCount - breaks.length)); + var desiredBinSize = Math.floor((arrSorted.length - minPos) / (maxBinCount - breaks.length)); var currentCount = 0; - var binMin = arrSorted[pos]; + var binMin = arrSorted[minPos]; var binMax; var lastValue; - for (var i = pos, I = arrSorted.length; i < I; i++) { + for (var i = minPos, I = arrSorted.length; i < I; i++) { // determine if current value can be used as a break // i.e. it is different from the previous var isBreak = false; if (lastValue !== undefined && arrSorted[i] > lastValue) { isBreak = true; } currentCount += 1; if (currentCount >= desiredBinSize && isBreak) { breaks.push(lastValue); binMin = arrSorted[i]; currentCount = 0; if (breaks.length + 1 == maxBinCount + 2) { breaks.push(binMin); break; } // recalculate optimal bin size desiredBinSize = Math.floor((arrSorted.length - i) / (maxBinCount - breaks.length)); } lastValue = arrSorted[i]; } breaks.push(arrSorted[I - 1]); var fb = findBins(arr, bin0Val, breaks); var dArr = fb.dArr; var binCounts = fb.binCounts; var binInfo = []; - var bin0MinMax = "Unknown"; + var bin0MinMax = "Unknown"; // meta data often has empty string = "Unknown" if (bin0Val === 0) { bin0MinMax = 0; } + if (bin0Val === FLOATNAN) { // this can only happen for expression matrices with "NAN" values in them + bin0MinMax = "NaN"; + } binInfo.push([bin0MinMax, bin0MinMax, binCounts[0]]); var idx = binCounts[0]; for (let i=0; i < breaks.length; i++) { // use sorted array of expression values - // to get more accurate values + // to get the exact break values var binMin = arrSorted[idx]; var binCount = binCounts[i+1]; idx += binCount - 1; var binMax = arrSorted[idx]; idx += 1; binInfo.push( [binMin, binMax, binCount] ); } return {"dArr": dArr, "binInfo": binInfo}; } function discretizeArray_binSize(arr, maxBinCount, bin0Val) { /* discretize an array such that each bin has the same size, not the same number of cells */ let minMax = cbUtil.arrMinMax(arr); let min = minMax[0]; let max = minMax[1]; @@ -996,35 +1010,37 @@ } } // set gene description to an ;-separated list for multi-gene mode and // to chrom:minStart-maxStart for ATAC mode let geneDesc; if (self.isAtacMode()) { let chrom = loadedRanges[0].name.split("|")[0]; let minStart = loadedRanges[0].name.split("|")[1]; let maxEnd = loadedRanges[loadedRanges.length-1].name.split("|")[2]; geneDesc = chrom+":"+minStart+"-"+maxEnd; } else geneDesc = geneDescs.join("; "); - // specVal is the value for a special bin, usually 0 + // specVal is the value for the first bin, usually 0. But can also be -Infinity var specVal = 0; var matrixMin = self.getMatrixMin(); if (matrixMin < 0) - specVal = null; + specVal = null; // null = no special bin handling at all + if (matrixMin === FLOATNAN) + specVal = FLOATNAN; let newArr = []; if (updateOp) { if (!self.currExprArr) newArr = arrs[0]; // first click ever = there is nothing to add to. XX reset ... when? else if (updateOp==="+") newArr = cbUtil.arrAddMult(self.currExprArr, arrs); else newArr = cbUtil.arrSubMult(self.currExprArr, arrs); } else { if (arrs.length===1) newArr = arrs[0]; else newArr = sumAllArrs(ArrType, arrs); @@ -1674,30 +1690,32 @@ var metaFieldInfo = self.getMetaFields(); for (var fieldIdx = 0; fieldIdx < metaFieldInfo.length; fieldIdx++) { var fieldInfo = metaFieldInfo[fieldIdx]; if (fieldInfo.type==="uniqueString" || fieldInfo.arr) continue; self.loadMetaVec(fieldInfo, doneMetaVec); } } this.getMatrixMin = function() { /* return the minimum value in the matrix */ var validNames = self.getMatrixIndex(); var matrixMin = 0; if ("_range" in validNames) matrixMin = validNames["_range"][0]; + if (matrixMin === null) + matrixMin = FLOATNAN; return matrixMin; } this.preloadGenes = function(geneSyms, onDone, onProgress, strategy) { /* start loading the gene expression vectors in the background. call onDone when done. */ var validGenes = self.geneOffsets; var loadCounter = 0; if (geneSyms) { for (var i=0; i<geneSyms.length; i++) { var geneId = geneSyms[i][0]; //if (! (sym in validGenes)) { //alert("Error: "+sym+" is in quick genes list but is not a valid gene"); //continue; //}