b31323ed684da8a9f11edf81aba1e4ed6f20037a max Thu Oct 23 09:16:04 2025 -0700 handle NAN values betterthan before, important for the cortex-psi dataset diff --git src/cbPyLib/cellbrowser/cbWeb/js/cbData.js src/cbPyLib/cellbrowser/cbWeb/js/cbData.js index aaca592..7cc7589 100644 --- src/cbPyLib/cellbrowser/cbWeb/js/cbData.js +++ src/cbPyLib/cellbrowser/cbWeb/js/cbData.js @@ -1,1771 +1,1789 @@ // scDb: a class accessing single cell data from a URL 'use strict'; /*jshint globalstrict: true*/ /* jshint -W104 */ // allow some es6 parts (const) /* jshint -W117 */ // ignore undefined classes /* a module with some helper functions */ var cbUtil = (function () { var my = {}; // the byte range warning message will be shown only once, so we need a global flag my.byteRangeWarningShown = false; my.absPath = function absPaths(base, rel) { /* https://www.geeksforgeeks.org/convert-relative-path-url-to-absolute-path-url-using-javascript/ */ var st = base.split("/"); var arr = rel.split("/"); st.pop(); // ignore the current file name (or no string) // (ignore if "base" is the current folder without having slash in trail) for (var i = 0; i < arr.length; i++) { if (arr[i] == ".") continue; if (arr[i] == "..") st.pop(); else st.push(arr[i]); } return st.join("/"); }; my.joinPaths = function joinPaths(parts, separator) { // join paths together, taking care of duplicated /s if (parts[0]==="") // ["", "test.txt] should just be test.txt, not /test.txt parts.shift(); return parts.map(function(part) { return part.trim().replace(/(^[\/]*|[\/]*$)/g, ''); }).join(separator || '/'); }; my.dumpObj = function (o) { /* for debugging */ console.log(JSON.stringify(o)); }; my.keys = function(o, isInt) { /* return all keys of object as an array */ var allKeys = []; if (isInt) for(var k in o) allKeys.push(parseInt(k)); else for(var j in o) allKeys.push(j); return allKeys; }; my.parseRange = function(chromPos) { /* parse a string of the format chrom:start-end and return as obj with chrom, start, end * 
Tolerates commas and knows about "k" and "m" suffixes". * */ if (!chromPos.match(/[a-zA-Z0-9_-]+:[0-9,km]+-[0-9,km]+/)) return null; var chromPosArr = chromPos.split(":"); var chrom = chromPosArr[0]; var startEnd = chromPosArr[1].split("-"); var startStr = startEnd[0]; var endStr = startEnd[1]; startStr = startStr.replace(",", "").replace("k", "000").replace("m", "000000"); endStr = endStr.replace(",", "").replace("k", "000").replace("m", "000000"); var ret = {}; ret.chrom = chrom; ret.start = parseInt(startStr); ret.end = parseInt(endStr); return ret; } my.rangeToStr = function (o) { /* convert a range object back to a string */ return o.chrom+":"+o.start+"-"+o.end; } my.rangesToChunks = function(ranges) { /* given an array of [start, end, name], put adjacent elements into separate arrays * called chunks, e.g. [ [ [1, 10, "name1"], [10, 20, "name2"] ], [ [30, 40, "name3"] ] ] */ let chunks = []; let chunk = [ranges[0]]; for (let i=1; i < ranges.length; i++) { let left = ranges[i-1]; let right = ranges[i]; if (left[1] === right[0]) chunk.push(right) else { chunks.push(chunk); chunk = [right]; } } chunks.push(chunk); return chunks; } my.loadJson = function(url, onSuccess, silent) { /* load json file from url and run onSuccess when done. Alert if it doesn't work. */ var req = jQuery.ajax({ "url" : url, type : "GET", dataType : "json", //mimeType : 'text/plain; charset=x-user-defined', // for local files, avoids errors success: function(data) { onSuccess(data); }, error : function() { - if (!silent) + if (!silent) { if (url.search("dataset.json")>-1) alert("Could not find a dataset at "+url+". If you are sure that the link is correct, please "+ "contact the administrator of this server, "+ "or cells@ucsc.edu if this is running at UCSC. 
"); else alert("Could not load "+url); + } onSuccess(null); } }); }; my.loadFile = function(url, arrType, onDone, onProgress, otherInfo, start, end) { /* load text or binary file with HTTP GET into fileData variable and call function * onDone(fileData, otherInfo) when done. * convert the data to arrType. arrType can be either a class like * Uint8Array or the value 'string', for normal text or 'comprText' * for gzip'ed string. To switch off type casting, set arrType=null * * optional: byte range request for start-end (0-based, inclusive). * */ var oReq = new XMLHttpRequest(); oReq.open("GET", url, true); if (arrType==="string") oReq.responseType = "text"; else oReq.responseType = "arraybuffer"; oReq.onload = cbUtil.onDoneBinaryData; oReq.onprogress = onProgress; oReq.onerror = function(e) { // Github rejects accept-encoding: headers coming from Firefox at the moment. alert("Could not load file "+url+". If this is Firefox and running on Github, please contact us."); }; oReq._onDone = onDone; // keep this for the callback oReq._otherInfo = otherInfo; // keep this for the callback oReq._arrType = arrType; // keep this for the callback casting oReq._url = url; // keep this for the callback error messages if (start!==undefined) { oReq.setRequestHeader('Range', "bytes="+start+"-"+end); // the bytes (incl.) you request oReq._start = start; oReq._end = end; } oReq.send(null); }; my.loadTsvFile = function(url, onDone, addInfo) { /* load a tsv file relative to baseUrl and call a function when done */ Papa.parse(url, { delimiter : "\t", skipEmptyLines : true, fastMode : true, download: true, complete: function(results, localFile) { onDone(results, localFile, addInfo); }, error: function(err, file) { if (addInfo!==undefined) alert("could not load "+url); } }); }; my.onDoneBinaryData = function(oEvent) { /* called when binary file has been loaded. 
*/ var url = this._url; // 200 = OK, 206 = Partial Content OK if (this.status !== 200 && this.status !== 206) { alert("Could not load "+url+", error " + oEvent.statusText); return; } var binData = this.response; if (!binData) { alert("internal error when loading "+url+": no reponse from server?"); return; } // if the user wants only a byte range... if (this._start!==undefined) { // check if the expected length is OK. Some webservers don't support byte range // requests. var expLength = this._end-this._start+1; // byte range is inclusive var dataLen = binData.byteLength; if (!dataLen) dataLen = new Blob([binData]).size;; if (dataLen < expLength-1) // Yes, the -1 does not make sense. // This happens only with https://cells-beta.gi.ucsc.edu/?ds=engraftable-hsc+adt // and I have no idea why. alert("internal error cbData.js: data received from web server is too short. Expected data size was "+exprLength+ " but received "+dataLen+" bytes. URL: "+url+ "Does the HTTP server really support byte range requests? You probably will have to contact us to "+ " narrow down this problem."); if (dataLen > expLength) { console.log("Webserver does not support byte range requests, working around it, but this may be slow"); if (dataLen>30000000 && !my.byteRangeWarningShown) { alert("The webserver of this site does not support byte-range requests. " + "While the cell browser may work to some extent, it will " + " be slower and use more memory than normal. 
Please contact the administrator who setup this cell browser."); my.byteRangeWarningShown = true; } if (this._arrType) { if (this._arrType==="string") // for strings, it's easy to grab a part of it binData = binData.substring(this._start, this._end); else { // it's a bit harder if the data is a buffer binData = new Uint8Array(binData); // buffers don't support slicing, so cast to array first binData = binData.slice(this._start, this._end).buffer; // slice does not include the end } } } } if (this._arrType) { if (this._arrType==="comprText") { var arr = pako.ungzip(binData); // https://stackoverflow.com/questions/6965107/converting-between-strings-and-arraybuffers // convert byte array to strin // I used the apply() function originally, that's more compatible, but leads to a // stack overflow in bigger datasets //binData = String.fromCharCode.apply(null, arr); var dec = new TextDecoder("utf-8"); binData = dec.decode(arr); } else if (this._arrType!=="string") binData = new this._arrType(binData); } this._onDone(binData, this._otherInfo); }; my.searchKeys = function(geneIdx, searchStr) { /* search the keys of an object for matches. Uses a strategy adapted for gene identifers and symbols: * - ignore case * - prefix search * - if not match, try suffix search * returns an array of all objects for those keys. 
*/ searchStr = searchStr.toLowerCase(); var foundGenes = []; for (var name in geneIdx) if (name.startsWith(searchStr)) foundGenes.push(geneIdx[name]); // try a suffix search if the prefix search did not work // This is so people can enter just the last few digits of the ENSG IDS if (foundGenes.length===0) { for (var name in geneIdx) if (name.endsWith(searchStr)) foundGenes.push(geneIdx[name]); } return foundGenes; } my.makeType = function(typeStr) { /* given a string, return the correct type array for it */ typeStr = typeStr.toLowerCase(); if ((typeStr==="double" || typeStr==="float64")) return Float64Array; if ((typeStr==="float" || typeStr==="float32")) return Float32Array; else if (typeStr==="int32") return Int32Array; else if (typeStr==="uint32" || typeStr==="dword") return Uint32Array; else if (typeStr==="uint16" || typeStr==="word") return Uint16Array; else if (typeStr==="uint8" || typeStr==="byte") return Uint8Array; else alert("Not a valid array type: "+typeStr); return null; }; my.findObjWhereEq = function(objArr, keyName, searchName) { // given a list of objects, return the one where keyName==searchName var found = null; for (var i = 0; i < objArr.length; i++) { var el = objArr[i]; if (el[keyName]===searchName) { found = el; break; } } return found; }; my.findIdxWhereEq = function(objArr, keyName, searchName) { // given a list of objects, return the index where keyName==searchName var found = null; for (var i = 0; i < objArr.length; i++) { var el = objArr[i]; if (el[keyName]===searchName) { found = i; break; } } return found; }; my.arrAdd = function (a, b) { /* add array b to array a, modifying a in place. Return a. */ if (a.length !== b.length) alert("cbUtil.arrAdd: input arrays must have same size"); for (var i=0; i < a.length; i++) a[i] = a[i]+b[i]; return a; }; my.arrAddMult = function (a, bArrs) { /* add all arrays in bArrs to array a, modifying a in place. Return a. 
*/ for (var bi=0; bi < bArrs.length; bi++) { var a = cbUtil.arrAdd(a, bArrs[bi]); } return a; }; my.arrSub = function (a, b) { /* substract array b from array a, modifying a in place. Return a. */ if (a.length !== b.length) alert("cbUtil.arrAdd: input arrays must have same size"); for (var i=0; i < a.length; i++) a[i] = a[i]-b[i]; return a; }; my.arrSubMult = function (a, bArrs) { /* substract all arrays in bArrs from array a, modifying a in place. Return a. */ for (var bi=0; bi < bArrs.length; bi++) { var a = cbUtil.arrSub(a, bArrs[bi]); } return a; }; my.arrMinMax = function (a) { /* return [min, max] of array */ let min = a[0], max = a[0]; for (let i = 1; i < a.length; i++) { let value = a[i] min = (value < min) ? value : min max = (value > max) ? value : max } return [min, max] } my.baReadBigOffset = function(ba, o) { /* given a byte array, return the unsigned long long int (little endian), so eight bytes, at offset o */ var offset = ba[o] | ba[o+1] << 8 | ba[o+2] << 16 | ba[o+3] << 24 | ba[o+4] << 32 | ba[o+5] << 40 | ba[o+6] << 48 | ba[o+7] << 56; return offset; }; my.baReadOffset = function(ba, o) { /* given a byte array, return the unsigned long int (little endian), so four bytes, at offset o */ var offset = ba[o] | ba[o+1] << 8 | ba[o+2] << 16 | ba[o+3] << 24; return offset; }; my.baReadUint16 = function(ba, o) { /* read 16 bits, little endian, from byte array */ var num = ba[o] | ba[o+1] << 16; return num; }; return my; }()); function CbDbFile(url) { // a class that loads all data from binary files loading for the cell browser: // loading coordinates, loading meta data, resolve sample names to sample // indices, resolve sample indices to sample names load meta for a given // cell index var self = this; // this has two conflicting meanings in javascript. 
// To make it a little more readble, we use 'self' to refer to object variables and 'this' to refer to the calling object self.name = url; self.url = url; self.exprBinCount = 10; // for quick gene name searching self.geneSyns = null; // array of [geneSynonymLowercase, geneId] // for normal gene mode self.geneOffsets = null; // object with geneId -> [offset in file, data size in bytes] // for ATAC mode self.peakOffsets = null; // object with chrom -> list of [chromStart, chromEnd, offset in file, data size in bytes] self.geneLocs = null // gene locations as chrom -> list of [start, end, strand, geneId|sym (string) ] self.geneToTss = null; // object with geneId -> [chrom, chromStart, index into geneLocs[chrom] self.exprCache = {}; // cached compressed expression arrays self.metaCache = {}; // cached compressed meta arrays self.quickExpr = {}; // uncompressed expression arrays self.allMeta = {}; // uncompressed meta arrays // special values representing NaN in data arrays, must match same variables in cellBrowser.py var FLOATNAN = Number.NEGATIVE_INFINITY; // NaN and sorting does not work. 
we want NaN always to be first, so encode as -inf this.conf = null; this.loadConfig = function(onDone, md5) { /* load config and gene offsets from URL and call func when done */ var doneCount = 0; var matrixIndex = null; function gotOneFile() { doneCount++; if (doneCount===2) { if (self.conf.atacSearch) { self.peakOffsets = matrixIndex; self.indexGenesAtac(); } else { self.geneOffsets = matrixIndex; self.indexGenes(); } onDone(self.name); } } + function gotMatrix(data) { + matrixIndex = data; + gotOneFile(); + } + // load config and call onDone var dsUrl = cbUtil.joinPaths([this.url, "dataset.json"]); // deactivate the cache - this is a small file that users typically change often if (!md5) dsUrl = dsUrl+"?"+Math.floor(Math.random()*100000000); else dsUrl = dsUrl+"?"+md5; cbUtil.loadJson(dsUrl, function(data) { self.conf = data; gotOneFile();}); if (self.name!='') { // start loading gene offsets in the background now, because this takes a while var osUrl = cbUtil.joinPaths([this.url, "exprMatrix.json"]); - cbUtil.loadJson(osUrl, function(data) { matrixIndex = data; gotOneFile();}, true); + cbUtil.loadJson(osUrl, gotMatrix, true); } else { gotOneFile(); } }; this.loadBackgroundImage = function(url, onDone, imgIdx, imgCount, onProgress) { /* load the background image from URL, then call onDone() */ var image = new Image(); image.onload = function() { console.log("Done loading image "+url); onDone(image) if (imgIdx==imgCount-1) { var pe = new ProgressEvent("loadEnd"); pe.text = ""; onProgress(pe); } }; console.log("Start loading image "+url); image.src = url; if (imgIdx==0) { var pe = new ProgressEvent("loadStart"); pe.text = "High-res background image loading..."; onProgress(pe); } } this.findCoordIdx = function(name) { /* given coord label return its index */ let coords = self.conf.coords; for (let i=0; i b[j] ? 1 : -1; }); } function arrToEnum(arr, counts) { /* given an array of numbers, count how often each number appears. 
* return an obj with two keys: * - dArr is the new array with the index of each value * - binInfo is an array with for each index a tuple of (value, value, count) */ // replace values in array with their enum-index // -> make a mapping value -> bin var valToBin = {}; sortArrOfArr(counts, 0); // sort by value for (var i=0; i skip them and keep their position for (var i = 0, I = arrSorted.length; i < I; i++) { if (arrSorted[i] > bin0Val) { - pos = i; + minPos = i; break; } - zeros += 1; } } - var minVal = arrSorted[pos]; + var minVal = arrSorted[minPos]; // calculate optimal bin size in numbers of cells - var desiredBinSize = Math.floor((arrSorted.length - pos) / (maxBinCount - breaks.length)); + var desiredBinSize = Math.floor((arrSorted.length - minPos) / (maxBinCount - breaks.length)); var currentCount = 0; - var binMin = arrSorted[pos]; + var binMin = arrSorted[minPos]; var binMax; var lastValue; - for (var i = pos, I = arrSorted.length; i < I; i++) { + for (var i = minPos, I = arrSorted.length; i < I; i++) { // determine if current value can be used as a break // i.e. 
it is different from the previous var isBreak = false; if (lastValue !== undefined && arrSorted[i] > lastValue) { isBreak = true; } currentCount += 1; if (currentCount >= desiredBinSize && isBreak) { breaks.push(lastValue); binMin = arrSorted[i]; currentCount = 0; if (breaks.length + 1 == maxBinCount + 2) { breaks.push(binMin); break; } // recalculate optimal bin size desiredBinSize = Math.floor((arrSorted.length - i) / (maxBinCount - breaks.length)); } lastValue = arrSorted[i]; } breaks.push(arrSorted[I - 1]); var fb = findBins(arr, bin0Val, breaks); var dArr = fb.dArr; var binCounts = fb.binCounts; var binInfo = []; - var bin0MinMax = "Unknown"; + var bin0MinMax = "Unknown"; // meta data often has empty string = "Unknown" if (bin0Val === 0) { bin0MinMax = 0; } + if (bin0Val === FLOATNAN) { // this can only happen for expression matrices with "NAN" values in them + bin0MinMax = "NaN"; + } binInfo.push([bin0MinMax, bin0MinMax, binCounts[0]]); var idx = binCounts[0]; for (let i=0; i < breaks.length; i++) { // use sorted array of expression values - // to get more accurate values + // to get the exact break values var binMin = arrSorted[idx]; var binCount = binCounts[i+1]; idx += binCount - 1; var binMax = arrSorted[idx]; idx += 1; binInfo.push( [binMin, binMax, binCount] ); } return {"dArr": dArr, "binInfo": binInfo}; } function discretizeArray_binSize(arr, maxBinCount, bin0Val) { /* discretize an array such that each bin has the same size, not the same number of cells */ let minMax = cbUtil.arrMinMax(arr); let min = minMax[0]; let max = minMax[1]; let binSize = (max-min)/(maxBinCount-1); var binCounts = []; for (let i=0; i < maxBinCount; i++) { binCounts.push(0); } var dArr = []; for (let i=0; i < arr.length; i++) { var binIdx = Math.round( (arr[i]-min) / binSize ) dArr.push( binIdx ); binCounts[binIdx]++; } let binInfo = []; for (let i=0; i < maxBinCount; i++) binInfo.push( [i*binSize, (i+1)*binSize, binCounts[i]] ); let ret = {}; ret.dArr = dArr; ret.binInfo = 
binInfo; return ret; } this.locusToOffset = function(name) { /* for both gene and ATAC mode: */ /* return an array [start, end, name] given a locus description (=a string: gene ID/symbol or chr|start|end) */ var off = null; var geneId = name; //if (name.includes("|")) { // atac mode: name is chrom|start|end if (self.peakOffsets !== null) { // atac mode: name is chrom|start|end let pos = name.split("|"); off = self.findAtacOffsets(pos[0], parseInt(pos[1]), parseInt(pos[2])); } else { var geneIds = self.findGenesExact(name); if (geneIds.length!==0) { geneId = geneIds[0]; off = self.geneOffsets[geneIds[0]]; if (geneIds.length>1) alert("More than one match for gene name "+name+", using only first match."); } } if (off===null || off===undefined) { alert("internal error: there is no gene with the name "+name+" in the expression matrix"); return; } let start = off[0]; let len = off[1]; let end = start+len; return [start, end, geneId]; } this.namesToChunks = function(lociNames) { /* given a list of loci names, return a sorted list of "chunks", * Each chunk is an array of adjacent ranges. * [ [[offset1, end1, name1], [offset2, end2, name2] ] , ... ] */ // transform to a flat list of [start, end, name], sorted by start let ranges = []; for (let name of lociNames) { let startEndName = self.locusToOffset(name); ranges.push( startEndName ); } ranges.sort(function(a, b) { return b[0] - a[0]; }); return cbUtil.rangesToChunks(ranges); } function gunzipAndConvert(comprData, ArrType, sampleCount) { /* gunzip and convert the array type, return object with arr and desc */ var buf = pako.inflate(comprData); // see python code in cellbrowser.py, function 'exprRowEncode': //# The format of a record is: //# - 2 bytes: length of descStr, e.g. 
gene identifier or else //# - len(descStr) bytes: the descriptive string descStr //# - array of n*4 bytes, n = number of cells // read the gene description var descLen = cbUtil.baReadUint16(buf, 0); var arr = buf.slice(2, 2+descLen); var geneDesc = String.fromCharCode.apply(null, arr); // arrays always use 4 bytes per value. var arrData = buf.slice(2+descLen, 2+descLen+(4*sampleCount)); var exprArr = new ArrType(arrData.buffer); return {"arr":exprArr, "desc":geneDesc}; } function sumAllArrs(ArrType, arrs) { /* given an array of arrays, return a new array of ArrType with the sum of these */ let arrCount = arrs.length; let arrSize = arrs[0].length; let newArr = new ArrType(arrSize); for (let i=0; i operation onDone(newArr, da.dArr, locusName, geneDesc, da.binInfo); } // this function gets called when a chunk has been loaded function onChunkDone(arr, ranges) { /* slice the buffer by ranges, convert each range and add to loadedRanges */ for (let range of ranges) { let start = range[0]; let end = range[1]; let name = range[2]; let comprData = arr.slice(start, end); console.log("Got expression data, size = "+comprData.length+" bytes"); let exprInfo = gunzipAndConvert(comprData, ArrType, sampleCount); exprInfo.name = name; loadedRanges.push(exprInfo); if (loadedRanges.length===namesToLoad.length) { allRangesDone(); } } } // start of function let url = cbUtil.joinPaths([self.url, "exprMatrix.bin"]); let chunks = self.namesToChunks(namesToLoad); for (let ranges of chunks) { // submit http request from start of first range to end of last range let minStart = ranges[0][0]; let maxEnd = ranges[ranges.length-1][1]; let names = []; let relRanges = [] for (let r of ranges) { names.push(r[2]); relRanges.push( [ r[0]-minStart, r[1]-minStart, r[2] ] ); } cbUtil.loadFile(url+"?"+names.join("-"), Uint8Array, onChunkDone, onProgress, relRanges, minStart, maxEnd); } }; this.loadExprVec = function(geneSym, onDone, onProgress, otherInfo) { /* given a geneSym (string), retrieve array of 
values and call onDone with * (array, geneSym, geneDesc) */ function onGeneDone(comprData, geneSym) { // decompress data and run onDone when ready self.exprCache[geneSym] = comprData; console.log("Got expression data, size = "+comprData.length+" bytes"); var buf = pako.inflate(comprData); // see python code in cbAdd, function 'exprRowEncode': //# The format of a record is: //# - 2 bytes: length of descStr, e.g. gene identifier or else //# - len(descStr) bytes: the descriptive string descStr //# - array of n*4 bytes, n = number of cells // read the gene description var descLen = cbUtil.baReadUint16(buf, 0); var arr = buf.slice(2, 2+descLen); var geneDesc = String.fromCharCode.apply(null, arr); // read the expression array var sampleCount = self.conf.sampleCount; var matrixType = self.conf.matrixArrType; if (matrixType===undefined) alert("dataset JSON config file: missing matrixArrType attribute"); var ArrType = cbUtil.makeType(matrixType); // currently, all arrays use 4 bytes per value. Compression takes care of the size. var arrData = buf.slice(2+descLen, 2+descLen+(4*sampleCount)); var exprArr = new ArrType(arrData.buffer); onDone(exprArr, geneSym, geneDesc, otherInfo); } var start = 0; var lineLen = 0; if (self.isAtacMode()) { let r = cbUtil.parseRange(geneSym); if (r===null) { alert("Cannot color on "+geneSym+". This is an ATAC dataset, but the input does not look like a chromosome region in a format like chr1:1-1000.") return; } let ranges = self.findOffsetsWithinPos(r.chrom, r.start, r.end); if (ranges.length === 0) { alert("cbData.js: "+geneSym+" does not match a single ATAC region."); onDone(null); } else if (ranges.length > 1) { alert("cbData.js: "+geneSym+" matches more than one ATAC region. 
Using only first one."); } let firstRange = ranges[0]; start = firstRange[2]; lineLen = firstRange[3]; } else { let offsData = null; offsData = self.geneOffsets[geneSym]; if (offsData===undefined) { alert("cbData.js: "+geneSym+" is not in the expression matrix"); onDone(null); } start = offsData[0]; lineLen = offsData[1]; } var end = start + lineLen - 1; // end pos is inclusive var url = cbUtil.joinPaths([self.url, "exprMatrix.bin"]); if (geneSym in this.exprCache) onGeneDone(this.exprCache[geneSym], geneSym); else cbUtil.loadFile(url+"?"+geneSym, Uint8Array, onGeneDone, onProgress, geneSym, start, end); }; this.loadClusterMarkers = function(markerIndex, clusterName, onDone, onProgress) { /* given the name of a cluster, return an array of rows with the cluster-specific genes */ var url = cbUtil.joinPaths([self.url, "markers", "markers_"+markerIndex, clusterName.replace("/", "_")+".tsv"]); cbUtil.loadTsvFile(url, onMarkersDone, {"clusterName":clusterName}); function onMarkersDone(papaResults, url, otherData) { var rows = papaResults.data; onDone(rows, otherData); } }; this.getMetaFields = function() { /* return an array of the meta fields, in the format of the config file: * objects with 'name', 'label', 'valCounts', etc */ return self.conf.metaFields; }; this.addCustomMetaField = function(metaInfo) { //if (!self.customMeta) //self.customMeta = []; //self.customMeta.push(metaInfo); metaInfo.isCustom = true; self.conf.metaFields.unshift(metaInfo); } this.getConf = function() { /* return an object with a few general settings for the viewer: * - alpha: default transparency * - radius: circle default radius */ return self.conf; }; this.isAtacMode = function() { return (self.conf.atacSearch!==undefined) } this.getMatrixIndex = function() { /* return an object with the geneSymbols or peak locations */ if (self.isAtacMode()) return self.peakOffsets; else return self.geneOffsets; }; this.getRandomLocus = function() { if (self.isAtacMode()) alert("getRandomLocus() not 
implemented for ATAC mode yet.") else { return cbUtil.keys(self.geneOffsets)[0]; } } this.getGeneInfo = function(geneId) { /* given geneId, return an object with .geneId and .sym */ var genes = self.geneOffsets; var sym = genes[geneId][2]; return {"id":geneId, "sym":sym}; } this.getGeneInfoAtac = function(geneId) { /* in atac mode: given geneId, return an object with .sym, .chrom and .chromStart */ var geneTss = self.geneToTss[geneId]; // geneLoc is [chrom, tss, geneIds], see indexGenesAtac() var geneChrom = geneTss[0]; var tssStart = geneTss[1]; var geneIdx = geneTss[2]; // geneLocs is [start, end, strand, geneIdAndSymbol], see indexGenesAtac() var sym = self.geneLocs[geneChrom][geneIdx][3]; // backwards compatibility: gene names can include symbols if (sym.indexOf("|")!==-1) sym = sym.split("|")[1]; return {"chrom" : geneChrom, "chromStart":tssStart, "sym":sym}; } this.getCellIdMeta = function() { /* return the cell ID meta field */ for (let metaInfo of self.conf.metaFields) if (!metaInfo.isCustom) return metaInfo; } this.loadCellIds = function(idxArray, onDone, onProgress) { /* Get the cellId strings, the first meta field, for the integer IDs of cells in idxArray. * Calls onDone with an array of strings. * If idxArray is null, gets all cellIds */ function mapIdxToId(idxArray) { /* Internally cells are referred to by an array of cell indices. 
Translate an array of these numbers to an array of strings */ if (idxArray===null) return self.cellIds; var idList = []; for (var i=0; i 4GB need 8 bytes for the offset + 2 for the line length var end = start+6; function lineDone(text) { /* called when the line from meta.tsv has been read */ var fields = text.split("\t"); fields[fields.length-1] = fields[fields.length-1].trim(); // remove newline onDone(fields); } function offsetDone(arr) { /* called when the offset in meta.index has been read */ var offset; if (self.conf.metaNeedsEightBytes) offset = cbUtil.baReadBigOffset(arr, 0); else offset = cbUtil.baReadOffset(arr, 0); var lineLen = cbUtil.baReadUint16(arr, 4); // now get the line from the .tsv file var url = cbUtil.joinPaths([self.url, "meta.tsv"]); cbUtil.loadFile(url+"?"+cellIdx, "string", lineDone, onProgress, null, offset, offset+lineLen); } // chrome caching sometimes fails with byte range requests, so add cellidx to the URL cbUtil.loadFile(url+"?"+cellIdx, Uint8Array, function(byteArr) { offsetDone(byteArr); }, undefined, null, start, end); }; function countAndSort(arr) { /* count values in array, return an array of [value, count]. */ //var counts = {}; var counts = new Map(); // this is a relatively recent Javascript feature, but is faster for (var i=0; i < arr.length; i++) { var num = arr[i]; //without the Map(), it's a bit slower: counts[num] = counts[num] ? 
counts[num] + 1 : 1; counts.set(num, (counts.get(num) | 0) + 1); } var entries = Array.from(counts.entries()); entries.sort(function(a,b) { return a[0]-b[0]}); return entries; } // Todo: check one day if this really faster than a simple iteration function smoolakBS_left(arr, find) { // binary search, returns index left of insert point // based on https://jsperf.com/binary-search-in-javascript/7 // insert_left based on https://rosettacode.org/wiki/Binary_search var lo = 0; var hi = arr.length - 1; var i; while(lo <= hi) { i = ((lo + hi) >> 1); if(arr[i] >= find) hi = i - 1; else //if (arr[i] < find) lo = i + 1; //else //{ return lo; } } return lo; } function addToGeneSynsAndSplit(geneSyns, name) { /* given a symbol which can be geneId|sym, append to geneSyn array 1 or 2 tuples with [searchKey, geneId] */ var geneId = name; var sym = name; if (name.indexOf("|")===-1) { // datasets with only one gene name: old behavior geneSyns.push( [name.toLowerCase(), name] ); } else { // dataset with geneId and symbol var parts = name.split("|"); geneId = parts[0]; sym = parts[1]; geneSyns.push( [geneId.toLowerCase(), geneId] ); geneSyns.push( [sym.toLowerCase(), geneId] ); } return [geneId, sym]; } this.indexGenes = function() { /* for the case of normal gene mode, not ATAC-mode: split geneId from symbol to allow fast lookups of the symbols * changes self.geneOffsets from geneId|symbol -> [start, end] to geneId -> [start, end, symbol] * (allows us to use the geneId everywhere instead of a new geneNumber) * Creates self.geneSyns as array of [synonym, geneId] - for searching, allows 1:many relationships */ var newIdx = {}; var geneIdx = self.geneOffsets; var geneSyns = []; for (var geneName in geneIdx) { // as of 2019, faster than Object.entries() // a geneName can be either a single symbol or a string like geneId|sym var offsets = geneIdx[geneName]; var geneIdSym = addToGeneSynsAndSplit(geneSyns, geneName); var geneId = geneIdSym[0]; var sym = geneIdSym[1]; newIdx[geneId] = 
[offsets[0], offsets[1], sym]; } self.geneOffsets=newIdx; self.geneSyns = geneSyns; } function searchGeneNames(geneSyns, searchStr) { /* search the geneSyns (arr of [syn, geneId]) for matches. Return arr of geneId */ var foundIds = []; for (var i=0; i1 are found, abort and show error message. */ if (symOrId.indexOf("|")!==-1) symOrId = symOrId.split("|")[0]; let geneIds = self.findGenesExact(symOrId); if (geneIds.length===0) { alert("Could not find gene symbold or ID: "+symOrId); return null; } if (geneIds.length>1) { alert("Found more than one geneId for symbol, using only the first match: "+symOrId); } return geneIds[0]; } this.isGeneId = function (geneId) { /* check if a given string is a geneId */ return (geneId in self.geneOffsets); } function pickTssForGene(loc) { /* given a chromLoc tuple (start, end, strand, sym), return the TSS of the gene (start or end )*/ var start = loc[0]; var end = loc[1]; var strand = loc[2]; var sym = loc[3]; var tss = start; if (strand==="-") tss = end; return tss; } this.indexGenesAtac = function () { /* like indexGenes(), but for ATAC mode. * db.geneLocs has the genes as chrom -> list of [start, end, strand, geneId|sym ( as string ) ] * where geneName can be geneId|sym or just sym. * create self.geneToTss geneId -> [chrom, tssStart, geneIdx] for TSS lookups * and self.geneSyns = [ [geneSyn, geneId], ... ] for quick search (syn is a symbol or id) * */ var geneToTss = {} var geneLocs = self.geneLocs; var geneSyns = []; //for (chrom, chromLocs] of Object.entries(self.geneLocs)) // old-school for... in is faster than .entries ! 
for (var chrom in geneLocs) { var chromLocs = self.geneLocs[chrom]; for (var geneIdx = 0; geneIdx < chromLocs.length; geneIdx++) { var geneLoc = chromLocs[geneIdx]; var tss = pickTssForGene(geneLoc); var sym = geneLoc[3]; var geneId = sym; if (sym.indexOf("|")!==-1) { var parts = sym.split("|"); geneId = parts[0]; sym = parts[1]; } /* NOTE(review): geneToTss is keyed by geneId (see the assignment below), so for geneId|sym names the two checks that follow compare a symbol against geneId keys - confirm this is intended */ if (sym in geneToTss) sym = sym+"_"+chrom; // same symbol, but on two different chromosomes if (sym in geneToTss) console.log(sym+" appears twice on the some chromosome. Quick search broken?"); geneToTss[geneId] = [chrom, tss, geneIdx]; geneSyns.push( [sym.toLowerCase(), geneId] ) geneSyns.push( [geneId.toLowerCase(), geneId] ) } } self.geneToTss = geneToTss; self.geneSyns = geneSyns; } this.findGenesAtac = function(searchStr) { /* like findGenes(), but for ATAC mode: return an array of {.id and .sym} given a search string; empty array if nothing matches */ //var geneInfos = cbUtil.searchKeys(self.geneToTss, searchStr); var geneIds = self.findGeneIdsPrefixSuffix(searchStr); if (geneIds.length===0) return []; var geneNameObjs = []; for (var geneId of geneIds) { var gene = self.getGeneInfoAtac(geneId); geneNameObjs.push({id:geneId, sym:gene.sym}); // same format as findGenes() } return geneNameObjs; } this.findAtacOffsets = function(chrom, start, end) { /* find the entry of self.peakOffsets[chrom] whose start and end are exactly equal to start and end, * and return a two-element array with elements 2 and 3 of that entry (location in the matrix file). * Returns null if no entry matches; shows an alert and returns undefined if there are no peaks on chrom. */ var chromRanges = self.peakOffsets[chrom]; if (chromRanges===undefined) { alert("This gene is on "+chrom+" but there are no peaks at all on this chromosome.") return; } for (let rangeArr of chromRanges) { let rangeStart = rangeArr[0]; let rangeEnd = rangeArr[1]; if (start === rangeStart && rangeEnd === end) { return [rangeArr[2], rangeArr[3]]; } } return null; } this.findOffsetsWithinPos = function(chrom, start, end) { /* return an array of arrays [start,end,offset,length] about all included atac regions * that are fully within chrom:start-end (both bounds inclusive). * Shows an alert and returns undefined if there are no peaks on chrom. */ var chromRanges = self.peakOffsets[chrom]; if (chromRanges===undefined) { alert("This 
gene is on "+chrom+" but there are no peaks at all on this chromosome.") return; } var foundArr = []; for (let rangeArr of chromRanges) { let rangeStart = rangeArr[0]; let rangeEnd = rangeArr[1]; if (start <= rangeStart && rangeEnd <= end) { foundArr.push( rangeArr ); } } return foundArr; } this.findRangesByGene = function(geneId) { /* return object with .ranges = all peak ranges between the TSS of the left and the TSS of the right neighbor gene, and .pos = object * with chrom/start/end of that area. Uses 0 as start if there is no left neighbor and 1e10 as end if there is no right neighbor. */ // get position of gene var tssInfo = self.geneToTss[geneId]; var geneChrom = tssInfo[0]; var tssStart = tssInfo[1]; var geneIdx = tssInfo[2]; var chromLocs = self.geneLocs[geneChrom]; // determine TSS of left neighbor gene var leftTss = 0; if (geneIdx > 0) leftTss = pickTssForGene(chromLocs[geneIdx-1]); // determine TSS of right neighbor gene var rightTss = 1e10; // = I do not have chrom size data in .js if (geneIdx+1 < chromLocs.length) rightTss = pickTssForGene(chromLocs[geneIdx+1]); var res = {} res.ranges = self.findOffsetsWithinPos(geneChrom, leftTss, rightTss); res.pos = { chrom:geneChrom, start:leftTss, end:rightTss }; return res; } this.getName = function() { /* return name of current dataset: self.conf.name if self.conf is not null, otherwise self.name */ if (self.conf!==null) return self.conf.name; else return self.name; }; this.preloadAllMeta = function() { /* start loading all meta value vectors and add them to db.allMeta, keyed by field name. * Fields of type uniqueString and fields that already have a truthy .arr are skipped. 
*/ self.allMeta = {}; function doneMetaVec(arr, metaInfo, otherInfo) { self.allMeta[metaInfo.name] = arr; /* NOTE(review): fieldIdx is the var-scoped loop counter of preloadAllMeta, not a per-field value; if loadMetaVec calls back after the loop has advanced, this deletes a different cache slot - confirm */ delete self.metaCache[fieldIdx]; } var metaFieldInfo = self.getMetaFields(); for (var fieldIdx = 0; fieldIdx < metaFieldInfo.length; fieldIdx++) { var fieldInfo = metaFieldInfo[fieldIdx]; if (fieldInfo.type==="uniqueString" || fieldInfo.arr) continue; self.loadMetaVec(fieldInfo, doneMetaVec); } } this.getMatrixMin = function() { /* return the minimum value in the matrix: the first element of the _range entry of the matrix index, * or 0 if the index has no _range entry. A null minimum is replaced by FLOATNAN. */ var validNames = self.getMatrixIndex(); var matrixMin = 0; if ("_range" in validNames) matrixMin = validNames["_range"][0]; + if (matrixMin === null) + matrixMin = FLOATNAN; return matrixMin; } this.preloadGenes = function(geneSyms, onDone, onProgress, strategy) { /* start loading the gene expression vectors in the background. call onDone when done. */ var validGenes = self.geneOffsets; var loadCounter = 0; if (geneSyms) { for (var i=0; i