043102a443731a0d579d9766157ff57968d30d02
braney
  Tue Jan 9 14:52:24 2018 -0800
implement missing data and reading of WIB files into mathWigs.

diff --git src/hg/lib/mathWig.c src/hg/lib/mathWig.c
index 988806e..01b078a 100644
--- src/hg/lib/mathWig.c
+++ src/hg/lib/mathWig.c
@@ -1,207 +1,209 @@
 /* Copyright (C) 2017 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "bigWig.h"
+#include "hdb.h"
+#include "wiggle.h"
+#include "trackHub.h"
 
 // stack of arithmetic operators
 struct opcodeStack
     {
     int depth;
     int maxDepth;
     char *stack;
     };
 
 static boolean pushOpcode(struct opcodeStack *opcodeStack, char *word)
 /* Check to see if word is operator, if so, push it onto the stack */
 {
 boolean ret = FALSE;
 
 if (strlen(word) > 1)
     return FALSE;
 
 switch(*word)
     {
     case '+':
     case '*':
     case '-':
     case '/':
         if (opcodeStack->depth == opcodeStack->maxDepth)
             {
             opcodeStack->maxDepth += 10;
             opcodeStack->stack = realloc(opcodeStack->stack, opcodeStack->maxDepth);
             }
         opcodeStack->stack[opcodeStack->depth] = *word;
         opcodeStack->depth++;
         ret =  TRUE;
         break;
     }
 
 return ret;
 }
 
-double *mathWigGetValues(char *equation, char *chrom, unsigned winStart, unsigned winEnd)
-/* Build an array of doubles that is calculated from bigWig's listed
- * in equation in the requested region. */
+void getWigDataFromFile(struct wiggle *wi, double *array, int winStart, int winEnd)
+/* Read a wib file to get wiggle values for a range. */
 {
-static char lastOpcode = 0;
-char *words[100];
-int count = chopByWhite(equation, words, sizeof(words)/sizeof(char *));
-int jj,ii;
-unsigned width = winEnd - winStart;
-struct lm *lm = lmInit(0);
-double *data = needHugeMem(width * sizeof(double));
-bzero(data, width * sizeof(double));
-struct opcodeStack opcodeStack;
-bzero(&opcodeStack, sizeof opcodeStack);
+static char *fileName = NULL;
+static struct udcFile *wibFH = NULL;
+unsigned char *readData;    /* the bytes read in from the file */
+int dataOffset;
 
-boolean firstTime = TRUE;
-for (jj=0; jj < count; jj++)
+if ((fileName == NULL) || differentString(fileName, wi->file))
     {
-    if (pushOpcode(&opcodeStack, words[jj]))
-        continue;
-    if (startsWith("$", words[jj]))  // ignore native tracks for the moment
-        continue;
-    struct bbiFile *bwf = bigWigFileOpen(words[jj]);
-
-    struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, winStart, winEnd, lm);
-
-    if (firstTime)
+    if (wibFH != NULL)
+        udcFileClose(&wibFH);
+    fileName = cloneString(wi->file);
+    wibFH = udcFileOpen(hReplaceGbdb(fileName), NULL);
+    }
+udcSeek(wibFH, wi->offset);
+readData = (unsigned char *) needMem((size_t) (wi->count + 1));
+udcRead(wibFH, readData,
+    (size_t) wi->count * (size_t) sizeof(unsigned char));
+/*  walk through all the data in this block */
+int width = winEnd - winStart;
+for (dataOffset = 0; dataOffset < wi->count; ++dataOffset)
     {
-        for (iv = ivList; iv != NULL; iv = iv->next)
+    unsigned char datum = readData[dataOffset];
+    if (datum != WIG_NO_DATA)
         {
-            unsigned start = max(0, iv->start - winStart);
-            unsigned end = min(width, iv->end - winStart);
-
-            for (ii = start; ii < end; ii++)
-                data[ii] = iv->val;
+        int x1 = ((wi->chromStart - winStart) + (dataOffset * wi->span));
+        int x2 = x1 + wi->span;
+        if (x2 >= width )
+            x2 = width ;
+        if (x1 < width)
+            {
+            printf("x %d %d\n", x1, x2);
+            for (; x1 < x2; ++x1)
+                {
+                if ((x1 >= 0))
+                    {
+                    array[x1] = BIN_TO_VALUE(datum,wi->lowerLimit,wi->dataRange);
                     }
-
-        firstTime = FALSE;
                 }   
-    else
-        {
-        char opcode;
-        if (opcodeStack.depth == 0)
+            }   
+        }   
+    }   
+}
+
+void getWigData(char *db, char *table, char *chrom, unsigned winStart, unsigned winEnd, double *array)
+/* Query the database to find the regions in the WIB file we need to read to get data for a specified range. */
 {
-            opcode = lastOpcode;
-            if (opcode == 0)
-                opcode = '+';
+struct sqlConnection *conn = hAllocConn(db);
+int rowOffset;
+struct sqlResult *sr;
+char **row;
+struct wiggle wiggle;
+int span = 0;
 
-            //errAbort("underflow in opcode stack");
-            }
+sr = hRangeQuery(conn, table, chrom, winStart, winEnd,
+        NULL, &rowOffset);
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    wiggleStaticLoad(row + rowOffset, &wiggle);
+    getWigDataFromFile(&wiggle, array, winStart, winEnd);
+    if (span == 0)
+        span = wiggle.span;
     else
-            lastOpcode = opcode = opcodeStack.stack[--opcodeStack.depth];
+        if (span != wiggle.span)
+            errAbort("multiple spans in wiggle table");
+    }
+}
+
+void getBigWigData(char *file, char *chrom, unsigned winStart, unsigned winEnd, double *array)
+/* Query a bigBed file to find the wiggle values we need for a specified range. */
+{
+struct lm *lm = lmInit(0);
+struct bbiFile *bwf = bigWigFileOpen(file);
+struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, winStart, winEnd, lm);
+unsigned width = winEnd - winStart;
+
 for (iv = ivList; iv != NULL; iv = iv->next)
     {
     unsigned start = max(0, iv->start - winStart);
     unsigned end = min(width, iv->end - winStart);
+    int ii;
+
     for (ii = start; ii < end; ii++)
-                {
-                switch(opcode)
-                    {
-                    case '+':
-                        data[ii] += iv->val;
-                        break;
-                    case '-':
-                        data[ii] -= iv->val;
-                        break;
-                    case '*':
-                        data[ii] *= iv->val;
-                        break;
-                    case '/':
-                        data[ii] /= iv->val;
-                        break;
-                    }
-                }
-            }
+        array[ii] = iv->val;
     }
 }
 
-return data;
-}
-
-double *mathWigGetValuesMissing(char *equation, char *chrom, unsigned winStart, unsigned winEnd)
-/* Build an array of doubles that is calculated from bigWig's listed
+double *mathWigGetValues(char *db, char *equation, char *chrom, unsigned winStart, unsigned winEnd, boolean missingIsZero)
+/* Build an array of doubles that is calculated from bigWig or wig listed
  * in equation in the requested region. */
 {
 static char lastOpcode = 0;
 char *words[100];
 int count = chopByWhite(equation, words, sizeof(words)/sizeof(char *));
 int jj,ii;
 unsigned width = winEnd - winStart;
-struct lm *lm = lmInit(0);
-double *data = needHugeMem(width * sizeof(double));
-double *data2 = needHugeMem(width * sizeof(double));
-for(ii=0; ii < width; ii++)
-    data[ii] = NAN;
+double *accumulator = needHugeMem(width * sizeof(double));
+double *operand = needHugeMem(width * sizeof(double));
 struct opcodeStack opcodeStack;
 bzero(&opcodeStack, sizeof opcodeStack);
 
 boolean firstTime = TRUE;
 for (jj=0; jj < count; jj++)
     {
+    double *dataLoad = operand;
+    if (firstTime)
+        dataLoad = accumulator;
+
     if (pushOpcode(&opcodeStack, words[jj]))
         continue;
-    if (startsWith("$", words[jj]))  // ignore native tracks for the moment
-        continue;
-    struct bbiFile *bwf = bigWigFileOpen(words[jj]);
-    if (!firstTime)
-        for(ii=0; ii < width; ii++)
-            data2[ii] = NAN;
 
-    struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, winStart, winEnd, lm);
-
-    for (iv = ivList; iv != NULL; iv = iv->next)
-        {
-        unsigned start = max(0, iv->start - winStart);
-        unsigned end = min(width, iv->end - winStart);
+    if (missingIsZero)
+        for(ii=0; ii < width; ii++)
+            dataLoad[ii] = 0;
+    else
+        for(ii=0; ii < width; ii++)
+            dataLoad[ii] = NAN;
 
-        if (firstTime)
-            for (ii = start; ii < end; ii++)
-                data[ii] = iv->val;
+    if (startsWith("$", words[jj]))  // ignore native tracks for the moment
+        getWigData(db, &words[jj][1], chrom, winStart, winEnd, dataLoad);
     else
-            for (ii = start; ii < end; ii++)
-                data2[ii] = iv->val;
-        }
+        getBigWigData(words[jj], chrom, winStart, winEnd, dataLoad);
         
     if (firstTime)
         {
         firstTime = FALSE;
         continue;
         }
 
     char opcode;
     if (opcodeStack.depth == 0)
         {
         opcode = lastOpcode;
         if (opcode == 0)
             opcode = '+';
 
         //errAbort("underflow in opcode stack");
         }
     else
         lastOpcode = opcode = opcodeStack.stack[--opcodeStack.depth];
 
     for(ii=0; ii < width; ii++)
         {
         switch(opcode)
             {
             case '+':
-                data[ii] += data2[ii];
+                accumulator[ii] += operand[ii];
                 break;
             case '-':
-                data[ii] -= data2[ii];
+                accumulator[ii] -= operand[ii];
                 break;
             case '*':
-                data[ii] *= data2[ii];
+                accumulator[ii] *= operand[ii];
                 break;
             case '/':
-                data[ii] /= data2[ii];
+                accumulator[ii] /= operand[ii];
                 break;
             }
         }
     }
 
-return data;
+return accumulator;
 }