src/hg/hgTables/bigBed.c 1.7

1.7 2009/03/18 01:34:52 kent
Getting filters to work on bigBed. Next up - intersections!
Index: src/hg/hgTables/bigBed.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/bigBed.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 4 -r1.6 -r1.7
--- src/hg/hgTables/bigBed.c	17 Mar 2009 20:48:36 -0000	1.6
+++ src/hg/hgTables/bigBed.c	18 Mar 2009 01:34:52 -0000	1.7
@@ -164,9 +164,297 @@
 bbiFileClose(&bbi);
 return list;
 }
 
-void bigBedTabOut(char *table, struct sqlConnection *conn, char *fields, FILE *f)
+
+enum asFilterDataType
+/* High level data type of a column filter.  asFilterOneCol switches on this to pick the matching member of union asFilterData. */
+   {
+   afdtNone = 0,	/* Initial value before a type is chosen; asFilterOneCol treats it as an internal error. */
+   afdtString = 1,	/* Use struct asStringFilter. */
+   afdtLong = 2,	/* Use struct asLongFilter. */
+   afdtDouble = 3,	/* Use struct asDoubleFilter. */
+   afdtChar = 4,	/* Use struct asCharFilter. */
+   };
+
+struct asLongFilter
+/* Filter on long value.  Filled in by cgiToLongFilter and evaluated by asFilterLong. */
+   {
+   enum numericFilterType op;	/* Operation selector handed to bedFilterLong. */
+   long long *thresholds;	/* Threshold value(s) handed to bedFilterLong together with op. */
+   };
+
+struct asDoubleFilter
+/* Filter on double value.  Filled in by cgiToDoubleFilter and evaluated by asFilterDouble. */
+   {
+   enum numericFilterType op;	/* Operation selector handed to bedFilterDouble. */
+   double *thresholds;		/* Threshold value(s) handed to bedFilterDouble together with op. */
+   };
+
+struct asCharFilter
+/* Filter on a char value.  Filled in by cgiToCharFilter and evaluated by asFilterChar. */
+    {
+    enum charFilterType op;	/* Operation selector; cftIgnore means no filtering (see asCharFilterFromCart). */
+    char *matches;		/* Match data handed to bedFilterChar; released by asCharFilterFree. */
+    boolean invert;		/* Handed to bedFilterChar; presumably negates the match - confirm there. */
+    };
+
+struct asStringFilter
+/* Filter on a string value.  Filled in by cgiToStringFilter and evaluated by asFilterString. */
+    {
+    enum stringFilterType op;	/* Operation selector; sftIgnore means no filtering (see asStringFilterFromCart). */
+    char **matches;		/* Match data handed to bedFilterString; the array is released by asStringFilterFree. */
+    boolean invert;		/* Handed to bedFilterString; presumably negates the match - confirm there. */
+    };
+
+void asCharFilterFree(struct asCharFilter **pFilter)
+/* Free up memory associated with filter (its matches buffer and the struct itself).  Does nothing if *pFilter is NULL. */
+{
+struct asCharFilter *filter = *pFilter;
+if (filter != NULL)
+    {
+    freeMem(filter->matches);
+    freez(pFilter);	/* NOTE(review): asCharFilterFromCart returns the pointer after this call, so freez is expected to NULL *pFilter - confirm. */
+    }
+}
+
+void asStringFilterFree(struct asStringFilter **pFilter)
+/* Free up memory associated with filter (its matches array and the struct itself).  Does nothing if *pFilter is NULL. */
+{
+struct asStringFilter *filter = *pFilter;
+if (filter != NULL)
+    {
+    freeMem(filter->matches);	/* NOTE(review): frees only the pointer array, not the strings it points to - confirm ownership against cgiToStringFilter. */
+    freez(pFilter);	/* NOTE(review): asStringFilterFromCart returns the pointer after this call, so freez is expected to NULL *pFilter - confirm. */
+    }
+}
+
+struct asDoubleFilter *asDoubleFilterFromCart(struct cart *cart, char *fieldPrefix)
+/* Get filter settings for double out of cart.  Returns NULL unless both the comparison and pattern variables under fieldPrefix are non-empty. */
+{
+struct asDoubleFilter *filter = NULL;
+char varName[256];
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterCmpVar);
+char *cmp = cartOptionalString(cart, varName);
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar);
+char *pat = cartOptionalString(cart, varName);
+if (!isEmpty(cmp) && !isEmpty(pat))
+    {
+    AllocVar(filter);
+    cgiToDoubleFilter(cmp, pat, &filter->op, &filter->thresholds);
+    }
+return filter;
+}
+
+struct asLongFilter *asLongFilterFromCart(struct cart *cart, char *fieldPrefix)
+/* Get filter settings for long out of cart.  Returns NULL unless both the comparison and pattern variables under fieldPrefix are non-empty. */
+{
+struct asLongFilter *filter = NULL;
+char varName[256];
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterCmpVar);
+char *cmp = cartOptionalString(cart, varName);
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar);
+char *pat = cartOptionalString(cart, varName);
+if (!isEmpty(cmp) && !isEmpty(pat))
+    {
+    AllocVar(filter);
+    cgiToLongFilter(cmp, pat, &filter->op, &filter->thresholds);
+    }
+return filter;
+}
+
+struct asCharFilter *asCharFilterFromCart(struct cart *cart, char *fieldPrefix)
+/* Get filter settings for char out of cart.  Returns NULL unless both the dd and pattern variables under fieldPrefix are non-empty; a cftIgnore filter is freed rather than returned. */
+{
+struct asCharFilter *filter = NULL;
+char varName[256];
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterDdVar);
+char *dd = cartOptionalString(cart, varName);
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar);
+char *pat = cartOptionalString(cart, varName);
+if (!isEmpty(dd) && !isEmpty(pat))
+    {
+    AllocVar(filter);
+    cgiToCharFilter(dd, pat, &filter->op, &filter->matches, &filter->invert);
+    if (filter->op == cftIgnore)	// Filter out nop
+	asCharFilterFree(&filter);
+    }
+return filter;
+}
+
+struct asStringFilter *asStringFilterFromCart(struct cart *cart, char *fieldPrefix)
+/* Get filter settings for string out of cart.  Returns NULL unless both the dd and pattern variables under fieldPrefix are non-empty; an sftIgnore filter is freed rather than returned. */
+{
+struct asStringFilter *filter = NULL;
+char varName[256];
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterDdVar);
+char *dd = cartOptionalString(cart, varName);
+safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar);
+char *pat = cartOptionalString(cart, varName);
+if (!isEmpty(dd) && !isEmpty(pat))
+    {
+    AllocVar(filter);
+    cgiToStringFilter(dd, pat, &filter->op, &filter->matches, &filter->invert);
+    if (filter->op == sftIgnore)	// Filter out nop
+	asStringFilterFree(&filter);
+    }
+return filter;
+}
+
+union asFilterData
+/* Pointer to one of the four filter types above.  The dataType field of struct asFilterColumn says which member is in use. */
+    {
+    struct asLongFilter *l;	/* In use when dataType is afdtLong. */
+    struct asDoubleFilter *d;	/* In use when dataType is afdtDouble; asFilterFromCart also clears and NULL-tests the union through this member. */
+    struct asCharFilter *c;	/* In use when dataType is afdtChar. */
+    struct asStringFilter *s;	/* In use when dataType is afdtString. */
+    };
+
+struct asFilterColumn
+/* A type of filter applied to a column.  Built by asFilterFromCart, applied by asFilterOneCol. */
+   {
+   struct asFilterColumn *next;		/* Next column filter in list. */
+   struct asColumn *col;		/* Column we operate on. */
+   int colIx;				/* Index of column within the row passed to asFilterOnRow. */
+   enum asFilterDataType dataType;	/* Says which member of filter is in use. */
+   union asFilterData filter;		/* Filter data including op. */
+   };
+
+struct asFilter
+/* A filter that can be applied to weed out rows in a table with an associated .as file.  A row passes only if every column filter accepts it (see asFilterOnRow). */
+    {
+    struct asFilter *next;		/* Next filter in list. */
+    struct asFilterColumn *columnList;  /* A list of column filters to apply; may be empty, in which case every row passes. */
+    };
+
+
+boolean asFilterString(struct asStringFilter *filter, char *x)
+/* Return TRUE if x passes filter, as decided by bedFilterString.  filter is dereferenced, so it must not be NULL. */
+{
+return bedFilterString(x, filter->op, filter->matches, filter->invert);
+}
+
+boolean asFilterLong(struct asLongFilter *filter, long long x)
+/* Return TRUE if x passes filter, as decided by bedFilterLong.  filter is dereferenced, so it must not be NULL. */
+{
+return bedFilterLong(x, filter->op, filter->thresholds);
+}
+
+boolean asFilterDouble(struct asDoubleFilter *filter, double x)
+/* Return TRUE if x passes filter, as decided by bedFilterDouble.  filter is dereferenced, so it must not be NULL. */
+{
+return bedFilterDouble(x, filter->op, filter->thresholds);
+}
+
+boolean asFilterChar(struct asCharFilter *filter, char x)
+/* Return TRUE if x passes filter, as decided by bedFilterChar.  filter is dereferenced, so it must not be NULL. */
+{
+return bedFilterChar(x, filter->op, filter->matches, filter->invert);
+}
+
+boolean asFilterOneCol(struct asFilterColumn *filtCol, char *s)
+/* Return TRUE if s, the text of one field, passes the column filter.  s is converted according to filtCol->dataType before testing. */
+{
+switch (filtCol->dataType)
+    {
+    case afdtString:
+        return asFilterString(filtCol->filter.s, s);
+    case afdtLong:
+        return asFilterLong(filtCol->filter.l, atoll(s));	/* atoll gives no indication of unparsable input. */
+    case afdtDouble:
+        return asFilterDouble(filtCol->filter.d, atof(s));	/* atof gives no indication of unparsable input. */
+    case afdtChar:
+        return asFilterChar(filtCol->filter.c, s[0]);	/* Only the first character of the field is tested. */
+    default:
+        internalErr();	/* afdtNone or an unknown type should never reach here. */
+	return FALSE;
+    }
+}
+
+boolean asFilterOnRow(struct asFilter *filter, char **row)
+/* Return TRUE if filter is NULL or if every column filter in filter->columnList accepts its field of row. */
+{
+if (filter != NULL)
+    {
+    struct asFilterColumn *col;
+    for (col = filter->columnList; col != NULL; col = col->next)
+	{
+	if (!asFilterOneCol(col, row[col->colIx]))	/* row is indexed by the column position recorded in asFilterFromCart. */
+	    return FALSE;	/* First failing column rejects the row. */
+	}
+    }
+return TRUE;
+}
+
+struct asFilter *asFilterFromCart(struct cart *cart, char *db, char *table, struct asObject *as)
+/* Examine cart for filter relevant to this table, and create object around it.  An asFilter is always returned; its columnList is empty when no column of as has filter settings in cart. */
+{
+/* Build the cart variable prefix shared by all filter variables of this table. */
+char tablePrefix[128], fieldPrefix[192];
+safef(tablePrefix, sizeof(tablePrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table);
+
+struct asFilter *asFilter;
+AllocVar(asFilter);
+
+struct asColumn *col;
+int colIx = 0;
+for (col = as->columnList; col != NULL; col = col->next, ++colIx)
+    {
+    safef(fieldPrefix, sizeof(fieldPrefix), "%s%s.", tablePrefix, col->name);
+    struct asTypeInfo *lt = col->lowType;
+    union asFilterData lowFilter;
+    enum asFilterDataType dataType = afdtNone;	/* Set by the switch below. */
+    lowFilter.d = NULL;	/* Clears the union through .d; the NULL test after the switch also reads .d whichever member was set. */
+    switch (lt->type)
+	{
+	case t_double:
+	case t_float:
+	    lowFilter.d = asDoubleFilterFromCart(cart, fieldPrefix);
+	    dataType = afdtDouble;
+	    break;
+	case t_char:
+	    lowFilter.c = asCharFilterFromCart(cart, fieldPrefix);
+	    dataType = afdtChar;
+	    break;
+	case t_int:
+	case t_uint:
+	case t_short:
+	case t_ushort:
+	case t_byte:
+	case t_ubyte:
+	case t_off:
+	    lowFilter.l = asLongFilterFromCart(cart, fieldPrefix);
+	    dataType = afdtLong;
+	    break;
+	case t_string:
+	case t_lstring:
+	    lowFilter.s = asStringFilterFromCart(cart, fieldPrefix);
+	    dataType = afdtString;
+	    break;
+	case t_object:
+	case t_simple:
+	case t_enum:
+	case t_set:
+	default:	/* Column types with no filter support are reported as an internal error. */
+	    internalErr();
+	    break;
+	}
+    if (lowFilter.d != NULL)	/* Only columns that actually have filter settings get an entry. */
+        {
+	struct asFilterColumn *colFilt;
+	AllocVar(colFilt);
+	colFilt->col = col;
+	colFilt->colIx = colIx;
+	colFilt->dataType = dataType;
+	colFilt->filter = lowFilter;
+	slAddHead(&asFilter->columnList, colFilt);
+	}
+    }
+slReverse(&asFilter->columnList);	/* Entries were added at the head, so reverse to get them back in column order. */
+return asFilter;
+}
+
+void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
 /* Print out selected fields from Big Bed.  If fields is NULL, then print out all fields. */
 {
 if (f == NULL)
     f = stdout;
@@ -201,8 +489,19 @@
 
 /* Open up bigBed file. */
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi = bigBedFileOpen(fileName);
+struct asObject *as = bigBedAsOrDefault(bbi);
+struct asFilter *filter = NULL;
+
+if (anyFilter())
+    {
+    filter = asFilterFromCart(cart, db, table, as);
+    if (filter)
+        {
+	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
+	}
+    }
 
 /* Loop through outputting each region */
 struct region *region, *regionList = getRegions();
 for (region = regionList; region != NULL; region = region->next)
@@ -214,14 +513,17 @@
     char startBuf[16], endBuf[16];
     for (iv = ivList; iv != NULL; iv = iv->next)
         {
 	bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
+	if (asFilterOnRow(filter, row))
+	    {
 	int i;
 	fprintf(f, "%s", row[columnArray[0]]);
 	for (i=1; i<fieldCount; ++i)
 	    fprintf(f, "\t%s", row[columnArray[i]]);
 	fprintf(f, "\n");
 	}
+	}
     lmCleanup(&lm);
     }
 
 /* Clean up and exit. */