f3e2a9eb74858d409ad94350c64496a09561e26f kent Thu Jan 13 21:32:06 2011 -0800 Starting to work on getting bigBeds from data hubs to work with table browser. Still a ways to go though. diff --git src/hg/hgTables/bigBed.c src/hg/hgTables/bigBed.c index bc0ec09..11b1d75 100644 --- src/hg/hgTables/bigBed.c +++ src/hg/hgTables/bigBed.c @@ -1,676 +1,682 @@ /* bigBed - stuff to handle bigBed in the Table Browser. */ #include "common.h" #include "hash.h" #include "linefile.h" #include "dystring.h" #include "localmem.h" #include "jksql.h" #include "cheapcgi.h" #include "cart.h" #include "web.h" #include "bed.h" #include "hdb.h" #include "trackDb.h" #include "obscure.h" #include "hmmstats.h" #include "correlate.h" #include "asParse.h" #include "bbiFile.h" #include "bigBed.h" #include "hgTables.h" static char const rcsid[] = "$Id: bigBed.c,v 1.11 2010/05/21 23:45:38 braney Exp $"; char *bigBedFileName(char *table, struct sqlConnection *conn) /* Return file name associated with bigBed. This handles differences whether it's * a custom or built-in track. Do a freeMem on returned string when done. */ { /* Implementation is same as bigWig. */ return bigWigFileName(table, conn); } struct hash *asColumnHash(struct asObject *as) /* Return a hash full of the object's columns, keyed by colum name */ { struct hash *hash = hashNew(6); struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) hashAdd(hash, col->name, col); return hash; } static void fillField(struct hash *colHash, char *key, char output[HDB_MAX_FIELD_STRING]) /* If key is in colHash, then copy key to output. */ { if (hashLookup(colHash, key)) strncpy(output, key, HDB_MAX_FIELD_STRING-1); } static struct asObject *bigBedAsOrDefault(struct bbiFile *bbi) /* Get asObject associated with bigBed - if none exists in file make it up from field counts. */ { struct asObject *as = bigBedAs(bbi); if (as == NULL) as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount)); return as; } struct asObject *bigBedAsForTable(char *table, struct sqlConnection *conn) /* Get asObject associated with bigBed table. */ { char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); bbiFileClose(&bbi); freeMem(fileName); return as; } struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn) /* Get fields of bigBed into hti structure. */ { /* Get columns in asObject format. */ +uglyf("ok 5.1.1.1
\n"); char *fileName = bigBedFileName(table, conn); +uglyf("ok 5.1.1.2 fileName=%s
\n", fileName); struct bbiFile *bbi = bigBedFileOpen(fileName); +uglyf("ok 5.1.1.3
\n"); struct asObject *as = bigBedAsOrDefault(bbi); +uglyf("ok 5.1.1.4
\n"); /* Allocate hTableInfo structure and fill in info about bed fields. */ struct hash *colHash = asColumnHash(as); struct hTableInfo *hti; AllocVar(hti); hti->rootName = cloneString(table); hti->isPos= TRUE; fillField(colHash, "chrom", hti->chromField); fillField(colHash, "chromStart", hti->startField); fillField(colHash, "chromEnd", hti->endField); fillField(colHash, "name", hti->nameField); fillField(colHash, "score", hti->scoreField); fillField(colHash, "strand", hti->strandField); fillField(colHash, "thickStart", hti->cdsStartField); fillField(colHash, "thickEnd", hti->cdsEndField); fillField(colHash, "blockCount", hti->countField); fillField(colHash, "chromStarts", hti->startsField); fillField(colHash, "blockSizes", hti->endsSizesField); hti->hasCDS = (bbi->definedFieldCount >= 8); hti->hasBlocks = (bbi->definedFieldCount >= 12); char type[256]; safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount, (bbi->definedFieldCount == bbi->fieldCount ? '.' : '+')); hti->type = cloneString(type); +uglyf("ok 5.1.1.5
\n"); freeMem(fileName); hashFree(&colHash); bbiFileClose(&bbi); +uglyf("ok 5.1.1.6
\n"); return hti; } struct slName *asColNames(struct asObject *as) /* Get list of column names. */ { struct slName *list = NULL, *el; struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { el = slNameNew(col->name); slAddHead(&list, el); } slReverse(&list); return list; } struct slName *bigBedGetFields(char *table, struct sqlConnection *conn) /* Get fields of bigBed as simple name list. */ { char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct slName *names = asColNames(as); freeMem(fileName); bbiFileClose(&bbi); return names; } struct sqlFieldType *sqlFieldTypesFromAs(struct asObject *as) /* Convert asObject to list of sqlFieldTypes */ { struct sqlFieldType *ft, *list = NULL; struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { struct dyString *type = asColumnToSqlType(col); ft = sqlFieldTypeNew(col->name, type->string); slAddHead(&list, ft); dyStringFree(&type); } slReverse(&list); return list; } struct sqlFieldType *bigBedListFieldsAndTypes(char *table, struct sqlConnection *conn) /* Get fields of bigBed as list of sqlFieldType. */ { char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct sqlFieldType *list = sqlFieldTypesFromAs(as); freeMem(fileName); bbiFileClose(&bbi); return list; } enum asFilterDataType /* High level data type. */ { afdtNone = 0, afdtString = 1, afdtLong = 2, afdtDouble = 3, afdtChar = 4, }; struct asLongFilter /* Filter on long value */ { enum numericFilterType op; long long *thresholds; }; struct asDoubleFilter /* Filter on double value */ { enum numericFilterType op; double *thresholds; }; struct asCharFilter /* Filter on a char value */ { enum charFilterType op; char *matches; boolean invert; }; struct asStringFilter /* Filter on a string value */ { enum stringFilterType op; char **matches; boolean invert; }; void asCharFilterFree(struct asCharFilter **pFilter) /* Free up memory associated with filter. */ { struct asCharFilter *filter = *pFilter; if (filter != NULL) { freeMem(filter->matches); freez(pFilter); } } void asStringFilterFree(struct asStringFilter **pFilter) /* Free up memory associated with filter. */ { struct asStringFilter *filter = *pFilter; if (filter != NULL) { freeMem(filter->matches); freez(pFilter); } } struct asDoubleFilter *asDoubleFilterFromCart(struct cart *cart, char *fieldPrefix) /* Get filter settings for double out of cart. */ { struct asDoubleFilter *filter = NULL; char varName[256]; safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterCmpVar); char *cmp = cartOptionalString(cart, varName); safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar); char *pat = cartOptionalString(cart, varName); if (!isEmpty(cmp) && !isEmpty(pat)) { AllocVar(filter); cgiToDoubleFilter(cmp, pat, &filter->op, &filter->thresholds); } return filter; } struct asLongFilter *asLongFilterFromCart(struct cart *cart, char *fieldPrefix) /* Get filter settings for double out of cart. */ { struct asLongFilter *filter = NULL; char varName[256]; safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterCmpVar); char *cmp = cartOptionalString(cart, varName); safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar); char *pat = cartOptionalString(cart, varName); if (!isEmpty(cmp) && !isEmpty(pat)) { AllocVar(filter); cgiToLongFilter(cmp, pat, &filter->op, &filter->thresholds); } return filter; } struct asCharFilter *asCharFilterFromCart(struct cart *cart, char *fieldPrefix) /* Get filter settings for double out of cart. */ { struct asCharFilter *filter = NULL; char varName[256]; safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterDdVar); char *dd = cartOptionalString(cart, varName); safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar); char *pat = cartOptionalString(cart, varName); if (!isEmpty(dd) && !isEmpty(pat)) { AllocVar(filter); cgiToCharFilter(dd, pat, &filter->op, &filter->matches, &filter->invert); if (filter->op == cftIgnore) // Filter out nop asCharFilterFree(&filter); } return filter; } struct asStringFilter *asStringFilterFromCart(struct cart *cart, char *fieldPrefix) /* Get filter settings for double out of cart. */ { struct asStringFilter *filter = NULL; char varName[256]; safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterDdVar); char *dd = cartOptionalString(cart, varName); safef(varName, sizeof(varName), "%s%s", fieldPrefix, filterPatternVar); char *pat = cartOptionalString(cart, varName); if (!isEmpty(dd) && !isEmpty(pat)) { AllocVar(filter); cgiToStringFilter(dd, pat, &filter->op, &filter->matches, &filter->invert); if (filter->op == sftIgnore) // Filter out nop asStringFilterFree(&filter); } return filter; } union asFilterData /* One of the above four. */ { struct asLongFilter *l; struct asDoubleFilter *d; struct asCharFilter *c; struct asStringFilter *s; }; struct asFilterColumn /* A type of filter applied to a column. */ { struct asFilterColumn *next; struct asColumn *col; /* Column we operate on. */ int colIx; /* Index of column. */ enum asFilterDataType dataType; /* Type of limit parameters. */ union asFilterData filter; /* Filter data including op. */ }; struct asFilter /* A filter that can be applied to weed out rows in a table with an associated .as file. */ { struct asFilter *next; struct asFilterColumn *columnList; /* A list of column filters to apply */ }; boolean asFilterString(struct asStringFilter *filter, char *x) /* Return TRUE if x passes filter. */ { return bedFilterString(x, filter->op, filter->matches, filter->invert); } boolean asFilterLong(struct asLongFilter *filter, long long x) /* Return TRUE if x passes filter. */ { return bedFilterLong(x, filter->op, filter->thresholds); } boolean asFilterDouble(struct asDoubleFilter *filter, double x) /* Return TRUE if x passes filter. */ { return bedFilterDouble(x, filter->op, filter->thresholds); } boolean asFilterChar(struct asCharFilter *filter, char x) /* Return TRUE if x passes filter. */ { return bedFilterChar(x, filter->op, filter->matches, filter->invert); } boolean asFilterOneCol(struct asFilterColumn *filtCol, char *s) /* Return TRUE if s passes filter. */ { switch (filtCol->dataType) { case afdtString: return asFilterString(filtCol->filter.s, s); case afdtLong: return asFilterLong(filtCol->filter.l, atoll(s)); case afdtDouble: return asFilterDouble(filtCol->filter.d, atof(s)); case afdtChar: return asFilterChar(filtCol->filter.c, s[0]); default: internalErr(); return FALSE; } } boolean asFilterOnRow(struct asFilter *filter, char **row) /* Return TRUE if row passes filter if any. */ { if (filter != NULL) { struct asFilterColumn *col; for (col = filter->columnList; col != NULL; col = col->next) { if (!asFilterOneCol(col, row[col->colIx])) return FALSE; } } return TRUE; } struct asFilter *asFilterFromCart(struct cart *cart, char *db, char *table, struct asObject *as) /* Examine cart for filter relevant to this table, and create object around it. */ { /* Get list of filter variables for this table. */ char tablePrefix[128], fieldPrefix[192]; safef(tablePrefix, sizeof(tablePrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table); struct asFilter *asFilter; AllocVar(asFilter); struct asColumn *col; int colIx = 0; for (col = as->columnList; col != NULL; col = col->next, ++colIx) { safef(fieldPrefix, sizeof(fieldPrefix), "%s%s.", tablePrefix, col->name); struct asTypeInfo *lt = col->lowType; union asFilterData lowFilter; enum asFilterDataType dataType = afdtNone; lowFilter.d = NULL; switch (lt->type) { case t_double: case t_float: lowFilter.d = asDoubleFilterFromCart(cart, fieldPrefix); dataType = afdtDouble; break; case t_char: lowFilter.c = asCharFilterFromCart(cart, fieldPrefix); dataType = afdtChar; break; case t_int: case t_uint: case t_short: case t_ushort: case t_byte: case t_ubyte: case t_off: lowFilter.l = asLongFilterFromCart(cart, fieldPrefix); dataType = afdtLong; break; case t_string: case t_lstring: lowFilter.s = asStringFilterFromCart(cart, fieldPrefix); dataType = afdtString; break; case t_object: case t_simple: case t_enum: case t_set: default: internalErr(); break; } if (lowFilter.d != NULL) { struct asFilterColumn *colFilt; AllocVar(colFilt); colFilt->col = col; colFilt->colIx = colIx; colFilt->dataType = dataType; colFilt->filter = lowFilter; slAddHead(&asFilter->columnList, colFilt); } } slReverse(&asFilter->columnList); return asFilter; } static void addFilteredBedsOnRegion(struct bbiFile *bbi, struct region *region, char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList) /* Add relevant beds in reverse order to pBedList */ { struct lm *bbLm = lmInit(0); struct bigBedInterval *ivList = NULL, *iv; ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, bbLm); char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; for (iv = ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount); if (asFilterOnRow(filter, row)) { struct bed *bed = bedLoadN(row, bbi->definedFieldCount); struct bed *lmBed = lmCloneBed(bed, bedLm); slAddHead(pBedList, lmBed); bedFree(&bed); } } lmCleanup(&bbLm); } struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from bigBed, in all regions, that pass filtering. */ { /* Connect to big bed and get metadata and filter. */ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct asFilter *filter = asFilterFromCart(cart, db, table, as); /* Get beds a region at a time. */ struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) addFilteredBedsOnRegion(bbi, region, table, filter, lm, &bedList); slReverse(&bedList); /* Clean up and return. */ if (retFieldCount != NULL) *retFieldCount = bbi->definedFieldCount; bbiFileClose(&bbi); freeMem(fileName); return bedList; } void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from Big Bed. If fields is NULL, then print out all fields. */ { if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bigBedGetFields(table, conn); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) hashAddInt(fieldHash, bb->name, i); /* Create an array of column indexes corresponding to the selected field list. */ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; icolumnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, lm); char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; for (iv = ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount); if (asFilterOnRow(filter, row)) { int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; iname, 0, chromList->size, 10, lm); /* Get description of columns, making it up from BED records if need be. */ struct asObject *as = bigBedAs(bbi); if (as == NULL) as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount)); hPrintf("Database: %s", database); hPrintf("    Primary Table: %s
", table); hPrintf("Big Bed File: %s", fileName); if (bbi->version >= 2) { hPrintf("    Item Count: "); printLongWithCommas(stdout, bigBedItemCount(bbi)); } hPrintf("
\n"); hPrintf("Format description: %s
", as->comment); /* Put up table that describes fields. */ hTableStart(); hPrintf("field"); hPrintf("example"); hPrintf("description "); puts("\n"); struct asColumn *col; int colCount = 0; char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; char *dupeRest = lmCloneString(lm, ivList->rest); /* Manage rest-stomping side-effect */ bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount); ivList->rest = dupeRest; for (col = as->columnList; col != NULL; col = col->next) { hPrintf("%s", col->name); hPrintf("%s", row[colCount]); hPrintf("%s", col->comment); ++colCount; } /* If more fields than descriptions put up minimally helpful info (at least has example). */ for ( ; colCount < bbi->fieldCount; ++colCount) { hPrintf("column%d", colCount+1); hPrintf("%s", row[colCount]); hPrintf("n/a\n"); } hTableEnd(); /* Put up another section with sample rows. */ webNewSection("Sample Rows"); hTableStart(); /* Print field names as column headers for example */ hPrintf(""); int colIx = 0; for (col = as->columnList; col != NULL; col = col->next) { hPrintf("%s", col->name); ++colIx; } for (; colIx < colCount; ++colIx) hPrintf("column%d", colIx+1); hPrintf("\n"); /* Print sample lines. */ struct bigBedInterval *iv; for (iv=ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount); hPrintf(""); for (colIx=0; colIx\n"); } hTableEnd(); /* Clean up and go home. */ lmCleanup(&lm); bbiFileClose(&bbi); freeMem(fileName); hFreeConn(&conn); }