46169b41deecd48121198e1911c41dc0a3f96b47 chmalee Tue Jan 19 18:12:04 2021 -0800 Allow variable size data tables on hgc. Allow these tables to be JSON or pipe and semi-colon encoded. Add more support for external data references in bigBeds: allow relevant trackDb settings like skipEmptyFields, allow variable size tables in external files, allow gzip compressed external files. diff --git src/hg/hgc/bigBedClick.c src/hg/hgc/bigBedClick.c index b45f078..d2dde37 100644 --- src/hg/hgc/bigBedClick.c +++ src/hg/hgc/bigBedClick.c @@ -193,152 +193,174 @@ " return false;\n" "}\n" ); } } static void detailsTabPrintSpecial(char *name, char *val, struct slPair *extraFields) /* some extra fields require special printing code, they all start with '_' */ { if (sameWord(name, "_mismatchCounts")) extFieldMismatchCounts(val); else if (sameWord(name, "_crisprOfftargets")) extFieldCrisprOfftargets(val, extraFields); } -static void seekAndPrintTable(char *detailsUrl, off_t offset, struct slPair *extraFields) +static int seekAndPrintTable(struct trackDb *tdb, char *detailsUrl, off_t offset, struct slPair *extraFields) /* seek to 0 at url, get headers, then seek to offset, read tab-sep fields and output - * (extraFields are needed for some special field handlers) */ + * (extraFields are needed for some special field handlers). Return the number of fields + * successfully printed. */ { -// open the URL -struct lineFile *lf = lineFileUdcMayOpen(detailsUrl, TRUE); -if (lf==NULL) +int printCount = 0; +// open the URL and get the first line +char *headerLine = readOneLineMaybeBgzip(detailsUrl, 0, 0); +if (headerLine == NULL) { printf("Error: Could not open the URL referenced in detailsTabUrls, %s", detailsUrl); - return; + return printCount; } +boolean skipEmptyFields = trackDbSettingOn(tdb, "skipEmptyFields"); + // get the headers -char *headLine = NULL; -int lineSize = 0; -lineFileNext(lf, &headLine, &lineSize); char *headers[1024]; -int headerCount = chopTabs(headLine, headers); +int headerCount = chopTabs(headerLine, headers); // clone the headers int i; for (i=0; i<headerCount; i++) headers[i] = cloneString(headers[i]); -lineFileSeek(lf, offset, SEEK_SET); - // read a line -char *detailsLine; -lineFileNext(lf, &detailsLine, &lineSize); +char *detailsLine = readOneLineMaybeBgzip(detailsUrl, offset, 0);; if (!detailsLine || isEmpty(detailsLine)) - return; + return printCount; char *fields[1024]; int fieldCount = chopTabs(detailsLine, fields); if (fieldCount!=headerCount) { printf("Error encountered when reading %s:<br>", detailsUrl); printf("The header line of the tab-sep file has a different number of fields compared "); printf("with the line pointed to by offset %lld in the bigBed file.<br>", (long long int)offset); printf("Number of headers: %d", headerCount); printf("Number of fields at offset: %d", fieldCount); - return; + return printCount; } +struct slName *tblFieldNames = NULL; +struct hash *fieldsToEmbeddedTbl = hashNew(0); +struct embeddedTbl *tblList = NULL; +getExtraTableFields(tdb, &tblFieldNames, &tblList, fieldsToEmbeddedTbl); // print the table for all external extra fields printf("<br><table class='bedExtraTbl'>\n"); fieldCount = min(fieldCount, headerCount); +struct embeddedTbl *userTbl = NULL; +struct dyString *tableLabelsDy = dyStringNew(0); +dyStringPrintf(tableLabelsDy, "var _jsonHgcLabels = ["); for (i=0; i<fieldCount; i++) { char *name = headers[i]; char *val = fields[i]; - if (startsWith("_", name)) + // skip this field if it's empty and "skipEmptyFields" option is set + if (skipEmptyFields && isEmpty(val)) + continue; + + // skip an optional '#' on the first field name + if (i == 0 && startsWith("#", name)) + name = skipBeyondDelimit(name, '#'); + + if (startsWith("_", name) && !(startsWith("_json", name)) && !(startsWith("json", name))) detailsTabPrintSpecial(name, val, extraFields); + else if (slNameInList(tblFieldNames, name)) + { + userTbl = (struct embeddedTbl *)hashFindVal(fieldsToEmbeddedTbl, name); + userTbl->encodedTbl = val; + printEmbeddedTable(tdb, userTbl, tableLabelsDy); + } else { printf("<tr><td>%s</td>\n", name); printf("<td>%s</td></tr>\n", val); } + printCount++; } printf("</table>\n"); - -lineFileClose(&lf); +dyStringPrintf(tableLabelsDy, "];\n"); +jsInline(dyStringCannibalize(&tableLabelsDy)); +return printCount; } struct slPair *parseDetailsTablUrls(struct trackDb *tdb) /* Parse detailsTabUrls setting string into an slPair list of {offset column name, fileOrUrl} */ { char *detailsUrlsStr = trackDbSetting(tdb, "detailsTabUrls"); if (!detailsUrlsStr) return NULL; struct slPair *detailsUrls = slPairListFromString(detailsUrlsStr, TRUE); if (!detailsUrls) { printf("Problem when parsing trackDb setting detailsTabUrls<br>\n"); printf("Expected: a space-separated key=val list, like 'fieldName1=URL1 fieldName2=URL2'<br>\n"); printf("But got: '%s'<br>", detailsUrlsStr); return NULL; } struct slPair *pair; for (pair = detailsUrls; pair != NULL; pair = pair->next) pair->val = hReplaceGbdb(replaceChars(pair->val, "$db", database)); return detailsUrls; } -static void printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields) +static int printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields) /* handle the "detailsTabUrls" trackDb setting: * For each field, print a separate html table with all field names and values - * from the external tab-sep file */ - + * from the external tab-sep file. Return the number of fields we successfully printed */ { +int printCount = 0; struct slPair *detailsUrls = parseDetailsTablUrls(tdb), *pair; for (pair = detailsUrls; pair != NULL; pair = pair->next) { char *fieldName = pair->name; char *detailsUrl = pair->val; // get extra bigBed field (=the offset) and seek to it void *p = slPairFindVal(extraFields, fieldName); if (p==NULL) { printf("Error when parsing trackDb detailsTabUrls statement:<br>\n"); printf("Cannot find extra bigBed field with name %s\n", fieldName); - return; + return 0; } char *offsetStr = (char*)p; if (offsetStr==NULL || sameWord(offsetStr, "0")) { /* need to show the empty off-targets for crispr tracks */ if (startsWith("crispr", tdb->track)) extFieldCrisprOfftargets(NULL, NULL); // empty or "0" value in bigBed means that the lookup should not be performed continue; } off_t offset = atoll(offsetStr); - seekAndPrintTable(detailsUrl, offset, extraFields); + printCount += seekAndPrintTable(tdb, detailsUrl, offset, extraFields); } slPairFreeValsAndList(&detailsUrls); +return printCount; } static void bigBedClick(char *fileName, struct trackDb *tdb, char *item, int start, int end, int bedSize) /* Handle click in generic bigBed track. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = bigBedFileOpen(fileName); struct lm *lm = lmInit(0); int ivStart = start, ivEnd = end; char *itemForUrl = item; if (start == end) { @@ -420,31 +442,31 @@ printCustomUrlWithFields(tdb, bed->name, bed->name, item == itemForUrl, extraFieldPairs); if (itemForUrl) printIframe(tdb, itemForUrl); bedPrintPos(bed, bedSize, tdb); // display seq1 and seq2 if (seq1Seq2 && bedSize+seq1Seq2Fields == 8) printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>" "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]); else if (restCount > 0) { if (restCount > restBedFields) { int printCount = extraFieldsPrint(tdb, NULL, extraFields, extraFieldCount); - printAllExternalExtraFields(tdb, extraFieldPairs); + printCount += printAllExternalExtraFields(tdb, extraFieldPairs); if (printCount == 0) { int i; char label[20]; safef(label, sizeof(label), "nonBedFieldsLabel"); printf("<B>%s </B>", trackDbSettingOrDefault(tdb, label, "Non-BED fields:")); for (i = restBedFields; i < restCount; i++) printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]); printf("<BR>\n"); } } if (sameString(tdb->type, "bigGenePred")) bigGenePredLinks(tdb->track, item);