46169b41deecd48121198e1911c41dc0a3f96b47 chmalee Tue Jan 19 18:12:04 2021 -0800 Allow variable size data tables on hgc. Allow these tables to be JSON or pipe and semi-colon encoded. Add more support for external data references in bigBeds: allow relevant trackDb settings like skipEmptyFields, allow variable size tables in external files, allow gzip compressed external files. diff --git src/hg/hgc/bigBedClick.c src/hg/hgc/bigBedClick.c index b45f078..d2dde37 100644 --- src/hg/hgc/bigBedClick.c +++ src/hg/hgc/bigBedClick.c @@ -193,152 +193,174 @@ " return false;\n" "}\n" ); } } static void detailsTabPrintSpecial(char *name, char *val, struct slPair *extraFields) /* some extra fields require special printing code, they all start with '_' */ { if (sameWord(name, "_mismatchCounts")) extFieldMismatchCounts(val); else if (sameWord(name, "_crisprOfftargets")) extFieldCrisprOfftargets(val, extraFields); } -static void seekAndPrintTable(char *detailsUrl, off_t offset, struct slPair *extraFields) +static int seekAndPrintTable(struct trackDb *tdb, char *detailsUrl, off_t offset, struct slPair *extraFields) /* seek to 0 at url, get headers, then seek to offset, read tab-sep fields and output - * (extraFields are needed for some special field handlers) */ + * (extraFields are needed for some special field handlers). Return the number of fields + * successfully printed. */ { -// open the URL -struct lineFile *lf = lineFileUdcMayOpen(detailsUrl, TRUE); -if (lf==NULL) +int printCount = 0; +// open the URL and get the first line +char *headerLine = readOneLineMaybeBgzip(detailsUrl, 0, 0); +if (headerLine == NULL) { printf("Error: Could not open the URL referenced in detailsTabUrls, %s", detailsUrl); - return; + return printCount; } +boolean skipEmptyFields = trackDbSettingOn(tdb, "skipEmptyFields"); + // get the headers -char *headLine = NULL; -int lineSize = 0; -lineFileNext(lf, &headLine, &lineSize); char *headers[1024]; -int headerCount = chopTabs(headLine, headers); +int headerCount = chopTabs(headerLine, headers); // clone the headers int i; for (i=0; i", detailsUrl); printf("The header line of the tab-sep file has a different number of fields compared "); printf("with the line pointed to by offset %lld in the bigBed file.
", (long long int)offset); printf("Number of headers: %d", headerCount); printf("Number of fields at offset: %d", fieldCount); - return; + return printCount; } +struct slName *tblFieldNames = NULL; +struct hash *fieldsToEmbeddedTbl = hashNew(0); +struct embeddedTbl *tblList = NULL; +getExtraTableFields(tdb, &tblFieldNames, &tblList, fieldsToEmbeddedTbl); // print the table for all external extra fields printf("
\n"); fieldCount = min(fieldCount, headerCount); +struct embeddedTbl *userTbl = NULL; +struct dyString *tableLabelsDy = dyStringNew(0); +dyStringPrintf(tableLabelsDy, "var _jsonHgcLabels = ["); for (i=0; iencodedTbl = val; + printEmbeddedTable(tdb, userTbl, tableLabelsDy); + } else { printf("\n", name); printf("\n", val); } + printCount++; } printf("
%s%s
\n"); - -lineFileClose(&lf); +dyStringPrintf(tableLabelsDy, "];\n"); +jsInline(dyStringCannibalize(&tableLabelsDy)); +return printCount; } struct slPair *parseDetailsTablUrls(struct trackDb *tdb) /* Parse detailsTabUrls setting string into an slPair list of {offset column name, fileOrUrl} */ { char *detailsUrlsStr = trackDbSetting(tdb, "detailsTabUrls"); if (!detailsUrlsStr) return NULL; struct slPair *detailsUrls = slPairListFromString(detailsUrlsStr, TRUE); if (!detailsUrls) { printf("Problem when parsing trackDb setting detailsTabUrls
\n"); printf("Expected: a space-separated key=val list, like 'fieldName1=URL1 fieldName2=URL2'
\n"); printf("But got: '%s'
", detailsUrlsStr); return NULL; } struct slPair *pair; for (pair = detailsUrls; pair != NULL; pair = pair->next) pair->val = hReplaceGbdb(replaceChars(pair->val, "$db", database)); return detailsUrls; } -static void printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields) +static int printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields) /* handle the "detailsTabUrls" trackDb setting: * For each field, print a separate html table with all field names and values - * from the external tab-sep file */ - + * from the external tab-sep file. Return the number of fields we successfully printed */ { +int printCount = 0; struct slPair *detailsUrls = parseDetailsTablUrls(tdb), *pair; for (pair = detailsUrls; pair != NULL; pair = pair->next) { char *fieldName = pair->name; char *detailsUrl = pair->val; // get extra bigBed field (=the offset) and seek to it void *p = slPairFindVal(extraFields, fieldName); if (p==NULL) { printf("Error when parsing trackDb detailsTabUrls statement:
\n"); printf("Cannot find extra bigBed field with name %s\n", fieldName); - return; + return 0; } char *offsetStr = (char*)p; if (offsetStr==NULL || sameWord(offsetStr, "0")) { /* need to show the empty off-targets for crispr tracks */ if (startsWith("crispr", tdb->track)) extFieldCrisprOfftargets(NULL, NULL); // empty or "0" value in bigBed means that the lookup should not be performed continue; } off_t offset = atoll(offsetStr); - seekAndPrintTable(detailsUrl, offset, extraFields); + printCount += seekAndPrintTable(tdb, detailsUrl, offset, extraFields); } slPairFreeValsAndList(&detailsUrls); +return printCount; } static void bigBedClick(char *fileName, struct trackDb *tdb, char *item, int start, int end, int bedSize) /* Handle click in generic bigBed track. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = bigBedFileOpen(fileName); struct lm *lm = lmInit(0); int ivStart = start, ivEnd = end; char *itemForUrl = item; if (start == end) { @@ -420,31 +442,31 @@ printCustomUrlWithFields(tdb, bed->name, bed->name, item == itemForUrl, extraFieldPairs); if (itemForUrl) printIframe(tdb, itemForUrl); bedPrintPos(bed, bedSize, tdb); // display seq1 and seq2 if (seq1Seq2 && bedSize+seq1Seq2Fields == 8) printf("" "
Sequence 1Sequence 2
%s %s
", fields[6], fields[7]); else if (restCount > 0) { if (restCount > restBedFields) { int printCount = extraFieldsPrint(tdb, NULL, extraFields, extraFieldCount); - printAllExternalExtraFields(tdb, extraFieldPairs); + printCount += printAllExternalExtraFields(tdb, extraFieldPairs); if (printCount == 0) { int i; char label[20]; safef(label, sizeof(label), "nonBedFieldsLabel"); printf("%s ", trackDbSettingOrDefault(tdb, label, "Non-BED fields:")); for (i = restBedFields; i < restCount; i++) printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]); printf("
\n"); } } if (sameString(tdb->type, "bigGenePred")) bigGenePredLinks(tdb->track, item);