46169b41deecd48121198e1911c41dc0a3f96b47
chmalee
  Tue Jan 19 18:12:04 2021 -0800
Allow variable-size data tables on hgc. Allow these tables to be JSON or
pipe- and semicolon-encoded. Add more support for external data
references in bigBeds: allow relevant trackDb settings like
skipEmptyFields, allow variable-size tables in external files, and allow
gzip-compressed external files.

diff --git src/hg/hgc/bigBedClick.c src/hg/hgc/bigBedClick.c
index b45f078..d2dde37 100644
--- src/hg/hgc/bigBedClick.c
+++ src/hg/hgc/bigBedClick.c
@@ -193,152 +193,174 @@
 	"    return false;\n"
     	"}\n"
 	);
     }
 }
 
 static void detailsTabPrintSpecial(char *name, char *val, struct slPair *extraFields)
 /* some extra fields require special printing code, they all start with '_'  */
 {
 if (sameWord(name, "_mismatchCounts"))
     extFieldMismatchCounts(val);
 else if (sameWord(name, "_crisprOfftargets"))
     extFieldCrisprOfftargets(val, extraFields);
 }
 
-static void seekAndPrintTable(char *detailsUrl, off_t offset, struct slPair *extraFields)
+static int seekAndPrintTable(struct trackDb *tdb, char *detailsUrl, off_t offset, struct slPair *extraFields)
 /* seek to 0 at url, get headers, then seek to offset, read tab-sep fields and output 
- * (extraFields are needed for some special field handlers) */
+ * (extraFields are needed for some special field handlers). Return the number of fields
+ * successfully printed.  */
 {
-// open the URL
-struct lineFile *lf = lineFileUdcMayOpen(detailsUrl, TRUE);
-if (lf==NULL)
+int printCount = 0;
+// open the URL and get the first line
+char *headerLine = readOneLineMaybeBgzip(detailsUrl, 0, 0);
+if (headerLine == NULL)
     {
     printf("Error: Could not open the URL referenced in detailsTabUrls, %s", detailsUrl);
-    return;
+    return printCount;
     }
 
+boolean skipEmptyFields = trackDbSettingOn(tdb, "skipEmptyFields");
+
 // get the headers
-char *headLine = NULL;
-int lineSize = 0;
-lineFileNext(lf, &headLine, &lineSize);
 char *headers[1024];
-int headerCount = chopTabs(headLine, headers);
+int headerCount = chopTabs(headerLine, headers);
 
 // clone the headers
 int i;
 for (i=0; i<headerCount; i++)
     headers[i] = cloneString(headers[i]);
 
-lineFileSeek(lf, offset, SEEK_SET);
-
 // read a line
-char *detailsLine;
-lineFileNext(lf, &detailsLine, &lineSize);
+char *detailsLine = readOneLineMaybeBgzip(detailsUrl, offset, 0);;
 if (!detailsLine || isEmpty(detailsLine))
-    return;
+    return printCount;
 char *fields[1024];
 int fieldCount = chopTabs(detailsLine, fields);
 
 if (fieldCount!=headerCount)
     {
     printf("Error encountered when reading %s:<br>", detailsUrl);
     printf("The header line of the tab-sep file has a different number of fields compared ");
     printf("with the line pointed to by offset %lld in the bigBed file.<br>", (long long int)offset);
     printf("Number of headers: %d", headerCount);
     printf("Number of fields at offset: %d", fieldCount);
-    return;
+    return printCount;
     }
+struct slName *tblFieldNames = NULL;
+struct hash *fieldsToEmbeddedTbl = hashNew(0);
+struct embeddedTbl *tblList = NULL;
+getExtraTableFields(tdb, &tblFieldNames, &tblList, fieldsToEmbeddedTbl);
 
 // print the table for all external extra fields 
 printf("<br><table class='bedExtraTbl'>\n");
 fieldCount = min(fieldCount, headerCount);
+struct embeddedTbl *userTbl = NULL;
+struct dyString *tableLabelsDy = dyStringNew(0);
+dyStringPrintf(tableLabelsDy, "var _jsonHgcLabels = [");
 for (i=0; i<fieldCount; i++)
     {
     char *name = headers[i];
     char *val  = fields[i];
 
-    if (startsWith("_", name))
+    // skip this field if it's empty and "skipEmptyFields" option is set
+    if (skipEmptyFields && isEmpty(val))
+        continue;
+
+    // skip an optional '#' on the first field name
+    if (i == 0 && startsWith("#", name))
+        name = skipBeyondDelimit(name, '#');
+
+    if (startsWith("_", name) && !(startsWith("_json", name)) && !(startsWith("json", name)))
         detailsTabPrintSpecial(name, val, extraFields);
+    else if (slNameInList(tblFieldNames, name))
+        {
+        userTbl = (struct embeddedTbl *)hashFindVal(fieldsToEmbeddedTbl, name);
+        userTbl->encodedTbl = val;
+        printEmbeddedTable(tdb, userTbl, tableLabelsDy);
+        }
     else
         {
         printf("<tr><td>%s</td>\n", name);
         printf("<td>%s</td></tr>\n", val);
         }
+    printCount++;
     }
 printf("</table>\n");
-
-lineFileClose(&lf);
+dyStringPrintf(tableLabelsDy, "];\n");
+jsInline(dyStringCannibalize(&tableLabelsDy));
+return printCount;
 }
 
 struct slPair *parseDetailsTablUrls(struct trackDb *tdb)
 /* Parse detailsTabUrls setting string into an slPair list of {offset column name, fileOrUrl} */
 {
 char *detailsUrlsStr = trackDbSetting(tdb, "detailsTabUrls");
 if (!detailsUrlsStr)
     return NULL;
 
 struct slPair *detailsUrls = slPairListFromString(detailsUrlsStr, TRUE);
 if (!detailsUrls)
     {
     printf("Problem when parsing trackDb setting detailsTabUrls<br>\n");
     printf("Expected: a space-separated key=val list, like 'fieldName1=URL1 fieldName2=URL2'<br>\n");
     printf("But got: '%s'<br>", detailsUrlsStr);
     return NULL;
     }
 struct slPair *pair;
 for (pair = detailsUrls;  pair != NULL;  pair = pair->next)
     pair->val = hReplaceGbdb(replaceChars(pair->val, "$db", database));
 
 return detailsUrls;
 }
 
-static void printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields)
+static int printAllExternalExtraFields(struct trackDb *tdb, struct slPair *extraFields)
 /* handle the "detailsTabUrls" trackDb setting: 
  * For each field, print a separate html table with all field names and values
- * from the external tab-sep file */
-
+ * from the external tab-sep file. Return the number of fields we successfully printed  */
 {
+int printCount = 0;
 struct slPair *detailsUrls = parseDetailsTablUrls(tdb), *pair;
 for (pair = detailsUrls; pair != NULL; pair = pair->next)
     {
     char *fieldName = pair->name;
     char *detailsUrl = pair->val;
 
     // get extra bigBed field (=the offset) and seek to it
     void *p = slPairFindVal(extraFields, fieldName);
     if (p==NULL)
         {
         printf("Error when parsing trackDb detailsTabUrls statement:<br>\n");
         printf("Cannot find extra bigBed field with name %s\n", fieldName);
-        return;
+        return 0;
         }
     char *offsetStr = (char*)p;
 
     if (offsetStr==NULL || sameWord(offsetStr, "0"))
 	{
 	/* need to show the empty off-targets for crispr tracks */
 	if (startsWith("crispr", tdb->track))
 	    extFieldCrisprOfftargets(NULL, NULL);
         // empty or "0" value in bigBed means that the lookup should not be performed
         continue;
 	}
     off_t offset = atoll(offsetStr);
 
-    seekAndPrintTable(detailsUrl, offset, extraFields);
+    printCount += seekAndPrintTable(tdb, detailsUrl, offset, extraFields);
     }
 slPairFreeValsAndList(&detailsUrls);
+return printCount;
 }
 
 static void bigBedClick(char *fileName, struct trackDb *tdb,
                      char *item, int start, int end, int bedSize)
 /* Handle click in generic bigBed track. */
 {
 char *chrom = cartString(cart, "c");
 
 /* Open BigWig file and get interval list. */
 struct bbiFile *bbi = bigBedFileOpen(fileName);
 struct lm *lm = lmInit(0);
 int ivStart = start, ivEnd = end;
 char *itemForUrl = item;
 if (start == end)
     {
@@ -420,31 +442,31 @@
     printCustomUrlWithFields(tdb, bed->name, bed->name, item == itemForUrl, extraFieldPairs);
     if (itemForUrl)
         printIframe(tdb, itemForUrl);
 
     bedPrintPos(bed, bedSize, tdb);
 
     // display seq1 and seq2
     if (seq1Seq2 && bedSize+seq1Seq2Fields == 8)
         printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>"
             "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]);
     else if (restCount > 0)
         {
         if (restCount > restBedFields)
             {
             int printCount = extraFieldsPrint(tdb, NULL, extraFields, extraFieldCount);
-            printAllExternalExtraFields(tdb, extraFieldPairs);
+            printCount += printAllExternalExtraFields(tdb, extraFieldPairs);
 
             if (printCount == 0)
                 {
                 int i;
                 char label[20];
                 safef(label, sizeof(label), "nonBedFieldsLabel");
                 printf("<B>%s&nbsp;</B>",
                        trackDbSettingOrDefault(tdb, label, "Non-BED fields:"));
                 for (i = restBedFields;  i < restCount;  i++)
                     printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]);
                 printf("<BR>\n");
                 }
             }
         if (sameString(tdb->type, "bigGenePred"))
             bigGenePredLinks(tdb->track, item);