459b8273fa9eaafb16d8980567a7fcfa209fd4ef
max
  Wed Nov 23 09:30:30 2016 -0800
Adding VAI/DAI support for bigDataIndex, refs #18420

diff --git src/hg/lib/hAnno.c src/hg/lib/hAnno.c
index ca724be..63c6b76 100644
--- src/hg/lib/hAnno.c
+++ src/hg/lib/hAnno.c
@@ -126,30 +126,39 @@
 if (asObj == NULL)
     asObj = asFromTableDescriptions(conn, table);
 hFreeConn(&conn);
 if (columnsMatch(asObj, fieldList))
     return asObj;
 else
     {
     // Special case for pgSnp, which includes its bin column in autoSql...
     struct asObject *pgSnpAsO = pgSnpAsObj();
     if (columnsMatch(pgSnpAsO, fieldList))
         return pgSnpAsO;
     return asObjectFromFields(table, fieldList, skipBin);
     }
 }
 
+static char *getBigDataIndexName(struct trackDb *tdb)
+/* Return tdb's bigDataIndex setting (tbi/bai index URL for a BAM/VCF) if non-empty, else NULL. */
+{
+char *bigIndexUrl = trackDbSetting(tdb, "bigDataIndex");
+if (isNotEmpty(bigIndexUrl))
+    return bigIndexUrl;
+return NULL;
+}
+
 static char *getBigDataFileName(char *db, struct trackDb *tdb, char *selTable, char *chrom)
 /* Get fileName from bigBed/bigWig/BAM/VCF database table, or bigDataUrl from custom track. */
 {
 char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
 if (isNotEmpty(bigDataUrl))
     {
     return bigDataUrl;
     }
 else
     {
     struct sqlConnection *conn = hAllocConn(db);
     char *fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, selTable, chrom);
     hFreeConn(&conn);
     return fileOrUrl;
     }
@@ -157,55 +166,56 @@
 
 static boolean dbTableMatchesAutoSql(char *db, char *table, struct asObject *asObj)
 /* Return true if table exists and its fields match the columns of asObj. */
 {
 boolean matches = FALSE;
 struct sqlConnection *conn = hAllocConn(db);
 if (sqlTableExists(conn, table))
     {
     struct sqlFieldInfo *fieldList = sqlFieldInfoGet(conn, table);
     matches = columnsMatch(asObj, fieldList);
     }
 hFreeConn(&conn);
 return matches;
 }
 
-struct annoStreamer *hAnnoStreamerFromBigFileUrl(char *fileOrUrl, struct annoAssembly *assembly,
+struct annoStreamer *hAnnoStreamerFromBigFileUrl(char *fileOrUrl, char *indexUrl, struct annoAssembly *assembly,
                                                  int maxOutRows, char *type)
 /* Determine what kind of big data file/url we have and make streamer for it.
- * If type is NULL, this will determine type using custom track type or file suffix. */
+ * If type is NULL or empty, this will determine type using custom track type or file suffix.
+ * indexUrl is used only for type vcfTabix; may be NULL if the .tbi file is alongside the VCF. */
 {
 struct annoStreamer *streamer = NULL;
 if (isEmpty(type))
     type = customTrackTypeFromBigFile(fileOrUrl);
 if (type == NULL)
     {
     if (endsWith(fileOrUrl, "pgSnp") || endsWith(fileOrUrl, "pgsnp") ||
         endsWith(fileOrUrl, "pgSnp.gz") || endsWith(fileOrUrl, "pgsnp.gz") ||
         endsWith(fileOrUrl, "bed") || endsWith(fileOrUrl, "bed.gz"))
         {
         type = "pgSnp";
         }
     else
         errAbort("Unrecognized bigData type of file or url '%s'", fileOrUrl);
     }
 if (sameString(type, "bigBed") || sameString("bigGenePred", type))
     streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows);
 else if (sameString(type, "vcfTabix"))
-    streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows);
+    streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows);
 else if (sameString(type, "vcf"))
-    streamer = annoStreamVcfNew(fileOrUrl, FALSE, assembly, maxOutRows);
+    streamer = annoStreamVcfNew(fileOrUrl, NULL, FALSE, assembly, maxOutRows);
 else if (sameString(type, "bigWig"))
     streamer = annoStreamBigWigNew(fileOrUrl, assembly);
 else if (sameString(type, "pgSnp"))
     streamer = annoStreamTabNew(fileOrUrl, assembly, pgSnpFileAsObj());
 else if (sameString(type, "bam"))
     errAbort("Sorry, BAM is not yet supported");
 else
     errAbort("Unrecognized bigData type %s of file or url '%s'", type, fileOrUrl);
 return streamer;
 }
 
 struct annoStreamer *hAnnoStreamerFromTrackDb(struct annoAssembly *assembly, char *selTable,
                                               struct trackDb *tdb, char *chrom, int maxOutRows,
                                               struct jsonElement *config)
 /* Figure out the source and type of data and make an annoStreamer. */
@@ -219,36 +229,37 @@
     dbTable = trackDbSetting(tdb, "dbTableName");
     if (dbTable != NULL)
 	// This is really a database table, not a bigDataUrl CT.
 	dataDb = CUSTOM_TRASH;
     }
 if (startsWithWord("wig", tdb->type))
     streamer = annoStreamWigDbNew(dataDb, dbTable, assembly, maxOutRows);
 else if (sameString("longTabix", tdb->type))
     {
     char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom);
     streamer = annoStreamTabixNew(fileOrUrl,  assembly, maxOutRows);
     }
 else if (sameString("vcfTabix", tdb->type))
     {
     char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom);
-    streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows);
+    char *indexUrl = getBigDataIndexName(tdb);
+    streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows);
     }
 else if (sameString("vcf", tdb->type))
     {
     char *fileOrUrl = getBigDataFileName(dataDb, tdb, dbTable, chrom);
-    streamer = annoStreamVcfNew(fileOrUrl, FALSE, assembly, maxOutRows);
+    streamer = annoStreamVcfNew(fileOrUrl, NULL, FALSE, assembly, maxOutRows);
     }
 else if (sameString("bam", tdb->type))
     {
     warn("Sorry, BAM is not yet supported");
     }
 else if (startsWith("bigBed", tdb->type) || sameString("bigGenePred", tdb->type))
     {
     char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom);
     streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows);
     }
 else if (startsWith("bigWig", tdb->type))
     {
     char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom);
     streamer = annoStreamBigWigNew(fileOrUrl, assembly); //#*** no maxOutRows support
     }
@@ -257,76 +268,77 @@
     {
     char *sourceTable = trackDbSetting(tdb, "sourceTable");
     char *inputsTable = trackDbSetting(tdb, "inputTrackTable");
     streamer = annoStreamDbFactorSourceNew(dataDb, tdb->track, sourceTable, inputsTable, assembly,
 					   maxOutRows);
     }
 else if (trackHubDatabase(db))
     errAbort("Unrecognized type '%s' for hub track '%s'", tdb->type, tdb->track);
 if (streamer == NULL)
     {
     streamer = annoStreamDbNew(dataDb, dbTable, assembly, maxOutRows, config);
     }
 return streamer;
 }
 
-struct annoGrator *hAnnoGratorFromBigFileUrl(char *fileOrUrl, struct annoAssembly *assembly,
+struct annoGrator *hAnnoGratorFromBigFileUrl(char *fileOrUrl, char *indexUrl, struct annoAssembly *assembly,
                                              int maxOutRows, enum annoGratorOverlap overlapRule)
 /* Determine what kind of big data file/url we have and make streamer & grator for it. */
 {
 struct annoStreamer *streamer = NULL;
 struct annoGrator *grator = NULL;
 char *type = customTrackTypeFromBigFile(fileOrUrl);
 if (sameString(type, "bigBed") || sameString("bigGenePred", type))
     streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows);
 else if (sameString(type, "vcfTabix"))
-    streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows);
+    streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows); // indexUrl may be NULL
 else if (sameString(type, "bigWig"))
     grator = annoGrateBigWigNew(fileOrUrl, assembly, agwmAverage);
 else if (sameString(type, "bam"))
     errAbort("Sorry, BAM is not yet supported");
 else
     errAbort("Unrecognized bigData type %s of file or url '%s'", type, fileOrUrl);
 if (grator == NULL)
     grator = annoGratorNew(streamer);
 grator->setOverlapRule(grator, overlapRule);
 return grator;
 }
 
 struct annoGrator *hAnnoGratorFromTrackDb(struct annoAssembly *assembly, char *selTable,
                                           struct trackDb *tdb, char *chrom, int maxOutRows,
                                           struct asObject *primaryAsObj,
                                           enum annoGratorOverlap overlapRule,
                                           struct jsonElement *config)
 /* Figure out the source and type of data, make an annoStreamer & wrap in annoGrator.
  * If not NULL, primaryAsObj is used to determine whether we can make an annoGratorGpVar. */
 {
 struct annoGrator *grator = NULL;
 boolean primaryIsVariants = (primaryAsObj != NULL &&
                              (asObjectsMatch(primaryAsObj, pgSnpAsObj()) ||
                               asObjectsMatch(primaryAsObj, pgSnpFileAsObj()) ||
                               asObjectsMatch(primaryAsObj, vcfAsObj())));
 char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+char *indexUrl = getBigDataIndexName(tdb);
 if (bigDataUrl != NULL)
     {
     if (primaryIsVariants && sameString("bigGenePred", tdb->type))
         {
         struct annoStreamer *streamer = annoStreamBigBedNew(bigDataUrl, assembly, maxOutRows);
         grator = annoGratorGpVarNew(streamer);
         }
     else
-        grator = hAnnoGratorFromBigFileUrl(bigDataUrl, assembly, maxOutRows, overlapRule);
+        grator = hAnnoGratorFromBigFileUrl(bigDataUrl, indexUrl, assembly, maxOutRows, overlapRule);
     }
 else if (startsWithWord("wig", tdb->type))
     {
     char *dataDb = assembly->name;
     char *dbTable = selTable;
     if (isCustomTrack(selTable))
         {
         dbTable = trackDbSetting(tdb, "dbTableName");
         if (dbTable != NULL)
             // This is really a database table, not a bigDataUrl CT.
             dataDb = CUSTOM_TRASH;
         }
     grator = annoGrateWigDbNew(dataDb, dbTable, assembly, agwmAverage, maxOutRows);
     }
 else if (startsWithWord("bigWig", tdb->type))