src/hg/hgTables/vcf.c a1dcdda03cad5b931d17066789091a64f8dceca6

a1dcdda03cad5b931d17066789091a64f8dceca6
max
  Tue Nov 22 16:05:21 2016 -0800
adding bigDataIndex support to the table browser, refs #18420

diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c
index 4200a97..1c68406 100644
--- src/hg/hgTables/vcf.c
+++ src/hg/hgTables/vcf.c
@@ -244,32 +244,35 @@
 struct region *region, *regionList = getRegions();
 int maxOut = bigFileMaxOutput();
 // Include the header, absolutely necessary for VCF parsing.
 boolean printedHeader = FALSE;
 // Temporary storage for row-ification:
 struct dyString *dyAlt = newDyString(1024);
 struct dyString *dyFilter = newDyString(1024);
 struct dyString *dyInfo = newDyString(1024);
 struct dyString *dyGt = newDyString(1024);
 struct vcfRecord *rec;
 for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
     {
     char *fileName = vcfFileName(conn, table, region->chrom);
     struct vcfFile *vcff;
     if (isTabix)
-	vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
+        {
+        char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
+	vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end,
 				   100, maxOut);
+        }
     else
 	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
 			      100, maxOut, TRUE);
     if (vcff == NULL)
 	noWarnAbort();
     // If we are outputting all fields, but this VCF has no genotype info, omit the
     // genotype columns from output:
     if (allFields && vcff->genotypeCount == 0)
 	fieldCount = VCFDATALINE_NUM_COLS - 2;
     if (!printedHeader)
 	{
 	fprintf(f, "%s", vcff->headerString);
 	if (filter)
 	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
 	if (!allFields)
@@ -305,39 +308,39 @@
 	    }
 	}
     vcfFileFree(&vcff);
     freeMem(fileName);
     }
 
 if (maxOut == 0)
     errAbort("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
 /* Clean up and exit. */
 dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
 hashFree(&fieldHash);
 freeMem(fieldArray);
 freeMem(columnArray);
 }
 
-static void addFilteredBedsOnRegion(char *fileName, struct region *region, char *table,
+static void addFilteredBedsOnRegion(char *fileName, char *indexUrl, struct region *region, char *table,
 				    struct asFilter *filter, struct lm *bedLm,
 				    struct bed **pBedList, struct hash *idHash, int *pMaxOut,
 				    boolean isTabix)
 /* Add relevant beds in reverse order to pBedList */
 {
 struct vcfFile *vcff;
 if (isTabix)
-    vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
+    vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end,
             100, *pMaxOut);
 else
     vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
 			  100, *pMaxOut, TRUE);
 if (vcff == NULL)
     noWarnAbort();
 struct lm *lm = lmInit(0);
 char *row[VCFDATALINE_NUM_COLS];
 char numBuf[VCF_NUM_BUF_SIZE];
 // Temporary storage for row-ification:
 struct dyString *dyAlt = newDyString(1024);
 struct dyString *dyFilter = newDyString(1024);
 struct dyString *dyInfo = newDyString(1024);
 struct dyString *dyGt = newDyString(1024);
 struct vcfRecord *rec;
@@ -372,49 +375,52 @@
 {
 int maxOut = bigFileMaxOutput();
 /* Figure out vcf file name get column info and filter. */
 struct asObject *as = vcfAsObj();
 struct asFilter *filter = asFilterFromCart(cart, db, table, as);
 struct hash *idHash = identifierHash(db, table);
 
 /* Get beds a region at a time. */
 struct bed *bedList = NULL;
 struct region *region;
 for (region = regionList; region != NULL; region = region->next)
     {
     char *fileName = vcfFileName(conn, table, region->chrom);
     if (fileName == NULL)
 	continue;
-    addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut,
+    char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
+    addFilteredBedsOnRegion(fileName, indexUrl, region, table, filter, lm, &bedList, idHash, &maxOut,
 			    isTabix);
     freeMem(fileName);
     if (maxOut <= 0)
 	{
 	errAbort("Reached output limit of %d data values, please make region smaller,\n"
 	     "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
 	}
     }
 slReverse(&bedList);
 return bedList;
 }
 
 struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix)
 /* Return some semi-random IDs from a VCF file. */
 {
 /* Read 10000 items from vcf file,  or if they ask for a big list, then 4x what they ask for. */
 char *fileName = vcfFileName(conn, table, hDefaultChrom(database));
-struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE) :
+char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
+
+struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) :
 				lineFileMayOpen(fileName, TRUE);
 if (lf == NULL)
     noWarnAbort();
 int orderedCount = count * 4;
 if (orderedCount < 100)
     orderedCount = 100;
 struct slName *idList = NULL;
 char *words[4];
 int i;
 for (i = 0;  i < orderedCount && lineFileChop(lf, words); i++)
     {
     // compress runs of identical ID, in case most are placeholder
     if (i == 0 || !sameString(words[2], idList->name))
 	slAddHead(&idList, slNameNew(words[2]));
     }
@@ -459,31 +465,32 @@
 struct asColumn *col;
 int colCount = 0;
 for (col = as->columnList; col != NULL; col = col->next)
     {
     hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
     hPrintf("<TD>%s</TD></TR>", col->comment);
     ++colCount;
     }
 hTableEnd();
 
 /* Put up another section with sample rows. */
 webNewSection("Sample Rows");
 hTableStart();
 
 /* Fetch sample rows. */
-struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE) :
+char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
+struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) :
 				lineFileMayOpen(fileName, TRUE);
 if (lf == NULL)
     noWarnAbort();
 char *row[VCF_MAX_SCHEMA_COLS];
 int i;
 for (i = 0;  i < 10;  i++)
     {
     int colCount = lineFileChop(lf, row);
     int colIx;
     if (i == 0)
 	{
 	// Print field names as column headers, using colCount to compute genotype span
 	hPrintf("<TR>");
 	for (colIx = 0, col = as->columnList; col != NULL && colIx < colCount;
 	     colIx++, col = col->next)