cd8540f3042b909432e4257588c13d9249e583f2
angie
Wed Apr 1 16:49:33 2015 -0700
hgTables fixes to support bigBed and VCF tracks that have no MySQL table,
only a bigDataUrl setting that may point to a local file.
refs #15059
diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c
index e675028..a9286b6 100644
--- src/hg/hgTables/vcf.c
+++ src/hg/hgTables/vcf.c
@@ -164,45 +164,37 @@
row[5] = rec->qual;
dyJoin(dyFilter, ";", rec->filters, rec->filterCount);
row[6] = dyFilter->string;
vcfInfoElsToString(dyInfo, vcff, rec);
row[7] = dyInfo->string;
if (vcff->genotypeCount > 0)
{
row[8] = rec->format;
dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount);
row[9] = dyGt->string;
}
else
row[8] = row[9] = ""; // compatible with localmem usage
}
-static char *vcfFileName(struct trackDb *tdb, struct sqlConnection *conn, char *table, char *chrom)
+static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom)
// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary.
{
-boolean isCt = isCustomTrack(table);
-char *dbTable = table;
-struct sqlConnection *dbConn = conn;
-if (isCt)
- {
- dbConn = hAllocConn(CUSTOM_TRASH);
- struct customTrack *ct = ctLookupName(table);
- dbTable = ct->dbTableName;
- }
-char *fileName = bbiNameFromSettingOrTableChrom(tdb, dbConn, dbTable, chrom);
-if (isCt)
- hFreeConn(&dbConn);
+char *fileName = bigFileNameFromCtOrHub(table, conn);
+struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
+if (fileName == NULL)
+ fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom);
return fileName;
}
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
boolean isTabix)
/* Print out selected fields from VCF. If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;
/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
@@ -239,42 +231,41 @@
}
// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");
struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
filter = asFilterFromCart(cart, db, table, as);
/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
-struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
{
- char *fileName = vcfFileName(tdb, conn, table, region->chrom);
+ char *fileName = vcfFileName(conn, table, region->chrom);
struct vcfFile *vcff;
if (isTabix)
vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
100, maxOut);
else
vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
100, maxOut, TRUE);
if (vcff == NULL)
noWarnAbort();
// If we are outputting all fields, but this VCF has no genotype info, omit the
// genotype columns from output:
if (allFields && vcff->genotypeCount == 0)
fieldCount = VCFDATALINE_NUM_COLS - 2;
if (!printedHeader)
{
@@ -374,58 +365,56 @@
vcfFileFree(&vcff);
}
struct bed *vcfGetFilteredBedsOnRegions(struct sqlConnection *conn,
char *db, char *table, struct region *regionList, struct lm *lm,
int *retFieldCount, boolean isTabix)
/* Get list of beds from VCF, in all regions, that pass filtering. */
{
int maxOut = bigFileMaxOutput();
/* Figure out vcf file name get column info and filter. */
struct asObject *as = vcfAsObj();
struct asFilter *filter = asFilterFromCart(cart, db, table, as);
struct hash *idHash = identifierHash(db, table);
/* Get beds a region at a time. */
-struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
struct bed *bedList = NULL;
struct region *region;
for (region = regionList; region != NULL; region = region->next)
{
- char *fileName = vcfFileName(tdb, conn, table, region->chrom);
+ char *fileName = vcfFileName(conn, table, region->chrom);
if (fileName == NULL)
continue;
addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut,
isTabix);
freeMem(fileName);
if (maxOut <= 0)
{
warn("Reached output limit of %d data values, please make region smaller,\n"
"\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
break;
}
}
slReverse(&bedList);
return bedList;
}
struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix)
/* Return some semi-random IDs from a VCF file. */
{
/* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. */
-struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
-char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database));
+char *fileName = vcfFileName(conn, table, hDefaultChrom(database));
struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE) :
lineFileMayOpen(fileName, TRUE);
if (lf == NULL)
noWarnAbort();
int orderedCount = count * 4;
if (orderedCount < 100)
orderedCount = 100;
struct slName *idList = NULL;
char *words[4];
int i;
for (i = 0; i < orderedCount && lineFileChop(lf, words); i++)
{
// compress runs of identical ID, in case most are placeholder
if (i == 0 || !sameString(words[2], idList->name))
slAddHead(&idList, slNameNew(words[2]));
@@ -440,31 +429,31 @@
{
slNameFreeList(&(sl->next));
break;
}
}
freez(&fileName);
return idList;
}
#define VCF_MAX_SCHEMA_COLS 20
void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix)
/* Show schema on vcf. */
{
struct sqlConnection *conn = hAllocConn(database);
-char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database));
+char *fileName = vcfFileName(conn, table, hDefaultChrom(database));
struct asObject *as = vcfAsObj();
hPrintf("Database: %s", database);
hPrintf(" Primary Table: %s
", table);
hPrintf("VCF File: %s", fileName);
hPrintf("
\n");
hPrintf("Format description: %s
", as->comment);
hPrintf("See the Variant Call Format specification for more details
\n",
"http://www.1000genomes.org/wiki/analysis/vcf4.0");
/* Put up table that describes fields. */
hTableStart();
hPrintf("