ebc84c82794070f9999daf3bf8c5de7f407d5818 angie Fri Dec 2 15:35:06 2011 -0800 Feature #3707 (VCF+tabix support in hgTables): Brooke reported out-of-mem conditions in notes 11 & 12. Fix: add an optional threshold on the number of records to retrieve in vcfTabixFileMayOpen. diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c index ef7fd6b..d73da5e 100644 --- src/hg/hgTables/vcf.c +++ src/hg/hgTables/vcf.c @@ -235,31 +235,31 @@ /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); // Include the header, absolutely necessary for VCF parsing. boolean printedHeader = FALSE; // Temporary storage for row-ification: struct dyString *dyAlt = newDyString(1024); struct dyString *dyFilter = newDyString(1024); struct dyString *dyInfo = newDyString(1024); struct dyString *dyGt = newDyString(1024); struct vcfRecord *rec; for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { char *fileName = vcfFileName(table, conn, region->chrom); struct vcfFile *vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end, - 100); + 100, maxOut); // If we are outputting all fields, but this VCF has no genotype info, omit the // genotype columns from output: if (allFields && vcff->genotypeCount == 0) fieldCount = VCFDATALINE_NUM_COLS - 2; if (!printedHeader) { fprintf(f, "%s", vcff->headerString); if (filter) fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); printedHeader = TRUE; } char *row[VCFDATALINE_NUM_COLS]; char numBuf[VCF_NUM_BUF_SIZE]; for (rec = vcff->records; rec != NULL && (maxOut > 0); rec = rec->next) { @@ -288,32 +288,33 @@ if (maxOut == 0) warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); /* Clean up and exit. 
*/ dyStringFree(&dyAlt); dyStringFree(&dyFilter); dyStringFree(&dyInfo); dyStringFree(&dyGt); hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); } static void addFilteredBedsOnRegion(char *fileName, struct region *region, char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList, struct hash *idHash) /* Add relevant beds in reverse order to pBedList */ { +int maxOut = bigFileMaxOutput(); struct vcfFile *vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end, - 100); + 100, maxOut); struct lm *lm = lmInit(0); char *row[VCFDATALINE_NUM_COLS]; char numBuf[VCF_NUM_BUF_SIZE]; // Temporary storage for row-ification: struct dyString *dyAlt = newDyString(1024); struct dyString *dyFilter = newDyString(1024); struct dyString *dyInfo = newDyString(1024); struct dyString *dyGt = newDyString(1024); struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row); if (asFilterOnRow(filter, row)) { if ((idHash != NULL) && (hashLookup(idHash, rec->name) == NULL))