5780fed8b94b69b4b22e2d389112e4d34d51368f
angie
Fri Apr 6 09:31:50 2018 -0700
Fixing missing NULL checks on the result of vcfFileName. The symptom was a crash on tgpPhase3, which has no file for chrM, when doing a genome-wide query. refs #21218
diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c
index 99ea471..b158c41 100644
--- src/hg/hgTables/vcf.c
+++ src/hg/hgTables/vcf.c
@@ -165,49 +165,59 @@
dyJoin(dyFilter, ";", rec->filters, rec->filterCount);
row[6] = dyFilter->string;
vcfInfoElsToString(dyInfo, vcff, rec);
row[7] = dyInfo->string;
if (vcff->genotypeCount > 0)
{
row[8] = rec->format;
dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount);
row[9] = dyGt->string;
}
else
row[8] = row[9] = ""; // compatible with localmem usage
}
static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom, boolean isTabix)
-// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary.
+// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary; return NULL if not found.
{
char *fileName = bigFileNameFromCtOrHub(table, conn);
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
if (isCustomTrack(table) && ! isTabix)
{
// fileName is stored in the customTrash table
struct customTrack *ct = ctLookupName(table);
struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH);
char query[1024];
sqlSafef(query, sizeof(query), "select fileName from %s", ct->dbTableName);
fileName = sqlQuickString(conn, query);
hFreeConn(&conn);
}
if (fileName == NULL)
fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom);
return fileName;
}
+static char *vcfMustFindFileName(struct sqlConnection *conn, char *table, char *chrom,
+ boolean isTabix)
+/* Look up the vcf or vcfTabix file name; errAbort if not found. */
+{
+char *fileName = vcfFileName(conn, table, chrom, isTabix);
+if (fileName == NULL)
+ errAbort("vcfMustFindFileName: can't find VCF file; chrom=%s, table=%s", chrom, table);
+return fileName;
+}
+
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
boolean isTabix)
/* Print out selected fields from VCF. If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;
/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
idHash = identifierHash(db, table);
if (f == NULL)
@@ -252,30 +262,32 @@
/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
{
char *fileName = vcfFileName(conn, table, region->chrom, isTabix);
+ if (fileName == NULL)
+ continue;
struct vcfFile *vcff;
if (isTabix)
{
char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end,
100, maxOut);
}
else
vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
100, maxOut, TRUE);
if (vcff == NULL)
noWarnAbort();
// If we are outputting all fields, but this VCF has no genotype info, omit the
// genotype columns from output:
if (allFields && vcff->genotypeCount == 0)
@@ -403,31 +415,31 @@
freeMem(fileName);
if (maxOut <= 0)
{
errAbort("Reached output limit of %d data values, please make region smaller,\n"
"\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
}
}
slReverse(&bedList);
return bedList;
}
struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix)
/* Return some semi-random IDs from a VCF file. */
{
/* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. */
-char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix);
+char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix);
char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) :
lineFileMayOpen(fileName, TRUE);
if (lf == NULL)
noWarnAbort();
int orderedCount = count * 4;
if (orderedCount < 100)
orderedCount = 100;
struct slName *idList = NULL;
char *words[4];
int i;
for (i = 0; i < orderedCount && lineFileChop(lf, words); i++)
{
// compress runs of identical ID, in case most are placeholder
@@ -444,31 +456,31 @@
{
slNameFreeList(&(sl->next));
break;
}
}
freez(&fileName);
return idList;
}
#define VCF_MAX_SCHEMA_COLS 20
void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix)
/* Show schema on vcf. */
{
struct sqlConnection *conn = hAllocConn(database);
-char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix);
+char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix);
struct asObject *as = vcfAsObj();
hPrintf("Database: %s", database);
hPrintf(" Primary Table: %s
", table);
hPrintf("VCF File: %s", fileName);
hPrintf("
\n");
hPrintf("Format description: %s
", as->comment);
hPrintf("See the Variant Call Format specification for more details
\n",
"http://www.1000genomes.org/wiki/analysis/vcf4.0");
/* Put up table that describes fields. */
hTableStart();
hPrintf("