5780fed8b94b69b4b22e2d389112e4d34d51368f angie Fri Apr 6 09:31:50 2018 -0700 Fixing lack of NULL checks on result of vcfFileName. Symptom was crash on tgpPhase3 which has no file for chrM when doing genome-wide query. refs #21218 diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c index 99ea471..b158c41 100644 --- src/hg/hgTables/vcf.c +++ src/hg/hgTables/vcf.c @@ -165,49 +165,59 @@ dyJoin(dyFilter, ";", rec->filters, rec->filterCount); row[6] = dyFilter->string; vcfInfoElsToString(dyInfo, vcff, rec); row[7] = dyInfo->string; if (vcff->genotypeCount > 0) { row[8] = rec->format; dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount); row[9] = dyGt->string; } else row[8] = row[9] = ""; // compatible with localmem usage } static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom, boolean isTabix) -// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary. +// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary; return NULL if not found. { char *fileName = bigFileNameFromCtOrHub(table, conn); struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); if (isCustomTrack(table) && ! isTabix) { // fileName is stored in the customTrash table struct customTrack *ct = ctLookupName(table); struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH); char query[1024]; sqlSafef(query, sizeof(query), "select fileName from %s", ct->dbTableName); fileName = sqlQuickString(conn, query); hFreeConn(&conn); } if (fileName == NULL) fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom); return fileName; } +static char *vcfMustFindFileName(struct sqlConnection *conn, char *table, char *chrom, + boolean isTabix) +/* Look up the vcf or vcfTabix file name; errAbort if not found. */ +{ +char *fileName = vcfFileName(conn, table, chrom, isTabix); +if (fileName == NULL) + errAbort("vcfMustFindFileName: can't find VCF file; chrom=%s, table=%s", chrom, table); +return fileName; +} + void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f, boolean isTabix) /* Print out selected fields from VCF. If fields is NULL, then print out all fields. */ { struct hTableInfo *hti = NULL; hti = getHti(db, table, conn); struct hash *idHash = NULL; char *idField = getIdField(db, curTrack, table, hti); int idFieldNum = 0; /* if we know what field to use for the identifiers, get the hash of names */ if (idField != NULL) idHash = identifierHash(db, table); if (f == NULL) @@ -252,30 +262,32 @@ /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); // Include the header, absolutely necessary for VCF parsing. boolean printedHeader = FALSE; // Temporary storage for row-ification: struct dyString *dyAlt = newDyString(1024); struct dyString *dyFilter = newDyString(1024); struct dyString *dyInfo = newDyString(1024); struct dyString *dyGt = newDyString(1024); struct vcfRecord *rec; for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { char *fileName = vcfFileName(conn, table, region->chrom, isTabix); + if (fileName == NULL) + continue; struct vcfFile *vcff; if (isTabix) { char *indexUrl = bigDataIndexFromCtOrHub(table, conn); vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end, 100, maxOut); } else vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end, 100, maxOut, TRUE); if (vcff == NULL) noWarnAbort(); // If we are outputting all fields, but this VCF has no genotype info, omit the // genotype columns from output: if (allFields && vcff->genotypeCount == 0) @@ -403,31 +415,31 @@ freeMem(fileName); if (maxOut <= 0) { errAbort("Reached output limit of %d data values, please make region smaller,\n" "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); } } slReverse(&bedList); return bedList; } struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix) /* Return some semi-random IDs from a VCF file. */ { /* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. */ -char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix); +char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix); char *indexUrl = bigDataIndexFromCtOrHub(table, conn); struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) : lineFileMayOpen(fileName, TRUE); if (lf == NULL) noWarnAbort(); int orderedCount = count * 4; if (orderedCount < 100) orderedCount = 100; struct slName *idList = NULL; char *words[4]; int i; for (i = 0; i < orderedCount && lineFileChop(lf, words); i++) { // compress runs of identical ID, in case most are placeholder @@ -444,31 +456,31 @@ { slNameFreeList(&(sl->next)); break; } } freez(&fileName); return idList; } #define VCF_MAX_SCHEMA_COLS 20 void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix) /* Show schema on vcf. */ { struct sqlConnection *conn = hAllocConn(database); -char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix); +char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix); struct asObject *as = vcfAsObj(); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>VCF File:</B> %s", fileName); hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment); hPrintf("See the <A HREF=\"%s\" target=_blank>Variant Call Format specification</A> for more details<BR>\n", "http://www.1000genomes.org/wiki/analysis/vcf4.0"); /* Put up table that describes fields. */ hTableStart(); hPrintf("<TR><TH>field</TH>"); hPrintf("<TH>description</TH> "); puts("</TR>\n");