5780fed8b94b69b4b22e2d389112e4d34d51368f
angie
  Fri Apr 6 09:31:50 2018 -0700
Fixing lack of NULL checks on result of vcfFileName.  Symptom was crash on tgpPhase3 which has no file for chrM when doing genome-wide query.  refs #21218

diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c
index 99ea471..b158c41 100644
--- src/hg/hgTables/vcf.c
+++ src/hg/hgTables/vcf.c
@@ -165,49 +165,59 @@
 dyJoin(dyFilter, ";", rec->filters, rec->filterCount);
 row[6] = dyFilter->string;
 vcfInfoElsToString(dyInfo, vcff, rec);
 row[7] = dyInfo->string;
 if (vcff->genotypeCount > 0)
     {
     row[8] = rec->format;
     dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount);
     row[9] = dyGt->string;
     }
 else
     row[8] = row[9] = ""; // compatible with localmem usage
 }
 
 static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom, boolean isTabix)
-// Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary.
+// Look up the vcf or vcfTabix file name (CUSTOM_TRASH for non-tabix custom tracks); return NULL if not found.
 {
 char *fileName = bigFileNameFromCtOrHub(table, conn);
 struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
 if (isCustomTrack(table) && ! isTabix)
     {
     // fileName is stored in the customTrash table
     struct customTrack *ct = ctLookupName(table);
     struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH);
     char query[1024];
     sqlSafef(query, sizeof(query), "select fileName from %s", ct->dbTableName);
     fileName = sqlQuickString(conn, query);
     hFreeConn(&conn);
     }
 if (fileName == NULL)
     fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom);
 return fileName;
 }
 
+static char *vcfMustFindFileName(struct sqlConnection *conn, char *table, char *chrom,
+                                 boolean isTabix)
+/* Like vcfFileName, but errAbort (reporting chrom and table) instead of returning NULL. */
+{
+char *fileName = vcfFileName(conn, table, chrom, isTabix);
+if (fileName == NULL)
+    errAbort("vcfMustFindFileName: can't find VCF file; chrom=%s, table=%s", chrom, table);
+return fileName;
+}
+
 void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
 	       boolean isTabix)
 /* Print out selected fields from VCF.  If fields is NULL, then print out all fields. */
 {
 struct hTableInfo *hti = NULL;
 hti = getHti(db, table, conn);
 struct hash *idHash = NULL;
 char *idField = getIdField(db, curTrack, table, hti);
 int idFieldNum = 0;
 
 /* if we know what field to use for the identifiers, get the hash of names */
 if (idField != NULL)
     idHash = identifierHash(db, table);
 
 if (f == NULL)
@@ -252,30 +262,32 @@
 
 /* Loop through outputting each region */
 struct region *region, *regionList = getRegions();
 int maxOut = bigFileMaxOutput();
 // Include the header, absolutely necessary for VCF parsing.
 boolean printedHeader = FALSE;
 // Temporary storage for row-ification:
 struct dyString *dyAlt = newDyString(1024);
 struct dyString *dyFilter = newDyString(1024);
 struct dyString *dyInfo = newDyString(1024);
 struct dyString *dyGt = newDyString(1024);
 struct vcfRecord *rec;
 for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
     {
     char *fileName = vcfFileName(conn, table, region->chrom, isTabix);
+    if (fileName == NULL)
+        continue;
     struct vcfFile *vcff;
     if (isTabix)
         {
         char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
 	vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end,
 				   100, maxOut);
         }
     else
 	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
 			      100, maxOut, TRUE);
     if (vcff == NULL)
 	noWarnAbort();
     // If we are outputting all fields, but this VCF has no genotype info, omit the
     // genotype columns from output:
     if (allFields && vcff->genotypeCount == 0)
@@ -403,31 +415,31 @@
     freeMem(fileName);
     if (maxOut <= 0)
 	{
 	errAbort("Reached output limit of %d data values, please make region smaller,\n"
 	     "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
 	}
     }
 slReverse(&bedList);
 return bedList;
 }
 
 struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix)
 /* Return some semi-random IDs from a VCF file. */
 {
 /* Read 10000 items from vcf file,  or if they ask for a big list, then 4x what they ask for. */
-char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix);
+char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix);
 char *indexUrl = bigDataIndexFromCtOrHub(table, conn);
 
 struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) :
 				lineFileMayOpen(fileName, TRUE);
 if (lf == NULL)
     noWarnAbort();
 int orderedCount = count * 4;
 if (orderedCount < 100)
     orderedCount = 100;
 struct slName *idList = NULL;
 char *words[4];
 int i;
 for (i = 0;  i < orderedCount && lineFileChop(lf, words); i++)
     {
     // compress runs of identical ID, in case most are placeholder
@@ -444,31 +456,31 @@
 	{
 	slNameFreeList(&(sl->next));
 	break;
 	}
     }
 freez(&fileName);
 return idList;
 }
 
 #define VCF_MAX_SCHEMA_COLS 20
 
 void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix)
 /* Show schema on vcf. */
 {
 struct sqlConnection *conn = hAllocConn(database);
-char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix);
+char *fileName = vcfMustFindFileName(conn, table, hDefaultChrom(database), isTabix);
 
 struct asObject *as = vcfAsObj();
 hPrintf("<B>Database:</B> %s", database);
 hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>Primary Table:</B> %s<br>", table);
 hPrintf("<B>VCF File:</B> %s", fileName);
 hPrintf("<BR>\n");
 hPrintf("<B>Format description:</B> %s<BR>", as->comment);
 hPrintf("See the <A HREF=\"%s\" target=_blank>Variant Call Format specification</A> for  more details<BR>\n",
 	"http://www.1000genomes.org/wiki/analysis/vcf4.0");
 
 /* Put up table that describes fields. */
 hTableStart();
 hPrintf("<TR><TH>field</TH>");
 hPrintf("<TH>description</TH> ");
 puts("</TR>\n");