e020a087201d536e5ffe573fa6fb1ad1be68127c
angie
  Wed Dec 18 16:54:56 2013 -0800
In MLQ #12367, the user got a blank screen when trying to view a CTusing a Mac-mangled BAM file.  Of course we need to show an error message
instead of SEGV/blank screen; but hgCustom also shouldn't have accepted
the file as a custom track.  It turns out that the samtools lib function
samopen() may return a non-null file handler with a null header, when the
header is invalid.  So we need to check not only for null result from
samopen, but also null header inside non-null result.  Meanwhile, I noticed
that hgCustom's error reporting was actually masking more detailed error
messages from lib code, because of its use of errCatch around bamFileExists.
errCatch only lets errAbort messages propagate, not warnings; bamFileExists
has a warn() that is lost in transmission.  I added bamFileAndIndexMustExist
which uses errAbort so that errCatch in callers will get the more informative
error instead of having to make up one of its own.  trackHub checking code
also needed this stronger form of checking.  I also moved some duplicated
(or triplicated) code in bamFile.c into functions.
refs #12367

diff --git src/lib/bamFile.c src/lib/bamFile.c
index 6f34286..6dd3ae5 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -13,157 +13,179 @@
  * and make sure the directory exists. */
 {
 static char *samDir = NULL;
 char *dirName = "samtools";
 if (samDir == NULL)
     {
     mkdirTrashDirectory(dirName);
     size_t len = strlen(trashDir()) + 1 + strlen(dirName) + 1;
     samDir = needMem(len);
     safef(samDir, len, "%s/%s", trashDir(), dirName);
     }
 return samDir;
 }
 #endif//ndef KNETFILE_HOOKS
 
-boolean bamFileExists(char *fileOrUrl)
-/* Return TRUE if we can successfully open the bam file and its index file. */
-{
-char *bamFileName = fileOrUrl;
-samfile_t *fh = samopen(bamFileName, "rb", NULL);
-boolean usingUrl = TRUE; 
-usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
-if (fh != NULL)
+static bam_index_t *bamOpenIdx(char *bamFileName)
+/* If bamFileName has a valid accompanying .bai file, parse and return the index;
+ * otherwise return NULL. */
 {
 #ifndef KNETFILE_HOOKS
 // When file is an URL, this caches the index file in addition to validating:
 // Since samtools's url-handling code saves the .bai file to the current directory,
 // chdir to a trash directory before calling bam_index_load, then chdir back.
 char *runDir = getCurrentDir();
 char *samDir = getSamDir();
+boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
 if (usingUrl)
     setCurrentDir(samDir);
 #endif//ndef KNETFILE_HOOKS
 bam_index_t *idx = bam_index_load(bamFileName);
 #ifndef KNETFILE_HOOKS
 if (usingUrl)
     setCurrentDir(runDir);
 #endif//ndef KNETFILE_HOOKS
+return idx;
+}
+
+static void bamCloseIdx(bam_index_t **pIdx)
+/* Free unless already NULL. */
+{
+if (pIdx != NULL && *pIdx != NULL)
+    {
+    free(*pIdx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
+    *pIdx = NULL;
+    }
+}
+
+boolean bamFileExists(char *fileOrUrl)
+/* Return TRUE if we can successfully open the bam file and its index file.
+ * NOTE: this doesn't give enough diagnostics */
+{
+char *bamFileName = fileOrUrl;
+samfile_t *fh = samopen(bamFileName, "rb", NULL);
+// Check both fh and fh->header; non-NULL fh can have NULL header if header doesn't parse!
+if (fh != NULL && fh->header != NULL)
+    {
+    bam_index_t *idx = bamOpenIdx(bamFileName);
     samclose(fh);
     if (idx == NULL)
 	{
 	warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
 	return FALSE;
 	}
-    free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
+    bamCloseIdx(&idx);
+    bamClose(&fh);
     return TRUE;
     }
 return FALSE;
 }
 
 samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
-/* Return an open bam file, dealing with FUSE caching if need be. 
- * Return parameter if NON-null will return the file name after FUSing */
+/* Return an open bam file or errAbort (should be named bamMustOpen).
+ * Return parameter if NON-null will return the file name (vestigial; long ago
+ * there was a plan to use udcFuse filenames instead of URLs) */
 {
 char *bamFileName = fileOrUrl;
 if (retBamFileName != NULL)
     *retBamFileName = bamFileName;
 
 #ifdef BAM_VERSION
 // suppress too verbose messages in samtools >= 0.1.18; see redmine #6491
 // This variable didn't exist in older versions of samtools (where BAM_VERSION wasn't defined).
 bam_verbose = 1;
 #endif
 
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
-if (fh == NULL)
+// Check both fh and fh->header; non-NULL fh can have NULL header if header doesn't parse!
+if (fh == NULL || fh->header == NULL)
     {
     boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
     struct dyString *urlWarning = dyStringNew(0);
-    if (usingUrl)
+    if (usingUrl && fh == NULL)
 	{
 	dyStringAppend(urlWarning,
 		       ". If you are able to access the URL with your web browser, "
 		       "please try reloading this page.");
 	}
+    else if (fh != NULL && fh->header == NULL)
+	dyStringAppend(urlWarning, ": parser error while reading the file header.");
     errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string);
     }
 return fh;
 }
 
 void bamClose(samfile_t **pSamFile)
 /* Close down a samefile_t */
 {
 if (pSamFile != NULL)
     {
     samclose(*pSamFile);
     *pSamFile = NULL;
     }
 }
 
+void bamFileAndIndexMustExist(char *fileOrUrl)
+/* Open both a bam file and its accompanying index or errAbort; this is what it
+ * takes for diagnostic info to propagate up through errCatches in calling code. */
+{
+samfile_t *bamF = bamOpen(fileOrUrl, NULL);
+bam_index_t *idx = bamOpenIdx(fileOrUrl);
+if (idx == NULL)
+    errAbort("failed to read index file (.bai) corresponding to %s", fileOrUrl);
+bamCloseIdx(&idx);
+bamClose(&bamF);
+}
+
 void bamFetchAlreadyOpen(samfile_t *samfile, bam_index_t *idx, char *bamFileName, 
 			 char *position, bam_fetch_f callbackFunc, void *callbackData)
 /* With the open bam file, return items the same way with the callbacks as with bamFetch() */
 /* except in this case use an already-open bam file and index (use bam_index_load and free() for */
 /* the index). It seems a little strange to pass the filename in with the open bam, but */
 /* it's just used to report errors. */
 {
 int chromId, start, end;
 int ret = bam_parse_region(samfile->header, position, &chromId, &start, &end);
 if (ret != 0 && startsWith("chr", position))
     ret = bam_parse_region(samfile->header, position+strlen("chr"), &chromId, &start, &end);
 if (ret != 0)
     // If the bam file does not cover the current chromosome, OK
     return;
 ret = bam_fetch(samfile->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
 if (ret != 0)
     warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
 }
 
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile)
 /* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
  * the benefit of the callback function, with the open samFile.  */
 {
 char *bamFileName = NULL;
 samfile_t *fh = bamOpen(fileOrUrl, &bamFileName);
-boolean usingUrl = TRUE;
-usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
 if (pSamFile != NULL)
     *pSamFile = fh;
-#ifndef KNETFILE_HOOKS
-// Since samtools' url-handling code saves the .bai file to the current directory,
-// chdir to a trash directory before calling bam_index_load, then chdir back.
-char *runDir = getCurrentDir();
-char *samDir = getSamDir();
-if (usingUrl)
-    setCurrentDir(samDir);
-#endif//ndef KNETFILE_HOOKS
-bam_index_t *idx = bam_index_load(bamFileName);
-#ifndef KNETFILE_HOOKS
-if (usingUrl)
-    setCurrentDir(runDir);
-#endif//ndef KNETFILE_HOOKS
+bam_index_t *idx = bamOpenIdx(bamFileName);
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
     bamFetchAlreadyOpen(fh, idx, bamFileName, position, callbackFunc, callbackData);
-    free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
+    bamCloseIdx(&idx);
     }
 bamClose(&fh);
 }
 
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
 const bam1_core_t *core = &bam->core;
 return (core->flag & BAM_FREVERSE);
 }
 
 void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
 /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
  * the beginning of the query sequence and quality; likewise for retHigh at end.
  * For convenience, retClippedQLen is the original query length minus soft clipping
@@ -558,31 +580,31 @@
 }
 
 #else
 // If we're not compiling with samtools, make stub routines so compile won't fail:
 
 boolean bamFileExists(char *bamFileName)
 /* Return TRUE if we can successfully open the bam file and its index file. */
 {
 warn(COMPILE_WITH_SAMTOOLS, "bamFileExists");
 return FALSE;
 }
 
 samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
 /* Return an open bam file */
 {
-warn(COMPILE_WITH_SAMTOOLS, "bamOpenUdc");
+warn(COMPILE_WITH_SAMTOOLS, "bamOpen");
 return FALSE;
 }
 
 void bamClose(samfile_t **pSamFile)
 /* Close down a samefile_t */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamClose");
 }
 
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 	      samfile_t **pSamFile)
 /* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for