src/hg/lib/bamFile.c 1.13

1.13 2009/11/05 17:50:24 angie
It's not sufficient to test for the existence of a path in udcFuse, because empty directories are left behind when bitmap and sparseData files are cleaned up, and udcFuse will then show the empty directory. So test whether a udcFuse path is a regular file (which is how a directory containing bitmap and sparseData files appears in udcFuse). Also, test for the existence of index files independently of the .bam file, and look for both possible index file names as udcFuse paths before attempting the costly udc open.
Index: src/hg/lib/bamFile.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/bamFile.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -b -B -U 4 -r1.12 -r1.13
--- src/hg/lib/bamFile.c	3 Nov 2009 00:33:45 -0000	1.12
+++ src/hg/lib/bamFile.c	5 Nov 2009 17:50:24 -0000	1.13
@@ -47,8 +47,17 @@
 hFreeConn(&conn);
 return fileName;
 }
 
+static boolean isRegularFile(char *filename)
+/* Return non-zero if stat() succeeds on filename and it is a regular file; FALSE if stat() fails or it is anything else (e.g. a directory). */
+{
+struct stat mystat;
+if (stat(filename, &mystat) != 0)
+    return FALSE;  // stat() failed, so treat the path as not a usable file
+return S_ISREG(mystat.st_mode);
+}
+
 static char *samtoolsFileName(char *fileOrUrl)
 /* If udcFuse is configured, and we have a URL, convert it into a filename in
  * the udcFuse filesystem for use by samtools.  Thus samtools will think it's
  * working on a local file, but udcFuse will pass access requests to udc and
@@ -64,46 +73,66 @@
     {
     struct dyString *dy = dyStringNew(0);
     dyStringPrintf(dy, "%s/%s/%s", udcFuseRoot, protocol, afterProtocol);
     char *bamFileName = dyStringCannibalize(&dy);
-    if (!fileExists(bamFileName))
+    if (!isRegularFile(bamFileName))
 	{
+	verbose(2, "going to call udcFileMayOpen(%s).\n", fileOrUrl);
 	struct udcFile *udcf = udcFileMayOpen(fileOrUrl, NULL);
 	if (udcf != NULL)
 	    {
 	    udcFileClose(&udcf);
-	    if (!fileExists(bamFileName))
+	    verbose(2, "closed udcf. testing existence of %s.\n", bamFileName);
+	    if (!isRegularFile(bamFileName))
 		{
 		warn("Cannot find %s -- remount udcFuse?", bamFileName);
 		freeMem(bamFileName);
 		return cloneString(fileOrUrl);
 		}
-	    // Look for index file: xxx.bam.bai or xxx.bai.
+	    }
+	else
+	    errAbort("Failed to open BAM URL \"%s\" with udc", fileOrUrl);
+	}
+    // Look for index file: xxx.bam.bai or xxx.bai.  Look for both in udcFuse,
+    // and only open the URL with udc if neither udcFuse file exists.
 	    int urlLen = strlen(fileOrUrl), fLen = strlen(bamFileName);
-	    char *indexUrl = needMem(urlLen+5), *indexFileName = needMem(fLen+5);
-	    safef(indexUrl, urlLen+5, "%s.bai", fileOrUrl);
+    char *indexFileName = needMem(fLen+5);
 	    safef(indexFileName, fLen+5, "%s.bai", bamFileName);
-	    udcf = udcFileMayOpen(indexUrl, NULL);
-	    if (udcf == NULL)
+    if (!isRegularFile(indexFileName))
 		{
+	verbose(2, "%s does not already exist\n", indexFileName);
+	char *altIndexFileName = NULL;
 		if (endsWith(fileOrUrl, ".bam"))
 		    {
-		    strcpy(indexUrl+urlLen-1, "i");
-		    strcpy(indexFileName+fLen-1, "i");
-		    udcf = udcFileMayOpen(indexUrl, NULL);
+	    altIndexFileName = cloneString(indexFileName);
+	    strcpy(altIndexFileName+fLen-1, "i");
+	    }
+	if (!(altIndexFileName && isRegularFile(altIndexFileName)))
+	    {
+	    char *indexUrl = needMem(urlLen+5);
+	    safef(indexUrl, urlLen+5, "%s.bai", fileOrUrl);
+	    verbose(2, "going to call udcFileMayOpen(%s).\n", indexUrl);
+	    struct udcFile *udcf = udcFileMayOpen(indexUrl, NULL);
+	    if (udcf != NULL)
+		udcFileClose(&udcf);
+	    else if (altIndexFileName != NULL)
+		{
+		char *altIndexUrl = cloneString(indexUrl);
+		strcpy(altIndexUrl+urlLen-1, "i");
+		verbose(2, "going to call udcFileMayOpen(%s).\n", altIndexUrl);
+		udcf = udcFileMayOpen(altIndexUrl, NULL);
 		    if (udcf == NULL)
-			errAbort("Cannot find BAM index file (%s.bai or %s)", fileOrUrl, indexUrl);
+		    errAbort("Cannot find BAM index file (%s or %s)", indexUrl, altIndexUrl);
 		    udcFileClose(&udcf);
+		freeMem(altIndexUrl);
 		    }
 		else
 		    errAbort("Cannot find BAM index file for \"%s\"", fileOrUrl);
-		}
 	    freeMem(indexUrl);
-	    freeMem(indexFileName);
 	    }
-	else
-	    errAbort("Failed to read BAM file \"%s\"", fileOrUrl);
+	freeMem(altIndexFileName);
 	}
+    freeMem(indexFileName);
     return bamFileName;
     }
 return cloneString(fileOrUrl);
 }