src/fuse/udcFuse/udcFuse.c 1.6

1.6 2009/11/20 17:56:35 angie
Path may now include http auth info (that's the only way of communicating the info to udcFuse) -- if so, strip it out when making the udc cache path.
Index: src/fuse/udcFuse/udcFuse.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/fuse/udcFuse/udcFuse.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/fuse/udcFuse/udcFuse.c	19 Nov 2009 19:07:58 -0000	1.5
+++ src/fuse/udcFuse/udcFuse.c	20 Nov 2009 17:56:35 -0000	1.6
@@ -1,400 +1,429 @@
 /* udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache). */
 #ifdef USE_FUSE
 #include "common.h"
 #include "portable.h"
 #include "errCatch.h"
 #include "udc.h"
 #include <sys/types.h>
 #include <dirent.h>
 #include <pthread.h>
 
 #ifndef FUSE_USE_VERSION
 #define FUSE_USE_VERSION 26
 #endif
 #include "fuse.h"
 
 static char const rcsid[] = "$Id$";
 
 void usage()
 /* Print the command line synopsis via errAbort, which does not return. */
 {
 static const char usageText[] =
     "udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache)\n"
     "usage:\n"
     "   udcFuse [options] emptyDirMountPoint [udcCacheDir]\n"
     "options:\n"
     "   -d: run in debug mode\n";
 // The text has no % directives, so passing it through "%s" prints the same bytes.
 errAbort("%s", usageText);
 }
 
 
 // Important bits from http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FuseInvariants:
 // --------------------------------------------------------------------------
 // * All requests are absolute, i.e. all paths begin with / and
 //   include the complete path to a file or a directory. Symlinks,
 //   . and .. are already resolved.
 // * For every request you can get except for getattr(), read() and
 //   write(), usually for every path argument (both source and
 //   destination for link and rename, but only the source for
 //   symlink), you will get a getattr() request just before the
 //   callback.
 //   For example, suppose I store file names of files in a filesystem
 //   also into a database. To keep data in sync, I would like, for
 //   each filesystem operation that succeeds, to check if the file
 //   exists on the database. I just do this in the getattr() call,
 //   since all other calls will be preceded by a getattr.
 
 // * The arguments for every request are already verified as much as
 //   possible. This means that, for example
 //    * readdir() is only called with an existing directory name
 //    ...
 //    * read() and write() are only called if the file has been opened
 //      with the correct flags
 // --------------------------------------------------------------------------
 
 // This code is driven by the fuse kernel module, so it can't just bail when there
 // is a problem.  Wrap errCatch (which has been made pthread-safe)
 // around any calls to kent/src code.
 // ERR_CATCH_START opens a brace block and declares the local variable "catch";
 // ERR_CATCH_END closes that block, so the two must always be used as a pair
 // inside one function whose return type can hold -1.
 #define ERR_CATCH_START() \
     { \
     struct errCatch *catch = errCatchNew(); \
     if (errCatchStart(catch)) \
 	{
 
 	// Code that can errAbort goes between ERR_CATCH_START and ERR_CATCH_END,
 	// calling ERR_CATCH_FREE if it does its own return statement.
 	// NOTE(review): such a return also skips the errCatchEnd() call made by
 	// ERR_CATCH_END -- confirm that errCatch tolerates that.
 	// When an error was caught, ERR_CATCH_END prints msg plus the caught message
 	// to stderr, frees the errCatch and makes the enclosing function return -1.
 
 #define ERR_CATCH_FREE() errCatchFree(&catch)
 #define ERR_CATCH_END(msg) \
 	} \
     errCatchEnd(catch); \
     if (catch->gotError) \
 	{ \
 	fprintf(stderr, "%s errCatch: %s", (msg), catch->message->string); \
 	ERR_CATCH_FREE(); \
 	return -1; \
 	} \
     ERR_CATCH_FREE(); \
     }
 
 static int checkForFile(const char *path, char *udcCachePath, struct stat *stbuf, int pid)
 /* When a udc cache directory has "bitmap" and "sparseData" files, it 
  * corresponds to a file URL and a udcFile object.  Modify stbuf->st_mode 
  * to reflect a file not a directory, and set stbuf->st_size to the size
  * recorded in the udc cache.
  * path is the fuse path (used to rebuild the URL), udcCachePath is the matching
  * udc cache path, stbuf already holds the stat() of udcCachePath, and pid is
  * only used to tag log messages.
  * Return 0 for OK (including when nothing had to be changed), -errno when the
  * directory can't be opened, -1 when a kent library call errAborts. */
 {
 // Only a directory can be standing in for a remote file.  (The former test,
 // "st_mode | S_IFDIR", was always true and so sent regular files to opendir too.)
 if (!S_ISDIR(stbuf->st_mode))
     return 0;
 DIR *dirHandle = opendir(udcCachePath);
 if (dirHandle == NULL)
     {
     // Save errno right away: fprintf and strerror are allowed to change it.
     int err = errno;
     fprintf(stderr, "...[%d] getattr: failed to opendir(%s)!: %s\n",
 	    pid, udcCachePath, strerror(err));
     return -err;
     }
 // should we make sure that there are not also subdirectories??
 boolean gotBitmap = FALSE, gotSparse = FALSE;
 struct dirent *dirInfo;
 while ((dirInfo = readdir(dirHandle)) != NULL)
     {
     if (sameString(dirInfo->d_name, "bitmap"))
 	gotBitmap = TRUE;
     else if (sameString(dirInfo->d_name, "sparseData"))
 	gotSparse = TRUE;
     if (gotBitmap && gotSparse)
 	break;
     }
 // Close the directory before the errCatch section below: ERR_CATCH_END contains
 // a hidden "return -1" that would otherwise leak the handle.
 closedir(dirHandle);
 if (!gotBitmap && !gotSparse)
     return 0;
 if (gotBitmap ^ gotSparse)
     fprintf(stderr, "...[%d] getattr: got one cache file but not the other - stale?\n",
 	    pid);
 stbuf->st_mode &= ~(S_IFDIR | S_IXUSR | S_IXGRP | S_IXOTH);
 stbuf->st_mode |= S_IFREG;
 // Now we need to set the actual size in stbuf, otherwise fuse will think
 // the size is 4096 or however many bytes have been cached so far, and will
 // prevent callers from reading past that.  
 char buf[4096];
 char *url = NULL;
 long long size = -1;
 ERR_CATCH_START();
 url = udcPathToUrl(path, buf, sizeof(buf), NULL);
 // udcPathToUrl can come back NULL (udcfs_open checks for that too), so don't
 // hand a NULL url on to udcSizeFromCache.
 if (url != NULL)
     size = udcSizeFromCache(url, NULL);
 ERR_CATCH_END("udcPathToUrl or udcSizeFromCache");
 if (size < 0)
     fprintf(stderr, "...[%d] getattr: failed to get udc cache size for %s\n",
 	    pid, (url != NULL) ? url : path);
 else
     stbuf->st_size = size;
 return 0;
 }
 
+#define HTTP_PATH_PREFIX "/http/"
+#define QENCODED_AT_SIGN "Q40"
+
+static int fusePathToUdcPath(const char *path, char *udcPath, size_t udcPathSize)
+/* Write into udcPath (of size udcPathSize) the udc cache path for a fuse path.
+ * Normally that is simply udcDefaultDir() followed by the fuse path.  The exception
+ * is an http path whose host component carries qEncoded auth info: the auth info is
+ * needed for reconstructing the URL but is not part of the udc cache path, so 
+ * everything up to and including the qEncoded at-sign is left out.
+ * Return -1 if the wrapped safef call errAborts, 0 for OK. */
+{
+if (startsWith(HTTP_PATH_PREFIX, path))
+    {
+    const char *hostStart = path + strlen(HTTP_PATH_PREFIX);
+    const char *authEnd = strstr(hostStart, QENCODED_AT_SIGN);
+    const char *hostEnd = strchr(hostStart, '/');
+    // The encoded at-sign only counts when it sits inside the host component,
+    // i.e. before the first slash that follows the prefix (if there is one).
+    boolean gotAuth = (authEnd != NULL && (hostEnd == NULL || authEnd < hostEnd));
+    if (gotAuth)
+	{
+	const char *hostAndRest = authEnd + strlen(QENCODED_AT_SIGN);
+	ERR_CATCH_START();
+	safef(udcPath, udcPathSize, "%s" HTTP_PATH_PREFIX "%s",
+	      udcDefaultDir(), hostAndRest);
+	ERR_CATCH_END("safef udcPath (skipping auth)");
+	return 0;
+	}
+    }
+// No auth info to strip: cache path is the cache root plus the fuse path as given.
+ERR_CATCH_START();
+safef(udcPath, udcPathSize, "%s%s", udcDefaultDir(), path);
+ERR_CATCH_END("safef udcPath");
+return 0;
+}
+
 static int udcfs_getattr(const char *path, struct stat *stbuf)
 /* According to http://sourceforge.net/apps/mediawiki/fuse/index.php?title=FuseInvariants ,
  * getattr() is called to test existence before every other command except read, write and
  * getattr itself.  Give stat of corresponding udc cache file (but make it read-only).
  * path is the fuse path; stbuf receives the stat of the udc cache path with the
  * write permission bits cleared, then possibly adjusted by checkForFile().
  * Return -1 if the path can't be turned into a udc cache path, -errno if stat()
  * fails, otherwise whatever checkForFile() returns. */
 {
 // pid is the pthread id squeezed into an unsigned int; it is only used to tag log lines.
 unsigned int pid = pthread_self();
 char udcCachePath[4096];
-ERR_CATCH_START();
-safef(udcCachePath, sizeof(udcCachePath), "%s%s", udcDefaultDir(), path);
-ERR_CATCH_END("getattr safef udcCachePath");
+if (fusePathToUdcPath(path, udcCachePath, sizeof(udcCachePath)) < 0)
+    return -1;
 int res = stat(udcCachePath, stbuf);
 if (res != 0)
     {
     // NOTE(review): errno is read again after fprintf() has run; fprintf may
     // change it, so the value returned might not be stat()'s error -- confirm.
     fprintf(stderr, "...[%d] getattr: stat(%s) failed (%d): %s\n", pid, udcCachePath, res, strerror(errno));
     return -errno;
     }
 // Force read-only permissions:
 stbuf->st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
 // checkForFile gets the fuse path (to rebuild the URL) as well as the cache path.
 int ret = checkForFile(path, udcCachePath, stbuf, pid);
 fprintf(stderr, "...[%d] getattr %s finish %ld\n", pid, path, clock1000());
 return ret;
 }
 
 static int udcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
 			 off_t offset, struct fuse_file_info *fi)
 /* Read the corresponding udc cache directory, handing each entry name to filler
  * until the directory is exhausted or filler returns nonzero.
  * offset and fi are not used here.
  * Return -1 if the path can't be turned into a udc cache path, -errno if
  * opendir() fails, otherwise the return value of closedir(). */
 {
 // pid is the pthread id squeezed into an unsigned int; it is only used to tag log lines.
 unsigned int pid = pthread_self();
-char *udcCacheRoot = udcDefaultDir();
 char udcCachePath[4096];
-ERR_CATCH_START();
-safef(udcCachePath, sizeof(udcCachePath), "%s%s", udcCacheRoot, path);
-ERR_CATCH_END("readdir safef udcCachePath");
+if (fusePathToUdcPath(path, udcCachePath, sizeof(udcCachePath)) < 0)
+    return -1;
 DIR *dirHandle = opendir(udcCachePath);
 if (dirHandle == NULL)
     {
     // NOTE(review): errno is read again after fprintf() has run; fprintf may
     // change it, so the value returned might not be opendir()'s error -- confirm.
     fprintf(stderr, "...[%d] readdir: opendir(%s) failed!: %s\n",
 	    pid, udcCachePath, strerror(errno));
     return -errno;
     }
 struct dirent *dirInfo;
 // Entries are passed on by name only (NULL stat, offset 0).
 while ((dirInfo = readdir(dirHandle)) != NULL)
     if (filler(buf, dirInfo->d_name, NULL, 0))
 	break;
 int ret = closedir(dirHandle);
 fprintf(stderr, "...[%d] readdir %s finish %ld\n", pid, path, clock1000());
 return ret;
 }
 
 static int udcfs_open(const char *path, struct fuse_file_info *fi)
 /* Call udcOpen() and stash the handle in fi->fh for use by later calls. */
 {
 if ((fi->flags & (O_RDONLY | O_WRONLY | O_RDWR)) != O_RDONLY)
     return -EACCES;
 unsigned int pid = pthread_self();
 fprintf(stderr, "...[%d] open(%s) start %ld\n", pid, path, clock1000());
 struct udcFile *udcf = NULL;
 ERR_CATCH_START();
 char buf[4096];
 char *url = udcPathToUrl(path, buf, sizeof(buf), NULL);
 if (url != NULL)
     {
     if (udcCacheAge(url, NULL) < udcCacheTimeout())
 	fi->keep_cache = 1;
     udcf = udcFileMayOpen(url, NULL);
     fprintf(stderr, "...[%d] open -> udcFileMayOpen(%s) -> 0x%llx\n", pid, url, (long long)udcf);
     }
 else
     {
     fprintf(stderr, "...[%d] open: Unable to translate path %s to URL!\n", pid, path);
     ERR_CATCH_FREE();
     return -1;
     }
 ERR_CATCH_END("udcPathToUrl, udcCacheAge or udcFileMayOpen");
 if (udcf == NULL)
     {
     fprintf(stderr, "...[%d] open: Unable to open udcFile for %s!\n", pid, path);
     return -1;
     }
 fi->fh = (uint64_t)udcf;
 fprintf(stderr, "...[%d] open fh=0x%llx finish %ld\n", pid, (long long)(fi->fh), clock1000());
 return 0;
 }
 
 static int udcfs_read(const char *path, char *buf, size_t size, off_t offset,
                       struct fuse_file_info *fi)
 /* Position the udcFile stored in fi->fh at offset and read up to size bytes into buf.
  * path is only used for logging.
  * Return the number of bytes read, or -1 if there is no handle or udc errAborts. */
 {
 unsigned int tid = pthread_self();
 struct udcFile *handle = (struct udcFile *)(fi->fh);
 fprintf(stderr, "...[%d] read(%s, size=%lld, offset=%lld, fh=0x%llx) start %ld\n",
 	tid, path, (long long)size, (long long)offset, (long long)(fi->fh), clock1000());
 if (handle == NULL)
     {
     fprintf(stderr, "...[%d] read: fuse_file_info fh is NULL -- can't read.\n", tid);
     return -1;
     }
 size_t gotSize = 0;
 ERR_CATCH_START();
 udcSeek(handle, (bits64)offset);
 gotSize = udcRead(handle, buf, size);
 ERR_CATCH_END("udcSeek or udcRead");
 fprintf(stderr, "...[%d] read %lld bytes finish %ld\n", tid, (long long)gotSize, clock1000());
 return gotSize;
 }
 
 static int udcfs_release(const char *path, struct fuse_file_info *fi)
 // Close the udcFile stored as fi->fh and reset fi->fh to 0.
 // path is only used for logging.
 // Return 0 for OK, -1 if udcFileClose errAborts (fi->fh is then left as it was).
 {
 unsigned int pid = pthread_self();
 fprintf(stderr, "...[%d] release %s (0x%llx) %ld\n", pid, path, (long long)(fi->fh), clock1000());
 // fi->fh is a 64-bit integer, not a pointer object: convert it to a real pointer
 // variable rather than casting its address to (struct udcFile **), which breaks
 // aliasing rules and has the wrong size where pointers are narrower than 64 bits.
 struct udcFile *udcf = (struct udcFile *)(uintptr_t)(fi->fh);
 ERR_CATCH_START();
 udcFileClose(&udcf);
 ERR_CATCH_END("udcFileClose");
 fi->fh = 0;
 return 0;
 }
 
 // Table of the fuse callbacks implemented in this file; main() hands it to fuse_main().
 // Only getattr, readdir, open, read and release are provided -- nothing that writes.
 static struct fuse_operations udcfs_oper =
 {
     .getattr	= udcfs_getattr,
     .readdir	= udcfs_readdir,
     .open	= udcfs_open,
     .read	= udcfs_read,
     .release	= udcfs_release,
 };
 
 void checkUdcCacheDir()
 /* Verify that udcDefaultDir() can be opened as a directory; if it can't,
  * report the reason on stderr and exit(1). */
 {
 char *cacheDir = udcDefaultDir();
 DIR *probe = opendir(cacheDir);
 if (probe != NULL)
     {
     // All is well; the handle was only needed for the check.
     closedir(probe);
     return;
     }
 fprintf(stderr, "Error: Can't open udc local cache directory '%s': %s\n",
 	cacheDir, strerror(errno));
 exit(1);
 }
 
 int main(int argc, char *argv[])
 /* udcFuse - FUSE (Filesystem in USErspace) filesystem for lib/udc.c (Url Data Cache).
  * Checks the argument count, takes an optional trailing udcCacheDir argument for
  * itself, sets the udc cache timeout and then passes the remaining arguments to
  * fuse_main().  When UDC_TEST is defined, fuse_main() is not called; instead the
  * udcfs_* callbacks are called directly on a few hard-coded paths. */
 {
 int minArgc = 2;
 int i;
 // Every argument that starts with '-' is counted as an option, and each one
 // raises the number of arguments that must be present.
 for (i = 1; i < argc; i++)
     {
     if (argv[i][0] == '-')
 	minArgc++;
     }
 if (argc < minArgc || argc > minArgc+1)
     usage();
 // Exactly one argument more than the minimum: the last one is the udc cache dir.
 if (argc == minArgc+1)
     {
     udcSetDefaultDir(argv[argc-1]);
     // Fuse does not like getting an extra arg.
     argc--;
     }
 
 // Use kernel caching, and tell udc not to ping server, if cache files are 
 // less than an hour old.  (Should make this a command-line opt.)
 udcSetCacheTimeout(3600);
 
 #ifndef UDC_TEST
 
 return fuse_main(argc, argv, &udcfs_oper, NULL);
 
 #else
 // TEST MAIN -- don't call fuse, just call methods the way we imagine 
 // fuse would call them.
 
 // NOTE(review): testFiller is defined inside main's body, which presumably relies
 // on a compiler that accepts nested function definitions -- confirm.
 #define TESTFILLER_BUFSIZE 256
 int testFiller(void *buf, const char *name, const struct stat *stbuf, off_t off)
 // Impersonate fuse's readdir callback (type fuse_fill_dir_t)
 {
 printf("  -> testFiller(%s)\n", name);
 return 0;
 }
 
 // checkRet: a negative return is treated as -errno; print its message and exit(1).
 #define checkRet(ret) \
 { \
 if (ret < 0) \
     { \
     printf("Doh!: %s\n", strerror(-ret)); \
     exit(1); \
     } \
 }
 
 #define UDC_TEST_PATH "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom22.SLX.maq.SRP000032.2009_07.bam"
 #define UDC_TEST_PATH2 "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom21.SLX.maq.SRP000032.2009_07.bam"
 udcfs_oper.getattr = udcfs_oper.getattr; // avoid unused-var warning.
 struct fuse_file_info fi;
 memset(&fi, 0, sizeof(fi));
 struct stat stbuf;
 char buf[TESTFILLER_BUFSIZE];
 int ret;
 ret = udcfs_getattr(UDC_TEST_PATH, &stbuf);
 printf("Got %d from getattr; stbuf.st_mode=0%llo\n\n", ret, (long long)stbuf.st_mode);
 checkRet(ret);
 
 ret = udcfs_readdir("/", buf, testFiller, 0, &fi);
 printf("Got %d from readdir\n\n", ret);
 checkRet(ret);
 
 ret = udcfs_readdir("/ftp", buf, testFiller, 0, &fi);
 printf("Got %d from readdir\n\n", ret);
 checkRet(ret);
 
 ret = udcfs_open(UDC_TEST_PATH, &fi);
 printf("Got %d from open -> udc handle 0x%llx\n\n", ret, (long long)(fi.fh));
 checkRet(ret);
 
 ret = udcfs_read(UDC_TEST_PATH, buf, 4, 0, &fi);
 printf("Got %d bytes: 0x%x from read @0 on 0x%llx!\n\n", ret, *(unsigned int *)buf, (long long)(fi.fh));
 checkRet(ret);
 
 // Make sure we can have two open handles on the same file at the same time:
 struct fuse_file_info fi2;
 memset(&fi2, 0, sizeof(fi2));
 ret = udcfs_open(UDC_TEST_PATH2, &fi2);
 printf("Got %d from open -> second udc handle 0x%llx\n\n", ret, (long long)(fi2.fh));
 checkRet(ret);
 
 ret = udcfs_read(UDC_TEST_PATH2, buf, 4, 8, &fi2);
 printf("Got %d bytes: 0x%x from read @8 on second handle 0x%llx!\n\n", ret, *(unsigned int *)buf, (long long)(fi2.fh));
 checkRet(ret);
 
 // In the next two reads the path does not match the handle; udcfs_read only
 // logs the path and reads through the handle in fi->fh.
 ret = udcfs_read(UDC_TEST_PATH2, buf, 4, 8, &fi);
 printf("Got %d bytes: 0x%x from read @8 on first handle 0x%llx!\n\n", ret, *(unsigned int *)buf, (long long)(fi.fh));
 checkRet(ret);
 
 ret = udcfs_read(UDC_TEST_PATH, buf, 8, 9000, &fi2);
 printf("Got %d bytes: 0x%llx from read @9000 on second handle 0x%llx!\n\n", ret, *(unsigned long long *)buf, (long long)(fi2.fh));
 checkRet(ret);
 
 ret = udcfs_release(UDC_TEST_PATH2, &fi2);
 printf("Got %d from release of second handle; now fi2.fh is 0x%llx\n\n", ret, (long long)(fi2.fh));
 checkRet(ret);
 
 // The first handle must still be readable after the second one has been released.
 ret = udcfs_read(UDC_TEST_PATH, buf, 8, 9000, &fi);
 printf("Got %d bytes: 0x%llx from read @9000 on 0x%llx!\n\n", ret, *(unsigned long long *)buf, (long long)(fi.fh));
 checkRet(ret);
 
 ret = udcfs_release(UDC_TEST_PATH, &fi);
 printf("Got %d from release; now fi.fh is 0x%llx\n\n", ret, (long long)(fi.fh));
 checkRet(ret);
 
 // Now try to getattr something that has not (at the moment anyway) yet been opened in udc first:
 #define UDC_TEST_PATH3 "/ftp/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.chrom9.SLX.maq.SRP000032.2009_07.bam"
 memset(&stbuf, 0, sizeof(stbuf));
 ret = udcfs_getattr(UDC_TEST_PATH3, &stbuf);
 printf("Got %d from getattr; stbuf.st_mode=0%llo\n\n", ret, (long long)stbuf.st_mode);
 checkRet(ret);
 
 return 0;
 #endif//def UDC_TEST
 }
 
 #else // no USE_FUSE
 #include <stdio.h>
 int main(int argc, char *argv[])
 /* Stand-in main for builds without USE_FUSE: explain what is needed to get the
  * real program.  argc and argv are not used.  Always returns 0. */
 {
 printf("udcFuse requires the FUSE (filesystem in userspace) library -- make sure that is installed and add USE_FUSE=1 to your environment.\n");
 return 0;
 }
 
 #endif//def USE_FUSE