c571aeb65d3c0ee76bea06111aa9b0fff4d198b1 aamp Sat Jul 16 09:07:15 2011 -0700 deleted anything udcFUSE-related diff --git src/lib/bamFile.c src/lib/bamFile.c index 4456991..0f9a9e3 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -1,146 +1,46 @@ /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ #include "common.h" #include "portable.h" #include "bamFile.h" #ifdef USE_BAM #include "htmshell.h" #include "udc.h" -static boolean isRegularFile(char *filename) -/* File not only exists, but is also a file not a directory. */ -{ -struct stat mystat; -if (stat(filename, &mystat) != 0) - return FALSE; -return S_ISREG(mystat.st_mode); -} - -static char *samtoolsFileNameUdcFuse(char *fileOrUrl, char *udcFuseRoot) -/* If udcFuse is configured, and we have a URL, convert it into a filename in - * the udcFuse filesystem for use by samtools. Thus samtools will think it's - * working on a local file, but udcFuse will pass access requests to udc and - * we'll get the benefits of sparse-file local caching and https support. - * udcFuse needs us to open udc files before invoking udcFuse paths, so open - * both the .bam and .bai (index) URLs with udc here. - * If udcFuse is not configured, or fileOrUrl is not an URL, just pass through fileOrUrl. */ -{ -char *protocol = NULL, *afterProtocol = NULL, *colon = NULL, *auth = NULL; -udcParseUrlFull(fileOrUrl, &protocol, &afterProtocol, &colon, &auth); -if (udcFuseRoot != NULL && afterProtocol != NULL) - { - struct dyString *dy = dyStringNew(0); - if (auth == NULL) - auth = ""; - dyStringPrintf(dy, "%s/%s/%s%s", udcFuseRoot, protocol, auth, afterProtocol); - char *bamFileName = dyStringCannibalize(&dy); - if (!isRegularFile(bamFileName)) - { - verbose(2, "going to call udcFileMayOpen(%s).\n", fileOrUrl); - struct udcFile *udcf = udcFileMayOpen(fileOrUrl, NULL); - if (udcf != NULL) - { - udcFileClose(&udcf); - verbose(2, "closed udcf. testing existence of %s.\n", bamFileName); - if (!isRegularFile(bamFileName)) - { - warn("Cannot find %s -- remount udcFuse?", bamFileName); - freeMem(bamFileName); - return cloneString(fileOrUrl); - } - } - else - { - warn("Failed to open BAM URL \"%s\" with udc", fileOrUrl); - freeMem(bamFileName); - return cloneString(fileOrUrl); - } - } - // Look for index file: xxx.bam.bai or xxx.bai. Look for both in udcFuse, - // and only open the URL with udc if neither udcFuse file exists. - int urlLen = strlen(fileOrUrl), fLen = strlen(bamFileName); - char *indexFileName = needMem(fLen+5); - safef(indexFileName, fLen+5, "%s.bai", bamFileName); - if (!isRegularFile(indexFileName)) - { - verbose(2, "%s does not already exist\n", indexFileName); - char *altIndexFileName = NULL; - if (endsWith(fileOrUrl, ".bam")) - { - altIndexFileName = cloneString(indexFileName); - strcpy(altIndexFileName+fLen-1, "i"); - } - if (!(altIndexFileName && isRegularFile(altIndexFileName))) - { - char *indexUrl = needMem(urlLen+5); - safef(indexUrl, urlLen+5, "%s.bai", fileOrUrl); - verbose(2, "going to call udcFileMayOpen(%s).\n", indexUrl); - struct udcFile *udcf = udcFileMayOpen(indexUrl, NULL); - if (udcf != NULL) - udcFileClose(&udcf); - else if (altIndexFileName != NULL) - { - char *altIndexUrl = cloneString(indexUrl); - strcpy(altIndexUrl+urlLen-1, "i"); - verbose(2, "going to call udcFileMayOpen(%s).\n", altIndexUrl); - udcf = udcFileMayOpen(altIndexUrl, NULL); - if (udcf == NULL) - { - warn("Cannot find BAM index file (%s or %s)", indexUrl, altIndexUrl); - return cloneString(fileOrUrl); - } - udcFileClose(&udcf); - freeMem(altIndexUrl); - } - else - { - warn("Cannot find BAM index file for \"%s\"", fileOrUrl); - return cloneString(fileOrUrl); - } - freeMem(indexUrl); - } - freeMem(altIndexFileName); - } - freeMem(indexFileName); - return bamFileName; - } -return cloneString(fileOrUrl); -} - #ifndef KNETFILE_HOOKS static char *getSamDir() /* Return the name of a trash dir for samtools to run in (it creates files in current dir) * and make sure the directory exists. */ { static char *samDir = NULL; char *dirName = "samtools"; if (samDir == NULL) { mkdirTrashDirectory(dirName); size_t len = strlen(trashDir()) + 1 + strlen(dirName) + 1; samDir = needMem(len); safef(samDir, len, "%s/%s", trashDir(), dirName); } return samDir; } #endif//ndef KNETFILE_HOOKS -boolean bamFileExistsUdc(char *fileOrUrl, char *udcFuseRoot) +boolean bamFileExists(char *fileOrUrl) /* Return TRUE if we can successfully open the bam file and its index file. */ { -char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot); +char *bamFileName = fileOrUrl; samfile_t *fh = samopen(bamFileName, "rb", NULL); boolean usingUrl = TRUE; usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); if (fh != NULL) { #ifndef KNETFILE_HOOKS // When file is an URL, this caches the index file in addition to validating: // Since samtools's url-handling code saves the .bai file to the current directory, // chdir to a trash directory before calling bam_index_load, then chdir back. char *runDir = getCurrentDir(); char *samDir = getSamDir(); if (usingUrl) setCurrentDir(samDir); #endif//ndef KNETFILE_HOOKS bam_index_t *idx = bam_index_load(bamFileName); @@ -148,76 +48,73 @@ if (usingUrl) setCurrentDir(runDir); #endif//ndef KNETFILE_HOOKS samclose(fh); if (idx == NULL) { warn("bamFileExists: failed to read index corresponding to %s", bamFileName); return FALSE; } free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc. return TRUE; } return FALSE; } -samfile_t *bamOpenUdc(char *fileOrUrl, char **retBamFileName, char *udcFuseRoot) +samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName) /* Return an open bam file, dealing with FUSE caching if need be. * Return parameter if NON-null will return the file name after FUSing */ { -char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot); +char *bamFileName = fileOrUrl; if (retBamFileName != NULL) *retBamFileName = bamFileName; samfile_t *fh = samopen(bamFileName, "rb", NULL); if (fh == NULL) { boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); struct dyString *urlWarning = dyStringNew(0); if (usingUrl) { - boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName)); - if (usingUdc) - dyStringAppend(urlWarning, " (using udcFuse)"); dyStringAppend(urlWarning, ". If you are able to access the URL with your web browser, " "please try reloading this page."); } errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string); } return fh; } void bamClose(samfile_t **pSamFile) /* Close down a samefile_t */ { if (pSamFile != NULL) { samclose(*pSamFile); *pSamFile = NULL; } } -void bamFetchUdc(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, - samfile_t **pSamFile, char *udcFuseRoot) +void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile) /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. */ { char *bamFileName = NULL; -samfile_t *fh = bamOpenUdc(fileOrUrl, &bamFileName, udcFuseRoot); +samfile_t *fh = bamOpen(fileOrUrl, &bamFileName); boolean usingUrl = TRUE; usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); if (pSamFile != NULL) *pSamFile = fh; int chromId, start, end; int ret = bam_parse_region(fh->header, position, &chromId, &start, &end); if (ret != 0 && startsWith("chr", position)) ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end); if (ret != 0) // If the bam file does not cover the current chromosome, OK return; #ifndef KNETFILE_HOOKS // Since samtools' url-handling code saves the .bai file to the current directory, // chdir to a trash directory before calling bam_index_load, then chdir back. char *runDir = getCurrentDir(); @@ -565,52 +462,52 @@ else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; } else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; } else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { dyStringAppend(dy, (char *)s); s += strlen((char *)s) + 1; } } } #else // If we're not compiling with samtools, make stub routines so compile won't fail: -boolean bamFileExistsUdcFuse(char *bamFileName, char *udcFuseRoot) +boolean bamFileExists(char *bamFileName) /* Return TRUE if we can successfully open the bam file and its index file. */ { -warn(COMPILE_WITH_SAMTOOLS, "bamFileExistsUdcFuse"); +warn(COMPILE_WITH_SAMTOOLS, "bamFileExists"); return FALSE; } -samfile_t *bamOpenUdcFuse(char *fileOrUrl, char **retBamFileName) -/* Return an open bam file, dealing with some FUSE caching if need be. */ +samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName) +/* Return an open bam file */ { warn(COMPILE_WITH_SAMTOOLS, "bamOpenUdc"); return FALSE; } void bamClose(samfile_t **pSamFile) /* Close down a samefile_t */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamClose"); } -void bamFetchUdcFuse(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, - samfile_t **pSamFile, char *udcFuseRoot) +void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile) /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch"); } boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc"); return FALSE; }