11e45667d4e291b3038ccda729a1cdf5bcaf004a braney Mon Jul 11 15:46:54 2016 -0700 incorporate htslib in kent src, remove USE_BAM, USE_SAMTABIX, USE_TABIX defines, modify a bunch of makefiles to include kentSrc variable pointing to top of the tree. diff --git src/lib/linefile.c src/lib/linefile.c index b53741e..a307f36 100644 --- src/lib/linefile.c +++ src/lib/linefile.c @@ -3,33 +3,31 @@ * * This file is copyright 2002 Jim Kent, but license is hereby * granted for all use - public, private or commercial. */ #include "common.h" #include "hash.h" #include <fcntl.h> #include <signal.h> #include "dystring.h" #include "errAbort.h" #include "linefile.h" #include "pipeline.h" #include "localmem.h" #include "cheapcgi.h" #include "udc.h" -#ifdef USE_HTS #include "htslib/tbx.h" -#endif char *getFileNameFromHdrSig(char *m) /* Check if header has signature of supported compression stream, and return a phoney filename for it, or NULL if no sig found. */ { char buf[20]; char *ext=NULL; if (startsWith("\x1f\x8b",m)) ext = "gz"; else if (startsWith("\x1f\x9d\x90",m)) ext = "Z"; else if (startsWith("BZ",m)) ext = "bz2"; else if (startsWith("PK\x03\x04",m)) ext = "zip"; if (ext==NULL) return NULL; safef(buf, sizeof(buf), LF_BOGUS_FILE_PREFIX "%s", ext); return cloneString(buf); @@ -199,141 +197,99 @@ struct lineFile *lineFileOnString(char *name, bool zTerm, char *s) /* Wrap a line file object around string in memory. This buffer * have zeroes written into it and be freed when the line file * is closed. 
*/ { struct lineFile *lf; AllocVar(lf); lf->fileName = cloneString(name); lf->fd = -1; lf->bufSize = lf->bytesInBuf = strlen(s); lf->zTerm = zTerm; lf->buf = s; return lf; } -#if (defined USE_TABIX && defined KNETFILE_HOOKS && !defined USE_SAMTABIX) -// UCSC aliases for backwards compatibility with independently patched & linked samtools and tabix: -#define bgzf_tell ti_bgzf_tell -#define bgzf_read ti_bgzf_read -#endif struct lineFile *lineFileTabixMayOpen(char *fileOrUrl, bool zTerm) /* Wrap a line file around a data file that has been compressed and indexed * by the tabix command line program. The index file .tbi must be * readable in addition to fileOrUrl. If there's a problem, warn & return NULL. * This works only if kent/src has been compiled with USE_TABIX=1 and linked * with the tabix C library. */ { -#ifdef USE_TABIX if (fileOrUrl == NULL) errAbort("lineFileTabixMayOpen: fileOrUrl is NULL"); int tbiNameSize = strlen(fileOrUrl) + strlen(".tbi") + 1; char tbiName[tbiNameSize]; safef(tbiName, sizeof(tbiName), "%s.tbi", fileOrUrl); -#ifdef USE_HTS htsFile *htsFile = hts_open(fileOrUrl, "r"); if (htsFile == NULL) { warn("Unable to open \"%s\"", fileOrUrl); return NULL; } tbx_t *tabix; if ((tabix = tbx_index_load2(fileOrUrl, tbiName)) == NULL) -#else -tabix_t *tabix = ti_open(fileOrUrl, tbiName); -if (tabix == NULL) - { - warn("Unable to open \"%s\"", fileOrUrl); - return NULL; - } -if ((tabix->idx = ti_index_load(tbiName)) == NULL) -#endif { warn("Unable to load tabix index from \"%s\"", tbiName); if (tabix) ti_close(tabix); tabix = NULL; return NULL; } struct lineFile *lf = needMem(sizeof(struct lineFile)); lf->fileName = cloneString(fileOrUrl); lf->fd = -1; lf->bufSize = 64 * 1024; lf->buf = needMem(lf->bufSize); lf->zTerm = zTerm; lf->tabix = tabix; -#ifdef USE_HTS lf->htsFile = htsFile; kstring_t *kline; AllocVar(kline); kline->s = malloc(8192); lf->kline = kline; lf->tabixIter = tbx_itr_queryi(tabix, HTS_IDX_REST, 0, 0); -#else -lf->tabixIter = 
ti_iter_first(); -#endif return lf; -#else // no USE_TABIX -warn(COMPILE_WITH_TABIX, "lineFileTabixMayOpen"); -return NULL; -#endif // no USE_TABIX } boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end) /* Assuming lf was created by lineFileTabixMayOpen, tell tabix to seek to the specified region * and return TRUE (or if there are no items in region, return FALSE). */ { -#ifdef USE_TABIX if (lf->tabix == NULL) errAbort("lineFileSetTabixRegion: lf->tabix is NULL. Did you open lf with lineFileTabixMayOpen?"); if (seqName == NULL) return FALSE; -#ifdef USE_HTS int tabixSeqId = ti_get_tid(lf->tabix, seqName); if (tabixSeqId < 0 && startsWith("chr", seqName)) // We will get some files that have chr-less Ensembl chromosome names: tabixSeqId = ti_get_tid(lf->tabix, seqName+strlen("chr")); if (tabixSeqId < 0) return FALSE; ti_iter_t *iter = ti_queryi((tbx_t *)lf->tabix, tabixSeqId, start, end); -#else -int tabixSeqId = ti_get_tid(lf->tabix->idx, seqName); -if (tabixSeqId < 0 && startsWith("chr", seqName)) - // We will get some files that have chr-less Ensembl chromosome names: - tabixSeqId = ti_get_tid(lf->tabix->idx, seqName+strlen("chr")); -if (tabixSeqId < 0) - return FALSE; -ti_iter_t iter = ti_queryi(lf->tabix, tabixSeqId, start, end); -#endif if (iter == NULL) return FALSE; if (lf->tabixIter != NULL) ti_iter_destroy(lf->tabixIter); lf->tabixIter = iter; -#ifndef USE_HTS -lf->bufOffsetInFile = bgzf_tell(lf->tabix->fp); -#endif lf->bytesInBuf = 0; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = 0; return TRUE; -#else // no USE_TABIX -warn(COMPILE_WITH_TABIX, "lineFileSetTabixRegion"); -return FALSE; -#endif // no USE_TABIX } struct lineFile *lineFileUdcMayOpen(char *fileOrUrl, bool zTerm) /* Create a line file object with an underlying UDC cache. NULL if not found. 
*/ { if (fileOrUrl == NULL) errAbort("lineFileUdcMayOpen: fileOrUrl is NULL"); struct udcFile *udcFile = udcFileMayOpen(fileOrUrl, NULL); if (udcFile == NULL) return NULL; struct lineFile *lf; AllocVar(lf); lf->fileName = cloneString(fileOrUrl); @@ -383,34 +339,32 @@ struct lineFile *lf = lineFileMayOpen(fileName, zTerm); if (lf == NULL) errAbort("Couldn't open %s , %s", fileName, strerror(errno)); return lf; } void lineFileReuse(struct lineFile *lf) /* Reuse current line. */ { lf->reuse = TRUE; } INLINE void noTabixSupport(struct lineFile *lf, char *where) { -#ifdef USE_TABIX if (lf->tabix != NULL) lineFileAbort(lf, "%s: not implemented for lineFile opened with lineFileTabixMayOpen.", where); -#endif // USE_TABIX } void lineFileSeek(struct lineFile *lf, off_t offset, int whence) /* Seek to read next line from given position. */ { noTabixSupport(lf, "lineFileSeek"); if (lf->checkSupport) lf->checkSupport(lf, "lineFileSeek"); if (lf->pl != NULL) errnoAbort("Can't lineFileSeek on a compressed file: %s", lf->fileName); lf->reuse = FALSE; if (lf->udcFile) { udcSeek(lf->udcFile, offset); return; @@ -498,64 +452,52 @@ char *line = udcReadLine(lf->udcFile); if (line==NULL) return FALSE; int lineSize = strlen(line); lf->bytesInBuf = lineSize; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = lineSize; *retStart = line; freeMem(lf->buf); lf->buf = line; lf->bufSize = lineSize; return TRUE; } -#ifdef USE_TABIX if (lf->tabix != NULL && lf->tabixIter != NULL) { // Just use line-oriented ti_read: int lineSize = 0; -#ifdef USE_HTS lineSize = tbx_itr_next(lf->htsFile, lf->tabix, lf->tabixIter, lf->kline); if (lineSize == -1) return FALSE; -#else - const char *line = ti_read(lf->tabix, lf->tabixIter, &lineSize); - if (line == NULL) - return FALSE; -#endif lf->bufOffsetInFile = -1; lf->bytesInBuf = lineSize; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = lineSize; if (lineSize > lf->bufSize) // shouldn't be! 
but just in case: lineFileExpandBuf(lf, lineSize * 2); -#ifdef USE_HTS kstring_t *kline = lf->kline; safecpy(lf->buf, lf->bufSize, kline->s); -#else - safecpy(lf->buf, lf->bufSize, line); -#endif *retStart = lf->buf; if (retSize != NULL) *retSize = lineSize; return TRUE; } -#endif // USE_TABIX determineNlType(lf, buf+endIx, bytesInBuf); /* Find next end of line in buffer. */ switch(lf->nlType) { case nlt_unix: case nlt_dos: for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx) { if (buf[endIx] == '\n') { gotLf = TRUE; endIx += 1; break; @@ -580,42 +522,36 @@ /* If not in buffer read in a new buffer's worth. */ while (!gotLf) { int oldEnd = lf->lineEnd; int sizeLeft = bytesInBuf - oldEnd; int bufSize = lf->bufSize; int readSize = bufSize - sizeLeft; if (oldEnd > 0 && sizeLeft > 0) { memmove(buf, buf+oldEnd, sizeLeft); } lf->bufOffsetInFile += oldEnd; if (lf->fd >= 0) readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize); -#ifdef USE_TABIX else if (lf->tabix != NULL && readSize > 0) { -#ifdef USE_HTS errAbort("bgzf read not supported with htslib (yet)"); -#else - readSize = bgzf_read(lf->tabix->fp, buf+sizeLeft, readSize); -#endif if (readSize < 1) return FALSE; } -#endif // USE_TABIX else readSize = 0; if ((readSize == 0) && (endIx > oldEnd)) { endIx = sizeLeft; buf[endIx] = 0; lf->bytesInBuf = newStart = lf->lineStart = 0; lf->lineEnd = endIx; ++lf->lineIx; if (retSize != NULL) *retSize = endIx - newStart; *retStart = buf + newStart; if (*retStart[0] == '#') metaDataAdd(lf, *retStart); @@ -731,43 +667,39 @@ /* Close up a line file. 
*/ { struct lineFile *lf; if ((lf = *pLf) != NULL) { struct pipeline *pl = lf->pl; if (pl != NULL) { pipelineClose(&lf->pl); } else if (lf->fd > 0 && lf->fd != fileno(stdin)) { close(lf->fd); freeMem(lf->buf); } -#ifdef USE_TABIX else if (lf->tabix != NULL) { if (lf->tabixIter != NULL) ti_iter_destroy(lf->tabixIter); ti_close(lf->tabix); -#ifdef USE_HTS hts_close(lf->htsFile); kstring_t *kline = lf->kline; free(kline->s); -#endif } -#endif // USE_TABIX else if (lf->udcFile != NULL) udcFileClose(&lf->udcFile); if (lf->closeCallBack) lf->closeCallBack(lf); freeMem(lf->fileName); metaDataFree(lf); freez(pLf); } } void lineFileCloseList(struct lineFile **pList) /* Close up a list of line files. */ { struct lineFile *el, *next;