b29f1fbf93c9df5aa65fed050d9f8132dc6a2c73
braney
  Tue Nov 24 14:20:10 2015 -0800
support htslib for sam/bam/cram/tabix support  #14717

diff --git src/lib/linefile.c src/lib/linefile.c
index 5416c24..08a0e38 100644
--- src/lib/linefile.c
+++ src/lib/linefile.c
@@ -3,30 +3,33 @@
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "hash.h"
 #include <fcntl.h>
 #include <signal.h>
 #include "dystring.h"
 #include "errAbort.h"
 #include "linefile.h"
 #include "pipeline.h"
 #include "localmem.h"
 #include "cheapcgi.h"
 #include "udc.h"
+#ifdef USE_HTS
+#include "htslib/tbx.h"
+#endif
 
 char *getFileNameFromHdrSig(char *m)
 /* Check if header has signature of supported compression stream,
    and return a phoney filename for it, or NULL if no sig found. */
 {
 char buf[20];
 char *ext=NULL;
 if (startsWith("\x1f\x8b",m)) ext = "gz";
 else if (startsWith("\x1f\x9d\x90",m)) ext = "Z";
 else if (startsWith("BZ",m)) ext = "bz2";
 else if (startsWith("PK\x03\x04",m)) ext = "zip";
 if (ext==NULL)
     return NULL;
 safef(buf, sizeof(buf), LF_BOGUS_FILE_PREFIX "%s", ext);
 return cloneString(buf);
@@ -215,80 +218,112 @@
 #endif
 
 struct lineFile *lineFileTabixMayOpen(char *fileOrUrl, bool zTerm)
 /* Wrap a line file around a data file that has been compressed and indexed
  * by the tabix command line program.  The index file <fileOrUrl>.tbi must be
  * readable in addition to fileOrUrl. If there's a problem, warn & return NULL.
  * This works only if kent/src has been compiled with USE_TABIX=1 and linked
  * with the tabix C library. */
 {
 #ifdef USE_TABIX
 if (fileOrUrl == NULL)
     errAbort("lineFileTabixMayOpen: fileOrUrl is NULL");
 int tbiNameSize = strlen(fileOrUrl) + strlen(".tbi") + 1;
 char tbiName[tbiNameSize];
 safef(tbiName, sizeof(tbiName), "%s.tbi", fileOrUrl);
+#ifdef USE_HTS
+htsFile *htsFile = hts_open(fileOrUrl, "r");
+if (htsFile == NULL)
+    {
+    warn("Unable to open \"%s\"", fileOrUrl);
+    return NULL;
+    }
+tbx_t *tabix;
+if ((tabix = ti_index_load(tbiName)) == NULL)
+#else
 tabix_t *tabix = ti_open(fileOrUrl, tbiName);
 if (tabix == NULL)
     {
     warn("Unable to open \"%s\"", fileOrUrl);
     return NULL;
     }
 if ((tabix->idx = ti_index_load(tbiName)) == NULL)
+#endif
     {
     warn("Unable to load tabix index from \"%s\"", tbiName);
     ti_close(tabix);
     tabix = NULL;
     return NULL;
     }
 struct lineFile *lf = needMem(sizeof(struct lineFile));
 lf->fileName = cloneString(fileOrUrl);
 lf->fd = -1;
 lf->bufSize = 64 * 1024;
 lf->buf = needMem(lf->bufSize);
 lf->zTerm = zTerm;
 lf->tabix = tabix;
+#ifdef USE_HTS
+lf->htsFile = htsFile;
+kstring_t *kline;
+AllocVar(kline);
+kline->s = malloc(8192);
+lf->kline = kline;
+lf->tabixIter = tbx_itr_queryi(tabix, HTS_IDX_REST, 0, 0);
+#else
 lf->tabixIter = ti_iter_first();
+#endif
 return lf;
 #else // no USE_TABIX
 warn(COMPILE_WITH_TABIX, "lineFileTabixMayOpen");
 return NULL;
 #endif // no USE_TABIX
 }
 
 boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end)
 /* Assuming lf was created by lineFileTabixMayOpen, tell tabix to seek to the specified region
  * and return TRUE (or if there are no items in region, return FALSE). */
 {
 #ifdef USE_TABIX
 if (lf->tabix == NULL)
     errAbort("lineFileSetTabixRegion: lf->tabix is NULL.  Did you open lf with lineFileTabixMayOpen?");
 if (seqName == NULL)
     return FALSE;
+#ifdef USE_HTS
+int tabixSeqId = ti_get_tid(lf->tabix, seqName);
+if (tabixSeqId < 0 && startsWith("chr", seqName))
+    // We will get some files that have chr-less Ensembl chromosome names:
+    tabixSeqId = ti_get_tid(lf->tabix, seqName+strlen("chr"));
+if (tabixSeqId < 0)
+    return FALSE;
+ti_iter_t *iter = ti_queryi((tbx_t *)lf->tabix, tabixSeqId, start, end);
+#else
 int tabixSeqId = ti_get_tid(lf->tabix->idx, seqName);
 if (tabixSeqId < 0 && startsWith("chr", seqName))
     // We will get some files that have chr-less Ensembl chromosome names:
     tabixSeqId = ti_get_tid(lf->tabix->idx, seqName+strlen("chr"));
 if (tabixSeqId < 0)
     return FALSE;
 ti_iter_t iter = ti_queryi(lf->tabix, tabixSeqId, start, end);
+#endif
 if (iter == NULL)
     return FALSE;
 if (lf->tabixIter != NULL)
     ti_iter_destroy(lf->tabixIter);
 lf->tabixIter = iter;
+#ifndef USE_HTS
 lf->bufOffsetInFile = bgzf_tell(lf->tabix->fp);
+#endif
 lf->bytesInBuf = 0;
 lf->lineIx = -1;
 lf->lineStart = 0;
 lf->lineEnd = 0;
 return TRUE;
 #else // no USE_TABIX
 warn(COMPILE_WITH_TABIX, "lineFileSetTabixRegion");
 return FALSE;
 #endif // no USE_TABIX
 }
 
 struct lineFile *lineFileUdcMayOpen(char *fileOrUrl, bool zTerm)
 /* Create a line file object with an underlying UDC cache. NULL if not found. */
 {
 if (fileOrUrl == NULL)
@@ -467,42 +502,53 @@
     lf->lineIx = -1;
     lf->lineStart = 0;
     lf->lineEnd = lineSize;
     *retStart = line;
     freeMem(lf->buf);
     lf->buf = line;
     lf->bufSize = lineSize;
     return TRUE;
     }
 
 #ifdef USE_TABIX
 if (lf->tabix != NULL && lf->tabixIter != NULL)
     {
     // Just use line-oriented ti_read:
     int lineSize = 0;
+#ifdef USE_HTS
+    lineSize = tbx_itr_next(lf->htsFile, lf->tabix, lf->tabixIter, lf->kline);
+    if (lineSize == -1)
+	return FALSE;
+#else
     const char *line = ti_read(lf->tabix, lf->tabixIter, &lineSize);
     if (line == NULL)
 	return FALSE;
+#endif
     lf->bufOffsetInFile = -1;
     lf->bytesInBuf = lineSize;
     lf->lineIx = -1;
     lf->lineStart = 0;
     lf->lineEnd = lineSize;
     if (lineSize > lf->bufSize)
 	// shouldn't be!  but just in case:
 	lineFileExpandBuf(lf, lineSize * 2);
+#ifdef USE_HTS
+    kstring_t *kline = lf->kline;
+    safecpy(lf->buf, lf->bufSize, kline->s);
+#else 
     safecpy(lf->buf, lf->bufSize, line);
+#endif
     *retStart = lf->buf;
     if (retSize != NULL)
 	*retSize = lineSize;
     return TRUE;
     }
 #endif // USE_TABIX
 
 determineNlType(lf, buf+endIx, bytesInBuf);
 
 /* Find next end of line in buffer. */
 switch(lf->nlType)
     {
     case nlt_unix:
     case nlt_dos:
 	for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx)
@@ -536,31 +582,35 @@
     int oldEnd = lf->lineEnd;
     int sizeLeft = bytesInBuf - oldEnd;
     int bufSize = lf->bufSize;
     int readSize = bufSize - sizeLeft;
 
     if (oldEnd > 0 && sizeLeft > 0)
 	{
 	memmove(buf, buf+oldEnd, sizeLeft);
 	}
     lf->bufOffsetInFile += oldEnd;
     if (lf->fd >= 0)
 	readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize);
 #ifdef USE_TABIX
     else if (lf->tabix != NULL && readSize > 0)
 	{
+#ifdef USE_HTS
+        errAbort("bgzf read not supported with htslib (yet)");
+#else
 	readSize = bgzf_read(lf->tabix->fp, buf+sizeLeft, readSize);
+#endif
 	if (readSize < 1)
 	    return FALSE;
 	}
 #endif // USE_TABIX
     else
         readSize = 0;
 
     if ((readSize == 0) && (endIx > oldEnd))
 	{
 	endIx = sizeLeft;
 	buf[endIx] = 0;
 	lf->bytesInBuf = newStart = lf->lineStart = 0;
 	lf->lineEnd = endIx;
 	++lf->lineIx;
 	if (retSize != NULL)