src/lib/linefile.c 9b043c5143cff7efe24ec0f9baf61c71104acc5a

9b043c5143cff7efe24ec0f9baf61c71104acc5a
angie
  Thu Aug 20 10:06:41 2020 -0700
Allow SARS-CoV-2 VCF to use GenBank or RefSeq ID instead of our chromified RefSeq ID.  (Russ request, no RM)

diff --git src/lib/linefile.c src/lib/linefile.c
index f953e1f..8609c63 100644
--- src/lib/linefile.c
+++ src/lib/linefile.c
@@ -1,1468 +1,1475 @@
 /* lineFile - stuff to rapidly read text files and parse them into
  * lines.
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "hash.h"
 #include <fcntl.h>
 #include <signal.h>
 #include "dystring.h"
 #include "errAbort.h"
 #include "linefile.h"
 #include "pipeline.h"
 #include "localmem.h"
 #include "cheapcgi.h"
 #include "udc.h"
 #include "htslib/tbx.h"
 
 char *getFileNameFromHdrSig(char *m)
 /* Check if header has signature of supported compression stream,
    and return a phoney filename for it, or NULL if no sig found. */
 {
 char buf[20];
 char *ext=NULL;
 if (startsWith("\x1f\x8b",m)) ext = "gz";
 else if (startsWith("\x1f\x9d\x90",m)) ext = "Z";
 else if (startsWith("BZ",m)) ext = "bz2";
 else if (startsWith("PK\x03\x04",m)) ext = "zip";
 if (ext==NULL)
     return NULL;
 safef(buf, sizeof(buf), LF_BOGUS_FILE_PREFIX "%s", ext);
 return cloneString(buf);
 }
 
 static char **getDecompressor(char *fileName)
 /* if a file is compressed, return the command to decompress the
  * approriate format, otherwise return NULL */
 {
 static char *GZ_READ[] = {"gzip", "-dc", NULL};
 static char *Z_READ[] = {"gzip", "-dc", NULL};
 static char *BZ2_READ[] = {"bzip2", "-dc", NULL};
 static char *ZIP_READ[] = {"gzip", "-dc", NULL};
 
 char **result = NULL;
 char *fileNameDecoded = cloneString(fileName);
 if (startsWith("http://" , fileName)
  || startsWith("https://", fileName)
  || startsWith("ftp://",   fileName))
     cgiDecode(fileName, fileNameDecoded, strlen(fileName));
 
 if      (endsWith(fileNameDecoded, ".gz"))
     result = GZ_READ;
 else if (endsWith(fileNameDecoded, ".Z"))
     result = Z_READ;
 else if (endsWith(fileNameDecoded, ".bz2"))
     result = BZ2_READ;
 else if (endsWith(fileNameDecoded, ".zip"))
     result = ZIP_READ;
 
 freeMem(fileNameDecoded);
 return result;
 
 }
 
 static void metaDataAdd(struct lineFile *lf, char *line)
 /* write a line of metaData to output file
  * internal function called by lineFileNext */
 {
 struct metaOutput *meta = NULL;
 
 if (lf->isMetaUnique)
     {
     /* suppress repetition of comments */
     if (hashLookup(lf->metaLines, line))
         {
         return;
         }
     hashAdd(lf->metaLines, line, NULL);
     }
 for (meta = lf->metaOutput ; meta != NULL ; meta = meta->next)
     if (line != NULL && meta->metaFile != NULL)
         fprintf(meta->metaFile,"%s\n", line);
 }
 
 static void metaDataFree(struct lineFile *lf)
 /* free saved comments */
 {
 if (lf->isMetaUnique && lf->metaLines)
     freeHash(&lf->metaLines);
 }
 
 void lineFileSetMetaDataOutput(struct lineFile *lf, FILE *f)
 /* set file to write meta data to,
  * should be called before reading from input file */
 {
 struct metaOutput *meta = NULL;
 if (lf == NULL)
     return;
 AllocVar(meta);
 meta->next = NULL;
 meta->metaFile = f;
 slAddHead(&lf->metaOutput, meta);
 }
 
 void lineFileSetUniqueMetaData(struct lineFile *lf)
 /* suppress duplicate lines in metadata */
 {
 lf->isMetaUnique = TRUE;
 lf->metaLines = hashNew(8);
 }
 
 static char * headerBytes(char *fileName, int numbytes)
 /* Return specified number of header bytes from file
  * if file exists as a string which should be freed. */
 {
 int fd,bytesread=0;
 char *result = NULL;
 if ((fd = open(fileName, O_RDONLY)) >= 0)
     {
     result=needMem(numbytes+1);
     if ((bytesread=read(fd,result,numbytes)) < numbytes)
 	freez(&result);  /* file too short? can read numbytes */
     else
 	result[numbytes]=0;
     close(fd);
     }
 return result;
 }
 
 
 struct lineFile *lineFileDecompress(char *fileName, bool zTerm)
 /* open a linefile with decompression */
 {
 struct pipeline *pl;
 struct lineFile *lf;
 char *testName = NULL;
 char *testbytes = NULL;    /* the header signatures for .gz, .bz2, .Z,
 			    * .zip are all 2-4 bytes only */
 if (fileName==NULL)
   return NULL;
 testbytes=headerBytes(fileName,4);
 if (!testbytes)
     return NULL;  /* avoid error from pipeline */
 testName=getFileNameFromHdrSig(testbytes);
 freez(&testbytes);
 if (!testName)
     return NULL;  /* avoid error from pipeline */
 pl = pipelineOpen1(getDecompressor(fileName), pipelineRead|pipelineSigpipe, fileName, NULL);
 lf = lineFileAttach(fileName, zTerm, pipelineFd(pl));
 lf->pl = pl;
 return lf;
 }
 
 struct lineFile *lineFileDecompressFd(char *name, bool zTerm, int fd)
 /* open a linefile with decompression from a file or socket descriptor */
 {
 struct pipeline *pl;
 struct lineFile *lf;
 pl = pipelineOpenFd1(getDecompressor(name), pipelineRead|pipelineSigpipe, fd, STDERR_FILENO);
 lf = lineFileAttach(name, zTerm, pipelineFd(pl));
 lf->pl = pl;
 return lf;
 }
 
 
 
 struct lineFile *lineFileDecompressMem(bool zTerm, char *mem, long size)
 /* open a linefile with decompression from a memory stream */
 {
 struct pipeline *pl;
 struct lineFile *lf;
 char *fileName = getFileNameFromHdrSig(mem);
 if (fileName==NULL)
   return NULL;
 pl = pipelineOpenMem1(getDecompressor(fileName), pipelineRead|pipelineSigpipe, mem, size, STDERR_FILENO);
 lf = lineFileAttach(fileName, zTerm, pipelineFd(pl));
 lf->pl = pl;
 return lf;
 }
 
 
 
 struct lineFile *lineFileAttach(char *fileName, bool zTerm, int fd)
 /* Wrap a line file around an open'd file. */
 {
 struct lineFile *lf;
 AllocVar(lf);
 lf->fileName = cloneString(fileName);
 lf->fd = fd;
 lf->bufSize = 64*1024;
 lf->zTerm = zTerm;
 lf->buf = needMem(lf->bufSize+1);
 return lf;
 }
 
 struct lineFile *lineFileOnString(char *name, bool zTerm, char *s)
 /* Wrap a line file object around string in memory. This buffer
  * have zeroes written into it and be freed when the line file
  * is closed. */
 {
 struct lineFile *lf;
 AllocVar(lf);
 lf->fileName = cloneString(name);
 lf->fd = -1;
 lf->bufSize = lf->bytesInBuf = strlen(s);
 lf->zTerm = zTerm;
 lf->buf = s;
 return lf;
 }
 
 
 struct lineFile *lineFileTabixMayOpen(char *fileOrUrl, bool zTerm)
 /* Wrap a line file around a data file that has been compressed and indexed
  * by the tabix command line program. <fileOrUrl>.tbi must be readable in
  * addition to fileOrUrl. If there's a problem, warn & return NULL.  This works
  * only if kent/src has been compiled with USE_TABIX=1 and linked
  * with the tabix C library. */
 {
 return lineFileTabixAndIndexMayOpen(fileOrUrl, NULL, zTerm);
 }
 
 
 struct lineFile *lineFileTabixAndIndexMayOpen(char *fileOrUrl, char *tbiFileOrUrl, bool zTerm)
 /* Wrap a line file around a data file that has been compressed and indexed
  * by the tabix command line program. tbiFileOrUrl can be NULL, it defaults to <fileOrUrl>.tbi.
  * It must be readable in addition to fileOrUrl. If there's a problem, warn & return NULL.
  * This works only if kent/src has been compiled with USE_TABIX=1 and linked
  * with the tabix C library. */
 {
 if (fileOrUrl == NULL)
     errAbort("lineFileTabixMayOpen: fileOrUrl is NULL");
 
 char tbiName[4096];
 if (tbiFileOrUrl==NULL)
     safef(tbiName, sizeof(tbiName), "%s.tbi", fileOrUrl);
 else
     safef(tbiName, sizeof(tbiName), "%s", tbiFileOrUrl);
 
 htsFile *htsFile = hts_open(fileOrUrl, "r");
 if (htsFile == NULL)
     {
     warn("Unable to open \"%s\"", fileOrUrl);
     return NULL;
     }
 tbx_t *tabix;
 if ((tabix = tbx_index_load2(fileOrUrl, tbiName)) == NULL)
     {
     warn("Unable to load tabix index from \"%s\"", tbiName);
     if (tabix)
         ti_close(tabix);
     tabix = NULL;
     return NULL;
     }
 struct lineFile *lf = needMem(sizeof(struct lineFile));
 lf->fileName = cloneString(fileOrUrl);
 lf->fd = -1;
 lf->bufSize = 64 * 1024;
 lf->buf = needMem(lf->bufSize);
 lf->zTerm = zTerm;
 lf->tabix = tabix;
 lf->htsFile = htsFile;
 kstring_t *kline;
 AllocVar(kline);
 kline->s = malloc(8192);
 lf->kline = kline;
 lf->tabixIter = tbx_itr_queryi(tabix, HTS_IDX_REST, 0, 0);
 return lf;
 }
 
 boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end)
 /* Assuming lf was created by lineFileTabixMayOpen, tell tabix to seek to the specified region
  * and return TRUE (or if there are no items in region, return FALSE). */
 {
 if (lf->tabix == NULL)
     errAbort("lineFileSetTabixRegion: lf->tabix is NULL.  Did you open lf with lineFileTabixMayOpen?");
 if (seqName == NULL)
     return FALSE;
 int tabixSeqId = ti_get_tid(lf->tabix, seqName);
 if (tabixSeqId < 0 && startsWith("chr", seqName))
     // We will get some files that have chr-less Ensembl chromosome names:
     tabixSeqId = ti_get_tid(lf->tabix, seqName+strlen("chr"));
+// Allow SARS-CoV-2 VCF to use GenBank or RefSeq ID instead of our chromified RefSeq ID:
+if (tabixSeqId < 0 && sameString(seqName, "NC_045512v2"))
+    {
+    tabixSeqId = ti_get_tid(lf->tabix, "MN908947.3");
+    if (tabixSeqId < 0)
+        tabixSeqId = ti_get_tid(lf->tabix, "NC_045512.2");
+    }
 if (tabixSeqId < 0)
     return FALSE;
 ti_iter_t *iter = ti_queryi((tbx_t *)lf->tabix, tabixSeqId, start, end);
 if (iter == NULL)
     return FALSE;
 if (lf->tabixIter != NULL)
     ti_iter_destroy(lf->tabixIter);
 lf->tabixIter = iter;
 lf->bytesInBuf = 0;
 lf->lineIx = -1;
 lf->lineStart = 0;
 lf->lineEnd = 0;
 return TRUE;
 }
 
 struct lineFile *lineFileUdcMayOpen(char *fileOrUrl, bool zTerm)
 /* Create a line file object with an underlying UDC cache. NULL if not found. */
 {
 if (fileOrUrl == NULL)
     errAbort("lineFileUdcMayOpen: fileOrUrl is NULL");
 
 if (udcIsLocal(fileOrUrl))
      return lineFileOpen(fileOrUrl, zTerm);
 else
     {
     struct udcFile *udcFile = udcFileMayOpen(fileOrUrl, NULL);
     if (udcFile == NULL)
 	return NULL;
     struct lineFile *lf;
     AllocVar(lf);
     lf->fileName = cloneString(fileOrUrl);
     lf->fd = -1;
     lf->bufSize = 0;
     lf->buf = NULL;
     lf->zTerm = zTerm;
     lf->udcFile = udcFile;
     return lf;
     }
 }
 
 
 void lineFileExpandBuf(struct lineFile *lf, int newSize)
 /* Expand line file buffer. */
 {
 assert(newSize > lf->bufSize);
 lf->buf = needMoreMem(lf->buf, lf->bytesInBuf, newSize);
 lf->bufSize = newSize;
 }
 
 
 struct lineFile *lineFileStdin(bool zTerm)
 /* Wrap a line file around stdin. */
 {
 return lineFileAttach("stdin", zTerm, fileno(stdin));
 }
 
 struct lineFile *lineFileMayOpen(char *fileName, bool zTerm)
 /* Try and open up a lineFile. */
 {
 if (sameString(fileName, "stdin"))
     return lineFileStdin(zTerm);
 else if (getDecompressor(fileName) != NULL)
     return lineFileDecompress(fileName, zTerm);
 else
     {
     int fd = open(fileName, O_RDONLY);
     if (fd == -1)
         return NULL;
     return lineFileAttach(fileName, zTerm, fd);
     }
 }
 
 struct lineFile *lineFileOpen(char *fileName, bool zTerm)
 /* Open up a lineFile or die trying. */
 {
 struct lineFile *lf = lineFileMayOpen(fileName, zTerm);
 if (lf == NULL)
     errAbort("Couldn't open %s , %s", fileName, strerror(errno));
 return lf;
 }
 
 void lineFileReuse(struct lineFile *lf)
 /* Reuse current line. */
 {
 lf->reuse = TRUE;
 }
 
 
 INLINE void noTabixSupport(struct lineFile *lf, char *where)
 {
 if (lf->tabix != NULL)
     lineFileAbort(lf, "%s: not implemented for lineFile opened with lineFileTabixMayOpen.", where);
 }
 
 void lineFileSeek(struct lineFile *lf, off_t offset, int whence)
 /* Seek to read next line from given position. */
 {
 noTabixSupport(lf, "lineFileSeek");
 if (lf->checkSupport)
     lf->checkSupport(lf, "lineFileSeek");
 if (lf->pl != NULL)
     errnoAbort("Can't lineFileSeek on a compressed file: %s", lf->fileName);
 lf->reuse = FALSE;
 if (lf->udcFile)
     {
     udcSeek(lf->udcFile, offset);
     return;
     }
 lf->lineStart = lf->lineEnd = lf->bytesInBuf = 0;
 if ((lf->bufOffsetInFile = lseek(lf->fd, offset, whence)) == -1)
     errnoAbort("Couldn't lineFileSeek %s", lf->fileName);
 }
 
 void lineFileRewind(struct lineFile *lf)
 /* Return lineFile to start. */
 {
 lineFileSeek(lf, 0, SEEK_SET);
 lf->lineIx = 0;
 }
 
 int lineFileLongNetRead(int fd, char *buf, int size)
 /* Keep reading until either get no new characters or
  * have read size */
 {
 int oneSize, totalRead = 0;
 
 while (size > 0)
     {
     oneSize = read(fd, buf, size);
     if (oneSize <= 0)
         break;
     totalRead += oneSize;
     buf += oneSize;
     size -= oneSize;
     }
 return totalRead;
 }
 
 void lineFileCarefulNewlines(struct lineFile *lf)
 /* Tell lf to use a less efficient method of scanning for the next newline that can handle
  * files with a mix of newline conventions. */
 {
 lf->nlType = nlt_mixed;
 }
 
 static void determineNlType(struct lineFile *lf, char *buf, int bufSize)
 /* determine type of newline used for the file, assumes buffer not empty */
 {
 char *c = buf;
 if (bufSize==0) return;
 if (lf->nlType != nlt_undet) return;  /* if already determined just exit */
 while (c < buf+bufSize)
     {
     if (*c=='\r')
 	{
     	lf->nlType = nlt_mac;
 	if (++c < buf+bufSize)
     	    if (*c == '\n')
     		lf->nlType = nlt_dos;
 	return;
 	}
     if (*(c++) == '\n')
 	{
         lf->nlType = nlt_unix;
 	return;
 	}
     }
 }
 
 static boolean findNextNewline(struct lineFile *lf, char *buf, int bytesInBuf, int *pEndIx)
 /* Return TRUE if able to find next end of line in buf, starting at buf[*pEndIx], up to bytesInBuf.
  * When done set *pEndIx to the start of the next line if applicable, otherwise bytesInBuf. */
 {
 boolean gotLf = FALSE;
 int endIx = *pEndIx;
 switch (lf->nlType)
     {
     case nlt_unix:
     case nlt_dos:
         for (endIx = *pEndIx; endIx < bytesInBuf; ++endIx)
             {
             if (buf[endIx] == '\n')
                 {
                 gotLf = TRUE;
                 endIx += 1;
                 break;
                 }
             }
         break;
     case nlt_mac:
         for (endIx = *pEndIx; endIx < bytesInBuf; ++endIx)
             {
             if (buf[endIx] == '\r')
                 {
                 gotLf = TRUE;
                 endIx += 1;
                 break;
                 }
             }
         break;
     case nlt_mixed:
     case nlt_undet:
         for (endIx = *pEndIx; endIx < bytesInBuf; ++endIx)
             {
             char c = buf[endIx];
             if (c == '\r' || c == '\n')
                 {
                 gotLf = TRUE;
                 if (lf->zTerm)
                     buf[endIx] = '\0';
                 endIx += 1;
                 if (c == '\r' && buf[endIx] == '\n')
                     {
                     if (lf->zTerm)
                         buf[endIx] = '\0';
                     endIx += 1;
                     }
                 break;
                 }
             }
         break;
     }
 *pEndIx = endIx;
 return gotLf;
 }
 
 boolean lineFileNext(struct lineFile *lf, char **retStart, int *retSize)
 /* Fetch next line from file. */
 {
 int newStart;
 
 if (lf->reuse)
     {
     lf->reuse = FALSE;
     if (retSize != NULL)
 	*retSize = lf->lineEnd - lf->lineStart;
     *retStart = lf->buf + lf->lineStart;
     if (lf->metaOutput && *retStart[0] == '#')
         metaDataAdd(lf, *retStart);
     return TRUE;
     }
 
 if (lf->nextCallBack)
     return lf->nextCallBack(lf, retStart, retSize);
 
 if (lf->udcFile)
     {
     lf->bufOffsetInFile = udcTell(lf->udcFile);
     char *line = udcReadLine(lf->udcFile);
     if (line==NULL)
         return FALSE;
     int lineSize = strlen(line);
     lf->bytesInBuf = lineSize;
     ++lf->lineIx;
     lf->lineStart = 0;
     lf->lineEnd = lineSize;
     *retStart = line;
     freeMem(lf->buf);
     lf->buf = line;
     lf->bufSize = lineSize;
     if (retSize != NULL)
 	*retSize = lineSize;
     return TRUE;
     }
 
 if (lf->tabix != NULL && lf->tabixIter != NULL)
     {
     // Just use line-oriented ti_read:
     int lineSize = 0;
     lineSize = tbx_itr_next(lf->htsFile, lf->tabix, lf->tabixIter, lf->kline);
     if (lineSize == -1)
 	return FALSE;
     lf->bufOffsetInFile = -1;
     lf->bytesInBuf = lineSize;
     lf->lineIx = -1;
     lf->lineStart = 0;
     lf->lineEnd = lineSize;
     if (lineSize > lf->bufSize)
 	// shouldn't be!  but just in case:
 	lineFileExpandBuf(lf, lineSize * 2);
     kstring_t *kline = lf->kline;
     safecpy(lf->buf, lf->bufSize, kline->s);
     *retStart = lf->buf;
     if (retSize != NULL)
 	*retSize = lineSize;
     return TRUE;
     }
 
 char *buf = lf->buf;
 int endIx = lf->lineEnd;
 int bytesInBuf = lf->bytesInBuf;
 determineNlType(lf, buf+endIx, bytesInBuf-endIx);
 boolean gotLf = findNextNewline(lf, buf, bytesInBuf, &endIx);
 
 /* If not in buffer read in a new buffer's worth. */
 while (!gotLf)
     {
     int oldEnd = lf->lineEnd;
     int sizeLeft = bytesInBuf - oldEnd;
     int bufSize = lf->bufSize;
     int readSize = bufSize - sizeLeft;
 
     if (oldEnd > 0 && sizeLeft > 0)
 	{
 	memmove(buf, buf+oldEnd, sizeLeft);
 	}
     lf->bufOffsetInFile += oldEnd;
     if (lf->fd >= 0)
 	readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize);
     else if (lf->tabix != NULL && readSize > 0)
 	{
         errAbort("bgzf read not supported with htslib (yet)");
 	if (readSize < 1)
 	    return FALSE;
 	}
     else
         readSize = 0;
 
     if ((readSize == 0) && (endIx > oldEnd))
 	{
 	endIx = sizeLeft;
 	buf[endIx] = 0;
 	lf->bytesInBuf = newStart = lf->lineStart = 0;
 	lf->lineEnd = endIx;
 	++lf->lineIx;
 	if (retSize != NULL)
 	    *retSize = endIx - newStart;
 	*retStart = buf + newStart;
         if (*retStart[0] == '#')
             metaDataAdd(lf, *retStart);
 	return TRUE;
 	}
     else if (readSize <= 0)
 	{
 	lf->bytesInBuf = lf->lineStart = lf->lineEnd = 0;
 	return FALSE;
 	}
     else
         endIx = sizeLeft;
 
     bytesInBuf = lf->bytesInBuf = readSize + sizeLeft;
     lf->lineEnd = 0;
 
     determineNlType(lf, buf+endIx, bytesInBuf-endIx);
     gotLf = findNextNewline(lf, buf, bytesInBuf, &endIx);
 
     if (!gotLf && bytesInBuf == lf->bufSize)
         {
 	if (bufSize >= 512*1024*1024)
 	    {
 	    errAbort("Line too long (more than %d chars) line %d of %s",
 		lf->bufSize, lf->lineIx+1, lf->fileName);
 	    }
 	else
 	    {
 	    lineFileExpandBuf(lf, bufSize*2);
 	    buf = lf->buf;
 	    }
 	}
     }
 
 if (lf->zTerm)
     {
     buf[endIx-1] = 0;
     if ((lf->nlType == nlt_dos) && (buf[endIx-2]=='\r'))
 	{
 	buf[endIx-2] = 0;
 	}
     }
 
 lf->lineStart = newStart = lf->lineEnd;
 lf->lineEnd = endIx;
 ++lf->lineIx;
 if (retSize != NULL)
     *retSize = endIx - newStart;
 *retStart = buf + newStart;
 if (*retStart[0] == '#')
     metaDataAdd(lf, *retStart);
 return TRUE;
 }
 
 void lineFileVaAbort(struct lineFile *lf, char *format, va_list args)
 /* Print file name, line number, and error message, and abort. */
 {
 struct dyString *dy = dyStringNew(0);
 dyStringPrintf(dy,  "Error line %d of %s: ", lf->lineIx, lf->fileName);
 dyStringVaPrintf(dy, format, args);
 errAbort("%s", dy->string);
 dyStringFree(&dy);
 }
 
 void lineFileAbort(struct lineFile *lf, char *format, ...)
 /* Print file name, line number, and error message, and abort. */
 {
 va_list args;
 va_start(args, format);
 lineFileVaAbort(lf, format, args);
 va_end(args);
 }
 
 void lineFileUnexpectedEnd(struct lineFile *lf)
 /* Complain about unexpected end of file. */
 {
 errAbort("Unexpected end of file in %s", lf->fileName);
 }
 
 void lineFileNeedNext(struct lineFile *lf, char **retStart, int *retSize)
 /* Fetch next line from file.  Squawk and die if it's not there. */
 {
 if (!lineFileNext(lf, retStart, retSize))
     lineFileUnexpectedEnd(lf);
 }
 
 void lineFileClose(struct lineFile **pLf)
 /* Close up a line file. */
 {
 struct lineFile *lf;
 if ((lf = *pLf) != NULL)
     {
     struct pipeline *pl = lf->pl;
     if (pl != NULL)
         {
         pipelineClose(&lf->pl);
         }
     else if (lf->fd > 0 && lf->fd != fileno(stdin))
 	{
 	close(lf->fd);
 	freeMem(lf->buf);
 	}
     else if (lf->tabix != NULL)
 	{
 	if (lf->tabixIter != NULL)
 	    ti_iter_destroy(lf->tabixIter);
 	ti_close(lf->tabix);
         hts_close(lf->htsFile);
         kstring_t *kline = lf->kline;
         free(kline->s);
 	}
     else if (lf->udcFile != NULL)
         udcFileClose(&lf->udcFile);
 
     if (lf->closeCallBack)
         lf->closeCallBack(lf);
     freeMem(lf->fileName);
     metaDataFree(lf);
     freez(pLf);
     }
 }
 
 void lineFileCloseList(struct lineFile **pList)
 /* Close up a list of line files. */
 {
 struct lineFile *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     lineFileClose(&el);
     }
 *pList = NULL;
 }
 
 void lineFileExpectWords(struct lineFile *lf, int expecting, int got)
 /* Check line has right number of words. */
 {
 if (expecting != got)
     errAbort("Expecting %d words line %d of %s got %d",
 	    expecting, lf->lineIx, lf->fileName, got);
 }
 
 void lineFileExpectAtLeast(struct lineFile *lf, int expecting, int got)
 /* Check line has right number of words. */
 {
 if (got < expecting)
     errAbort("Expecting at least %d words line %d of %s got %d",
 	    expecting, lf->lineIx, lf->fileName, got);
 }
 
 void lineFileShort(struct lineFile *lf)
 /* Complain that line is too short. */
 {
 errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
 }
 
 void lineFileReuseFull(struct lineFile *lf)
 // Reuse last full line read.  Unlike lineFileReuse,
 // lineFileReuseFull only works with previous lineFileNextFull call
 {
 assert(lf->fullLine != NULL);
 lf->fullLineReuse = TRUE;
 }
 
 
 boolean lineFileNextFull(struct lineFile *lf, char **retFull, int *retFullSize,
                         char **retRaw, int *retRawSize)
 // Fetch next line from file joining up any that are continued by ending '\'
 // If requested, and was joined, the unjoined raw lines are also returned
 // NOTE: comment lines can't be continued!  ("# comment \ \n more comment" is 2 lines.)
 {
 // May have requested reusing the last full line.
 if (lf->fullLineReuse)
     {
     lf->fullLineReuse = FALSE;
     assert(lf->fullLine != NULL);
     *retFull = dyStringContents(lf->fullLine);
     if (retFullSize)
         *retFullSize = dyStringLen(lf->fullLine);
     if (retRaw != NULL)
         {
         assert(lf->rawLines != NULL);
         *retRaw = dyStringContents(lf->rawLines);
         if (retRawSize)
             *retRawSize = dyStringLen(lf->rawLines);
         }
     return TRUE;
     }
 
 // Empty pointers
 *retFull = NULL;
 if (retRaw != NULL)
     *retRaw = NULL;
 
 // Prepare lf buffers
 if (lf->fullLine == NULL)
     {
     lf->fullLine = dyStringNew(1024);
     lf->rawLines = dyStringNew(1024); // Better to always create it than test every time
     }
 else
     {
     dyStringClear(lf->fullLine);
     dyStringClear(lf->rawLines);
     }
 
 char *line;
 while (lineFileNext(lf, &line, NULL))
     {
     char *start = skipLeadingSpaces(line);
 
     // Will the next line continue this one?
     char *end = start;
     if (*start == '#')  // Comment lines can't be continued!
         end = start + strlen(start);
     else
         {
         while (*end != '\0')  // walking forward for efficiency (avoid strlens())
             {
             for (;*end != '\0' && *end != '\\'; end++) ; // Tight loop to find '\'
             if (*end == '\0')
                 break;
 
             // This could be a continuation
             char *slash = end;
             if (*(++end) == '\\')  // escaped
                 continue;
             end = skipLeadingSpaces(end);
 
             if (*end == '\0') // Just whitespace after '\', so true continuation mark
                 {
                 if (retRaw != NULL) // Only if actually requested.
                     {
                     dyStringAppendN(lf->rawLines,line,(end - line));
                     dyStringAppendC(lf->rawLines,'\n'); // New lines delimit raw lines.
                     }
                 end = slash; // Don't need to zero, because of appending by length
                 break;
                 }
             }
         }
 
     // Stitch together full lines
     if (dyStringLen(lf->fullLine) == 0)
         dyStringAppendN(lf->fullLine,line,(end - line)); // includes first line's whitespace
     else if (start < end)             // don't include continued line's leading spaces
         dyStringAppendN(lf->fullLine,start,(end - start));
 
     if (*end == '\\')
         continue;
 
     // Got a full line now!
     *retFull = dyStringContents(lf->fullLine);
     if (retFullSize)
         *retFullSize = dyStringLen(lf->fullLine);
 
     if (retRaw != NULL && dyStringLen(lf->rawLines) > 0) // Only if actually requested & continued
         {
         // This is the final line which doesn't have a continuation char
         dyStringAppendN(lf->rawLines,line,(end - line));
         *retRaw = dyStringContents(lf->rawLines);
         if (retRawSize)
             *retRawSize = dyStringLen(lf->rawLines);
         }
     return TRUE;
     }
 return FALSE;
 }
 
 boolean lineFileNextReal(struct lineFile *lf, char **retStart)
 /* Fetch next line from file that is not blank and
  *  * does not start with a '#'. */
 {
 char *s, c;
 while (lineFileNext(lf, retStart, NULL))
     {
     s = skipLeadingSpaces(*retStart);
     c = s[0];
     if (c != 0 && c != '#')
         return TRUE;
     }
 return FALSE;
 }
 
 boolean lineFileNextFullReal(struct lineFile *lf, char **retStart)
 // Fetch next line from file that is not blank and does not start with a '#'.
 // Continuation lines (ending in '\') are joined into a single line.
 {
 while (lineFileNextFull(lf, retStart, NULL, NULL, NULL))
     {
     char *clippedText = skipLeadingSpaces(*retStart);
     if (clippedText[0] != '\0' && clippedText[0] != '#')
         return TRUE;
     }
 return FALSE;
 }
 
 
 int lineFileChopNext(struct lineFile *lf, char *words[], int maxWords)
 /* Return next non-blank line that doesn't start with '#' chopped into words. */
 {
 int lineSize, wordCount;
 char *line;
 
 while (lineFileNext(lf, &line, &lineSize))
     {
     if (line[0] == '#')
         continue;
     wordCount = chopByWhite(line, words, maxWords);
     if (wordCount != 0)
         return wordCount;
     }
 return 0;
 }
 
 int lineFileChopCharNext(struct lineFile *lf, char sep, char *words[], int maxWords)
 /* Return next non-blank line that doesn't start with '#' chopped into
    words delimited by sep. */
 {
 int lineSize, wordCount;
 char *line;
 
 while (lineFileNext(lf, &line, &lineSize))
     {
     if (line[0] == '#')
         continue;
     wordCount = chopByChar(line, sep, words, maxWords);
     if (wordCount != 0)
         return wordCount;
     }
 return 0;
 }
 
 int lineFileChopNextTab(struct lineFile *lf, char *words[], int maxWords)
 /* Return next non-blank line that doesn't start with '#' chopped into words
  * on tabs */
 {
 int lineSize, wordCount;
 char *line;
 
 while (lineFileNext(lf, &line, &lineSize))
     {
     if (line[0] == '#')
         continue;
     wordCount = chopByChar(line, '\t', words, maxWords);
     if (wordCount != 0)
         return wordCount;
     }
 return 0;
 }
 
 boolean lineFileNextCharRow(struct lineFile *lf, char sep, char *words[], int wordCount)
 /* Return next non-blank line that doesn't start with '#' chopped into words
  * delimited by sep. Returns FALSE at EOF.  Aborts on error. */
 {
 int wordsRead;
 wordsRead = lineFileChopCharNext(lf, sep, words, wordCount);
 if (wordsRead == 0)
     return FALSE;
 if (wordsRead < wordCount)
     lineFileExpectWords(lf, wordCount, wordsRead);
 return TRUE;
 }
 
 boolean lineFileNextRow(struct lineFile *lf, char *words[], int wordCount)
 /* Return next non-blank line that doesn't start with '#' chopped into words.
  * Returns FALSE at EOF.  Aborts on error. */
 {
 int wordsRead;
 wordsRead = lineFileChopNext(lf, words, wordCount);
 if (wordsRead == 0)
     return FALSE;
 if (wordsRead < wordCount)
     lineFileExpectWords(lf, wordCount, wordsRead);
 return TRUE;
 }
 
 boolean lineFileNextRowTab(struct lineFile *lf, char *words[], int wordCount)
 /* Return next non-blank line that doesn't start with '#' chopped into words
  * at tabs. Returns FALSE at EOF.  Aborts on error. */
 {
 int wordsRead;
 wordsRead = lineFileChopNextTab(lf, words, wordCount);
 if (wordsRead == 0)
     return FALSE;
 if (wordsRead < wordCount)
     lineFileExpectWords(lf, wordCount, wordsRead);
 return TRUE;
 }
 
 int lineFileNeedFullNum(struct lineFile *lf, char *words[], int wordIx)
 /* Make sure that words[wordIx] is an ascii integer, and return
  * binary representation of it. Require all chars in word to be digits.*/
 {
 char *c;
 for (c = words[wordIx]; *c; c++)
     {
     if (*c == '-' || isdigit(*c))
         /* NOTE: embedded '-' will be caught by lineFileNeedNum */
         continue;
     errAbort("Expecting integer field %d line %d of %s, got %s",
             wordIx+1, lf->lineIx, lf->fileName, words[wordIx]);
     }
 return lineFileNeedNum(lf, words, wordIx);
 }
 
 int lineFileNeedNum(struct lineFile *lf, char *words[], int wordIx)
 /* Make sure that words[wordIx] is an ascii integer, and return
  * binary representation of it. Conversion stops at first non-digit char. */
 {
 char *ascii = words[wordIx];
 char c = ascii[0];
 if (c != '-' && !isdigit(c))
     errAbort("Expecting number field %d line %d of %s, got %s",
     	wordIx+1, lf->lineIx, lf->fileName, ascii);
 return atoi(ascii);
 }
 
 int lineFileCheckAllIntsNoAbort(char *s, void *val, 
     boolean isSigned, int byteCount, char *typeString, boolean noNeg, 
     char *errMsg, int errMsgSize)
 /* Convert string to (signed) integer of the size specified.  
  * Unlike atol assumes all of string is number, no trailing trash allowed.
  * Returns 0 if conversion possible, and value is returned in 'val'
  * Otherwise 1 for empty string or trailing chars, and 2 for numeric overflow,
  * and 3 for (-) sign in unsigned number.
  * Error messages if any are written into the provided buffer.
  * Pass NULL val if you only want validation.
  * Use noNeg if negative values are not allowed despite the type being signed,
  * returns 4. */
 {
 unsigned long long res = 0, oldRes = 0;
 boolean isMinus = FALSE;
 
 if ((byteCount != 1) 
  && (byteCount != 2)
  && (byteCount != 4)
  && (byteCount != 8))
     errAbort("Unexpected error: Invalid byte count for integer size in lineFileCheckAllIntsNoAbort, expected 1 2 4 or 8, got %d.", byteCount);
 
 unsigned long long limit = 0xFFFFFFFFFFFFFFFFULL >> (8*(8-byteCount));
 
 if (isSigned) 
     limit >>= 1;
 
 char *p, *p0 = s;
 
 if (*p0 == '-')
     {
     if (isSigned)
 	{
 	if (noNeg)
 	    {
 	    safef(errMsg, errMsgSize, "Negative value not allowed");
 	    return 4; 
 	    }
 	p0++;
 	++limit;
 	isMinus = TRUE;
 	}
     else
 	{
 	safef(errMsg, errMsgSize, "Unsigned %s may not begin with minus sign (-)", typeString);
 	return 3; 
 	}
     }
 p = p0;
 while ((*p >= '0') && (*p <= '9'))
     {
     res *= 10;
     if (res < oldRes)
 	{
 	safef(errMsg, errMsgSize, "%s%s overflowed", isSigned ? "signed ":"", typeString);
 	return 2; 
 	}
     oldRes = res;
     res += *p - '0';
     if (res < oldRes)
 	{
 	safef(errMsg, errMsgSize, "%s%s overflowed", isSigned ? "signed ":"", typeString);
 	return 2; 
 	}
     if (res > limit)
 	{
 	safef(errMsg, errMsgSize, "%s%s overflowed, limit=%s%llu", isSigned ? "signed ":"", typeString, isMinus ? "-" : "", limit);
 	return 2; 
 	}
     oldRes = res;
     p++;
     }
 /* test for invalid character, empty, or just a minus */
 if (*p != '\0')
     {
     safef(errMsg, errMsgSize, "Trailing characters parsing %s%s", isSigned ? "signed ":"", typeString);
     return 1;
     }
 if (p == p0)
     {
     safef(errMsg, errMsgSize, "Empty string parsing %s%s", isSigned ? "signed ":"", typeString);
     return 1;
     }
 
 if (!val)
     return 0;  // only validation required
 
 switch (byteCount)
     {
     case 1:
 	if (isSigned)
 	    {
 	    if (isMinus)
 		*(char *)val = -res;
 	    else
 		*(char *)val = res;
 	    }
 	else
 	    *(unsigned char *)val = res;
 	break;
     case 2:
 	if (isSigned)
 	    {
 	    if (isMinus)
 		*(short *)val = -res;
 	    else
 		*(short *)val = res;
 	    }
 	else
 	    *(unsigned short *)val = res;
 	break;
     case 4:
 	if (isSigned)
 	    {
 	    if (isMinus)
 		*(int *)val = -res;
 	    else
 		*(int *)val = res;
 	    }
 	else
 	    *(unsigned *)val = res;
 	break;
     case 8:
 	if (isSigned)
 	    {
 	    if (isMinus)
 		*(long long *)val = -res;
 	    else
 		*(long long *) val =res;
 	    }
 	else
 	    *(unsigned long long *)val = res;
 	break;
     }
 
 
 return 0;
 }
 
 void lineFileAllInts(struct lineFile *lf, char *words[], int wordIx, void *val,
   boolean isSigned,  int byteCount, char *typeString, boolean noNeg)
 /* Returns long long integer from converting the input string. Aborts on error. */
 {
 char *s = words[wordIx];
 char errMsg[256];
 int res = lineFileCheckAllIntsNoAbort(s, val, isSigned, byteCount, typeString, noNeg, errMsg, sizeof errMsg);
 if (res > 0)
     {
     errAbort("%s in field %d line %d of %s, got %s",
 	errMsg, wordIx+1, lf->lineIx, lf->fileName, s);
     }
 }
 
 int lineFileAllIntsArray(struct lineFile *lf, char *words[], int wordIx, void *array, int arraySize,
   boolean isSigned,  int byteCount, char *typeString, boolean noNeg)
 /* Convert comma separated list of numbers to an array.  Pass in
  * array and max size of array. Aborts on error. Returns number of elements in parsed array. */
 {
 char *s = words[wordIx];
 char errMsg[256];
 unsigned count = 0;
 char *cArray = array;
 for (;;)
     {
     char *e;
     if (s == NULL || s[0] == 0 || count == arraySize)
         break;
     e = strchr(s, ',');
     if (e)
         *e = 0;
     int res = lineFileCheckAllIntsNoAbort(s, cArray, isSigned, byteCount, typeString, noNeg, errMsg, sizeof errMsg);
     if (res > 0)
 	{
 	errAbort("%s in column %d of array field %d line %d of %s, got %s",
 	    errMsg, count, wordIx+1, lf->lineIx, lf->fileName, s);
 	}
     if (cArray) // NULL means validation only.
 	cArray += byteCount;  
     count++;
     if (e)  // restore input string
         *e++ = ',';
     s = e;
     }
 return count;
 }
 
 
 double lineFileNeedDouble(struct lineFile *lf, char *words[], int wordIx)
 /* Make sure that words[wordIx] is an ascii double value, and return
  * binary representation of it. */
 {
 char *valEnd;
 char *val = words[wordIx];
 double doubleValue;
 
 doubleValue = strtod(val, &valEnd);
 if ((*val == '\0') || (*valEnd != '\0'))
     errAbort("Expecting double field %d line %d of %s, got %s",
     	wordIx+1, lf->lineIx, lf->fileName, val);
 return doubleValue;
 }
 
 void lineFileSkip(struct lineFile *lf, int lineCount)
 /* Skip a number of lines. */
 {
 int i, lineSize;
 char *line;
 
 for (i=0; i<lineCount; ++i)
     {
     if (!lineFileNext(lf, &line, &lineSize))
         errAbort("Premature end of file in %s", lf->fileName);
     }
 }
 
 char *lineFileSkipToLineStartingWith(struct lineFile *lf, char *start, int maxCount)
 /* Skip to next line that starts with given string.  Return NULL
  * if no such line found, otherwise return the line. */
 {
 char *line;
 while (lineFileNext(lf, &line, NULL) && --maxCount >= 0)
     {
     if (startsWith(start, line))
         return line;
     }
 return NULL;
 }
 
 char *lineFileReadAll(struct lineFile *lf)
 /* Read remainder of lineFile and return it as a string. */
 {
 struct dyString *dy = dyStringNew(1024*4);
 lf->zTerm = 0;
 int size;
 char *line;
 while (lineFileNext(lf, &line, &size))
     dyStringAppendN(dy, line, size);
 return dyStringCannibalize(&dy);
 }
 
 boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
 				boolean *chunked, int *contentLength)
 /* Extract HTTP response header from lf into hdr, tell if it's
  * "Transfer-Encoding: chunked" or if it has a contentLength. */
 {
   struct dyString *header = newDyString(1024);
   char *line;
   int lineSize;
 
   if (chunked != NULL)
     *chunked = FALSE;
   if (contentLength != NULL)
     *contentLength = -1;
   dyStringClear(header);
   if (lineFileNext(lf, &line, &lineSize))
     {
       if (startsWith("HTTP/", line))
 	{
 	char *version, *code;
 	dyStringAppendN(header, line, lineSize-1);
 	dyStringAppendC(header, '\n');
 	version = nextWord(&line);
 	code = nextWord(&line);
 	if (code == NULL)
 	    {
 	    warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
 	    *hdr = cloneString(header->string);
 	    dyStringFree(&header);
 	    return FALSE;
 	    }
 	if (!sameString(code, "200"))
 	    {
 	    warn("%s: Errored HTTP response header: %s %s %s\n", lf->fileName, version, code, line);
 	    *hdr = cloneString(header->string);
 	    dyStringFree(&header);
 	    return FALSE;
 	    }
 	while (lineFileNext(lf, &line, &lineSize))
 	    {
 	    /* blank line means end of HTTP header */
 	    if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
 	        break;
 	    if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
 	        *chunked = TRUE;
 	    dyStringAppendN(header, line, lineSize-1);
 	    dyStringAppendC(header, '\n');
 	    if (strstr(line, "Content-Length:"))
 	      {
 		code = nextWord(&line);
 		code = nextWord(&line);
 		if (contentLength != NULL)
 		    *contentLength = atoi(code);
 	      }
 	    }
 	}
       else
 	{
 	  /* put the line back, don't put it in header/hdr */
 	  lineFileReuse(lf);
 	  warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
 	  *hdr = cloneString(header->string);
 	  dyStringFree(&header);
 	  return FALSE;
 	}
     }
   else
     {
       *hdr = cloneString(header->string);
       dyStringFree(&header);
       return FALSE;
     }
 
   *hdr = cloneString(header->string);
   dyStringFree(&header);
   return TRUE;
 } /* lineFileParseHttpHeader */
 
 struct dyString *lineFileSlurpHttpBody(struct lineFile *lf,
 				       boolean chunked, int contentLength)
 /* Return a dyString that contains the http response body in lf.  Handle
  * chunk-encoding and content-length. */
 {
   struct dyString *body = newDyString(64*1024);
   char *line;
   int lineSize;
 
   dyStringClear(body);
   if (chunked)
     {
       /* Handle "Transfer-Encoding: chunked" body */
       /* Procedure from RFC2068 section 19.4.6 */
       char *csword;
       unsigned chunkSize = 0;
       unsigned size;
       do
 	{
 	  /* Read line that has chunk size (in hex) as first word. */
 	  if (lineFileNext(lf, &line, NULL))
 	    csword = nextWord(&line);
 	  else break;
 	  if (sscanf(csword, "%x", &chunkSize) < 1)
 	    {
 	      warn("%s: chunked transfer-encoding chunk size parse error.\n",
 		   lf->fileName);
 	      break;
 	    }
 	  /* If chunk size is 0, read in a blank line & then we're done. */
 	  if (chunkSize == 0)
 	    {
 	      lineFileNext(lf, &line, NULL);
 	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
 		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
 		     lf->fileName, line);
 
 	      break;
 	    }
 	  /* Read (and save) lines until we have read in chunk. */
 	  for (size = 0;  size < chunkSize;  size += lineSize)
 	    {
 	      if (! lineFileNext(lf, &line, &lineSize))
 		break;
 	      dyStringAppendN(body, line, lineSize-1);
 	      dyStringAppendC(body, '\n');
 	    }
 	  /* Read blank line - or extra CRLF inserted in the middle of the
 	   * current line, in which case we need to trim it. */
 	  if (size > chunkSize)
 	    {
 	      body->stringSize -= (size - chunkSize);
 	      body->string[body->stringSize] = 0;
 	    }
 	  else if (size == chunkSize)
 	    {
 	      lineFileNext(lf, &line, NULL);
 	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
 		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
 		     lf->fileName, line);
 	    }
 	} while (chunkSize > 0);
       /* Try to read in next line.  If it's an HTTP header, put it back. */
       /* If there is a next line but it's not an HTTP header, it's a footer. */
       if (lineFileNext(lf, &line, NULL))
 	{
 	  if (startsWith("HTTP/", line))
 	    lineFileReuse(lf);
 	  else
 	    {
 	      /* Got a footer -- keep reading until blank line */
 	      warn("%s: chunked transfer-encoding: got footer %s, discarding it.\n",
 		   lf->fileName, line);
 	      while (lineFileNext(lf, &line, NULL))
 		{
 		  if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
 		    break;
 		  warn("discarding footer line: %s\n", line);
 		}
 	    }
 	}
     }
   else if (contentLength >= 0)
     {
       /* Read in known length */
       int size;
       for (size = 0;  size < contentLength;  size += lineSize)
 	{
 	  if (! lineFileNext(lf, &line, &lineSize))
 	    break;
 	  dyStringAppendN(body, line, lineSize-1);
 	  dyStringAppendC(body, '\n');
 	}
     }
   else
     {
       /* Read in to end of file (assume it's not a persistent connection) */
       while (lineFileNext(lf, &line, &lineSize))
 	{
 	  dyStringAppendN(body, line, lineSize-1);
 	  dyStringAppendC(body, '\n');
 	}
     }
 
   return(body);
 } /* lineFileSlurpHttpBody */
 
 void lineFileRemoveInitialCustomTrackLines(struct lineFile *lf)
 /* remove initial browser and track lines */
 {
 char *line;
 while (lineFileNextReal(lf, &line))
     {
     if (!(startsWith("browser", line) || startsWith("track", line) ))
         {
         verbose(2, "found line not browser or track: %s\n", line);
         lineFileReuse(lf);
         break;
         }
     verbose(2, "skipping %s\n", line);
     }
 }