16439684a0ecc75ede242ded740c51cf1f60c8a4 angie Tue Feb 22 11:36:43 2011 -0800 Feature #2820 (tabix: add as optional linked library in kent/src):Added lineFile wrapper on tabix: lineFileOnTabix to open; then lineFileNext and its derivatives work as usual. lineFileSetTabixRegion seeks to the given position range, and subsequent lineFileNext's return lines in that position range. lineFileSeek is not supported -- tabix doesn't have linear offsets but rather a block number and offset which are packed into 64 bits. Line numbers are not known after calls to lineFileSetTabixRegion. diff --git src/inc/linefile.h src/inc/linefile.h index e31622f..b84127a 100644 --- src/inc/linefile.h +++ src/inc/linefile.h @@ -1,24 +1,28 @@ /* lineFile - stuff to rapidly read text files and parse them into * lines. * * This file is copyright 2002 Jim Kent, but license is hereby * granted for all use - public, private or commercial. */ #ifndef LINEFILE_H #define LINEFILE_H +#ifdef USE_TABIX +#include "tabix.h" +#endif + enum nlType { nlt_undet, /* undetermined */ nlt_unix, /* lf */ nlt_dos, /* crlf */ nlt_mac /* cr */ }; struct metaOutput /* struct to store list of file handles to output meta data to * meta data is text after # */ { struct metaOutput *next; /* next file handle */ FILE *metaFile; /* file to write metadata to */ }; @@ -32,30 +36,34 @@ int bufSize; /* Size of buffer. */ off_t bufOffsetInFile; /* Offset in file of first buffer byte. */ int bytesInBuf; /* Bytes read into buffer. */ int reserved; /* Reserved (zero for now). */ int lineIx; /* Current line. */ int lineStart; /* Offset of line in buffer. */ int lineEnd; /* End of line in buffer. */ bool zTerm; /* Replace '\n' with zero? */ enum nlType nlType; /* type of line endings: dos, unix, mac or undet */ bool reuse; /* Set if reusing input. */ char *buf; /* Buffer. 
*/ struct pipeline *pl; /* pipeline if reading compressed */ struct metaOutput *metaOutput; /* list of FILE handles to write metaData to */ bool isMetaUnique; /* if set, do not repeat comments in output */ struct hash *metaLines; /* save lines to suppress repetition */ +#ifdef USE_TABIX + tabix_t *tabix; /* A tabix-compressed file and its binary index file (.tbi) */ + ti_iter_t tabixIter; /* An iterator to get decompressed indexed lines of text */ +#endif }; char *getFileNameFromHdrSig(char *m); /* Check if header has signature of supported compression stream, and return a phoney filename for it, or NULL if no sig found. */ struct lineFile *lineFileDecompressFd(char *name, bool zTerm, int fd); /* open a linefile with decompression from a file or socket descriptor */ struct lineFile *lineFileDecompressMem(bool zTerm, char *mem, long size); /* open a linefile with decompression from a memory stream */ struct lineFile *lineFileMayOpen(char *fileName, bool zTerm); /* Try and open up a lineFile. If fileName ends in .gz, .Z, or .bz2, * it will be read from a decompress pipeline. */ @@ -195,18 +203,36 @@ boolean chunked, int contentLength); /* Return a dyString that contains the http response body in lf. Handle * chunk-encoding and content-length. */ void lineFileSetMetaDataOutput(struct lineFile *lf, FILE *f); /* set file to write meta data to, * should be called before reading from input file */ void lineFileSetUniqueMetaData(struct lineFile *lf); /* suppress duplicate lines in metadata */ void lineFileRemoveInitialCustomTrackLines(struct lineFile *lf); /* remove initial browser and track lines */ +/*----- Optionally-compiled wrapper on tabix (compression + indexing): -----*/ + +#define COMPILE_WITH_TABIX "%s: Sorry, this functionality is available only when\n" \ + "you have installed the tabix library from\n" \ + "http://samtools.sourceforge.net/ and rebuilt kent/src with USE_TABIX=1\n" \ + "(see http://genomewiki.ucsc.edu/index.php/Build_Environment_Variables)." 
+ +struct lineFile *lineFileOnTabix(char *fileName, bool zTerm); +/* Wrap a line file around a data file that has been compressed and indexed + * by the tabix command line program. The index file .tbi must be + * readable in addition to fileName. If there's a problem, warn & return NULL. + * This works only if kent/src has been compiled with USE_TABIX=1 and linked + * with the tabix C library. Once opened, lineFileNext and its derivatives work as usual. */ + +boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end); +/* Assuming lf was created by lineFileOnTabix, tell tabix to seek to the specified region + * and return TRUE (or if unable, return FALSE). Subsequent lineFileNext calls return lines in that region; line numbers are not known after this call, and lineFileSeek is not supported on a tabix-backed lf. */ + #endif /* LINEFILE_H */