19eaeb4d65c6962e516fb23474643ba754c32ea1 angie Fri Feb 11 10:52:15 2011 -0800 Feature #2821 (VCF parser): works on a 1000 Genomes pilot release VCF+tabix file with genotypes. [Note: this is a squash of 6 commits, developed off in my vcf branch.] diff --git src/lib/linefile.c src/lib/linefile.c index 14e1a5c..d1b888c 100644 --- src/lib/linefile.c +++ src/lib/linefile.c @@ -184,72 +184,72 @@ struct lineFile *lineFileOnString(char *name, bool zTerm, char *s) /* Wrap a line file object around string in memory. This buffer * have zeroes written into it and be freed when the line file * is closed. */ { struct lineFile *lf; AllocVar(lf); lf->fileName = cloneString(name); lf->fd = -1; lf->bufSize = lf->bytesInBuf = strlen(s); lf->zTerm = zTerm; lf->buf = s; return lf; } -struct lineFile *lineFileOnTabix(char *fileName, bool zTerm) +struct lineFile *lineFileOnTabix(char *fileOrUrl, bool zTerm) /* Wrap a line file around a data file that has been compressed and indexed - * by the tabix command line program. The index file <fileName>.tbi must be - * readable in addition to fileName. If there's a problem, warn & return NULL. + * by the tabix command line program. The index file <fileOrUrl>.tbi must be + * readable in addition to fileOrUrl. If there's a problem, warn & return NULL. * This works only if kent/src has been compiled with USE_TABIX=1 and linked * with the tabix C library. 
*/ { #ifdef USE_TABIX -int tbiNameSize = strlen(fileName) + strlen(".tbi") + 1; +int tbiNameSize = strlen(fileOrUrl) + strlen(".tbi") + 1; char *tbiName = needMem(tbiNameSize); -safef(tbiName, tbiNameSize, "%s.tbi", fileName); -tabix_t *tabix = ti_open(fileName, tbiName); +safef(tbiName, tbiNameSize, "%s.tbi", fileOrUrl); +tabix_t *tabix = ti_open(fileOrUrl, tbiName); if (tabix == NULL) { - warn("Unable to open \"%s\"", fileName); + warn("Unable to open \"%s\"", fileOrUrl); freez(&tbiName); return NULL; } if ((tabix->idx = ti_index_load(tbiName)) == NULL) { warn("Unable to load tabix index from \"%s\"", tbiName); freez(&tbiName); return NULL; } struct lineFile *lf = needMem(sizeof(struct lineFile)); -lf->fileName = cloneString(fileName); +lf->fileName = cloneString(fileOrUrl); lf->fd = -1; lf->bufSize = 64 * 1024; lf->buf = needMem(lf->bufSize); lf->zTerm = zTerm; lf->tabix = tabix; freez(&tbiName); return lf; #else // no USE_TABIX warn(COMPILE_WITH_TABIX, "lineFileOnTabix"); return NULL; #endif // no USE_TABIX } boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end) /* Assuming lf was created by lineFileOnTabix, tell tabix to seek to the specified region - * and return TRUE (or if unable, return FALSE). */ + * and return TRUE (or if there are no items in region, return FALSE). */ { #ifdef USE_TABIX if (lf->tabix == NULL) errAbort("lineFileSetTabixRegion: lf->tabix is NULL. Did you open lf with lineFileOnTabix?"); int tabixSeqId = ti_get_tid(lf->tabix->idx, seqName); if (tabixSeqId < 0 && startsWith("chr", seqName)) // We will get some files that have chr-less Ensembl chromosome names: tabixSeqId = ti_get_tid(lf->tabix->idx, seqName+strlen("chr")); if (tabixSeqId < 0) return FALSE; ti_iter_t iter = ti_queryi(lf->tabix, tabixSeqId, start, end); if (iter == NULL) return FALSE; if (lf->tabixIter != NULL) ti_iter_destroy(lf->tabixIter);