0544059f3df004f2d54e8978809ddfcbdee23645 braney Mon Feb 25 14:36:53 2013 -0800 modify twoBit library to auto-recognize whether it's dealing with a URL or local file (per code review #10237) diff --git src/lib/twoBit.c src/lib/twoBit.c index 49e65f5..494d111 100644 --- src/lib/twoBit.c +++ src/lib/twoBit.c @@ -415,64 +415,60 @@ tbf->isSwapped = isSwapped; tbf->fileName = cloneString(fileName); tbf->version = (*tbf->ourReadBits32)(tbf->f, isSwapped); if (tbf->version != 0) { errAbort("Can only handle version 0 of this file. This is version %d", (int)tbf->version); } tbf->seqCount = (*tbf->ourReadBits32)(tbf->f, isSwapped); tbf->reserved = (*tbf->ourReadBits32)(tbf->f, isSwapped); return tbf; } -struct twoBitFile *twoBitOpenExt(char *fileName, boolean useUdc) +struct twoBitFile *twoBitOpen(char *fileName) /* Open file, read in header and index. * Squawk and die if there is a problem. */ { +boolean useUdc = FALSE; +if (hasProtocol(fileName)) + useUdc = TRUE; struct twoBitFile *tbf = twoBitOpenReadHeader(fileName, useUdc); struct twoBitIndex *index; boolean isSwapped = tbf->isSwapped; int i; struct hash *hash; void *f = tbf->f; /* Read in index. */ hash = tbf->hash = hashNew(digitsBaseTwo(tbf->seqCount)); for (i=0; i<tbf->seqCount; ++i) { char name[256]; if (!(*tbf->ourFastReadString)(f, name)) errAbort("%s is truncated", fileName); lmAllocVar(hash->lm, index); index->offset = (*tbf->ourReadBits32)(f, isSwapped); hashAddSaveName(hash, name, index, &index->name); slAddHead(&tbf->indexList, index); } slReverse(&tbf->indexList); return tbf; } -struct twoBitFile *twoBitOpen(char *fileName) -/* Open stdio file, read in header and index. - * Squawk and die if there is a problem. */ -{ -return twoBitOpenExt(fileName, FALSE); -} - struct twoBitFile *twoBitOpenExternalBptIndex(char *twoBitName, char *bptName) /* Open file, read in header, but not regular index. Instead use * bpt index. Beware if you use this the indexList field will be NULL * as will the hash. */ { struct twoBitFile *tbf = twoBitOpenReadHeader(twoBitName, FALSE); tbf->bpt = bptFileOpen(bptName); if (tbf->seqCount != tbf->bpt->itemCount) errAbort("%s and %s don't have same number of sequences!", twoBitName, bptName); return tbf; } static int findGreatestLowerBound(int blockCount, bits32 *pos, int val) @@ -576,54 +572,48 @@ /* Read in masked blocks. */ readBlockCoords(tbf, isSwapped, &(twoBit->maskBlockCount), &(twoBit->maskStarts), &(twoBit->maskSizes)); /* Reserved word. */ twoBit->reserved = (*tbf->ourReadBits32)(f, isSwapped); /* Read in data. */ packByteCount = packedSize(twoBit->size); twoBit->data = needLargeMem(packByteCount); (*tbf->ourMustRead)(f, twoBit->data, packByteCount); return twoBit; } -struct twoBit *twoBitFromFileExt(char *fileName, boolean useUdc) +struct twoBit *twoBitFromFile(char *fileName) /* Get twoBit list of all sequences in twoBit file. */ { -struct twoBitFile *tbf = twoBitOpenExt(fileName, useUdc); +struct twoBitFile *tbf = twoBitOpen(fileName); struct twoBitIndex *index; struct twoBit *twoBitList = NULL; for (index = tbf->indexList; index != NULL; index = index->next) { struct twoBit *twoBit = twoBitOneFromFile(tbf, index->name); slAddHead(&twoBitList, twoBit); } twoBitClose(&tbf); slReverse(&twoBitList); return twoBitList; } -struct twoBit *twoBitFromFile(char *fileName) -/* Get twoBit list of all sequences in twoBit file. */ -{ -return twoBitFromFileExt(fileName, FALSE); -} - void twoBitFree(struct twoBit **pTwoBit) /* Free up a two bit structure. */ { struct twoBit *twoBit = *pTwoBit; if (twoBit != NULL) { freeMem(twoBit->nStarts); freeMem(twoBit->nSizes); freeMem(twoBit->maskStarts); freeMem(twoBit->maskSizes); freeMem(twoBit->data); freez(pTwoBit); } } @@ -840,108 +830,94 @@ } long long twoBitTotalSize(struct twoBitFile *tbf) /* Return total size of all sequences in two bit file. */ { struct twoBitIndex *index; long long totalSize = 0; for (index = tbf->indexList; index != NULL; index = index->next) { (*tbf->ourSeek)(tbf->f, index->offset); totalSize += (*tbf->ourReadBits32)(tbf->f, tbf->isSwapped); } return totalSize; } -struct dnaSeq *twoBitLoadAllExt(char *spec, boolean useUdc) +struct dnaSeq *twoBitLoadAll(char *spec) /* Return list of all sequences matching spec, which is in * the form: * * file/path/input.2bit[:seqSpec1][,seqSpec2,...] * * where seqSpec is either * seqName * or * seqName:start-end */ { struct twoBitSpec *tbs = twoBitSpecNew(spec); -struct twoBitFile *tbf = twoBitOpenExt(tbs->fileName, useUdc); +struct twoBitFile *tbf = twoBitOpen(tbs->fileName); struct dnaSeq *list = NULL; if (tbs->seqs != NULL) { struct twoBitSeqSpec *tbss; for (tbss = tbs->seqs; tbss != NULL; tbss = tbss->next) slSafeAddHead(&list, twoBitReadSeqFrag(tbf, tbss->name, tbss->start, tbss->end)); } else { struct twoBitIndex *index; for (index = tbf->indexList; index != NULL; index = index->next) slSafeAddHead(&list, twoBitReadSeqFrag(tbf, index->name, 0, 0)); } slReverse(&list); twoBitClose(&tbf); twoBitSpecFree(&tbs); return list; } -struct dnaSeq *twoBitLoadAll(char *spec) -{ -return twoBitLoadAllExt(spec, FALSE); -} - -struct slName *twoBitSeqNamesExt(char *fileName, boolean useUdc) +struct slName *twoBitSeqNames(char *fileName) /* Get list of all sequences in twoBit file. */ { -struct twoBitFile *tbf = twoBitOpenExt(fileName, useUdc); +struct twoBitFile *tbf = twoBitOpen(fileName); struct twoBitIndex *index; struct slName *name, *list = NULL; for (index = tbf->indexList; index != NULL; index = index->next) { name = slNameNew(index->name); slAddHead(&list, name); } twoBitClose(&tbf); slReverse(&list); return list; } -struct slName *twoBitSeqNames(char *fileName) -/* Get list of all sequences in twoBit file. */ -{ -return twoBitSeqNamesExt(fileName, FALSE); -} - -boolean twoBitIsFileExt(char *fileName, boolean useUdc) +boolean twoBitIsFile(char *fileName) /* Return TRUE if file is in .2bit format. */ { +boolean useUdc = FALSE; +if (hasProtocol(fileName)) + useUdc = TRUE; struct twoBitFile *tbf = getTbfAndOpen(fileName, useUdc); boolean isSwapped; boolean isTwoBit = twoBitSigRead(tbf, &isSwapped); (*tbf->ourClose)(&tbf->f); return isTwoBit; } -boolean twoBitIsFile(char *fileName) -/* Return TRUE if file is in .2bit format. */ -{ -return twoBitIsFileExt(fileName, FALSE); -} - boolean twoBitParseRange(char *rangeSpec, char **retFile, char **retSeq, int *retStart, int *retEnd) /* Parse out something in format * file/path/name:seqName:start-end * or * file/path/name:seqName * or * file/path/name:seqName1,seqName2,seqName3,... * This will destroy the input 'rangeSpec' in the process. Returns FALSE if * it doesn't fit this format, setting retFile to rangeSpec, and retSet to * null. If it is the shorter form then start and end will both be returned * as zero, which is ok by twoBitReadSeqFrag. Any of the return arguments * maybe NULL. */ {