965f6d70ac26f0fc37afe89305955c826edbe031 braney Mon Apr 10 15:03:30 2017 -0700 allow -long option to faToTwoBit which builds a twoBit file with 64-bit offsets in the index instead of 32-bit offsets. This allows the total amount of stored sequence to be greater than 2Gb diff --git src/inc/twoBit.h src/inc/twoBit.h index 85394db..8658c5a 100644 --- src/inc/twoBit.h +++ src/inc/twoBit.h @@ -14,60 +14,61 @@ bits32 size; /* Size of this sequence. */ bits32 nBlockCount; /* Count of blocks of Ns. */ bits32 *nStarts; /* Starts of blocks of Ns. */ bits32 *nSizes; /* Sizes of blocks of Ns. */ bits32 maskBlockCount; /* Count of masked blocks. */ bits32 *maskStarts; /* Starts of masked regions. */ bits32 *maskSizes; /* Sizes of masked regions. */ bits32 reserved; /* Reserved for future expansion. */ }; struct twoBitIndex /* An entry in twoBit index. */ { struct twoBitIndex *next; /* Next in list. */ char *name; /* Name - allocated in hash */ - bits32 offset; /* Offset in file. */ + bits64 offset; /* Offset in file. */ }; struct twoBitFile /* Holds header and index info from .2bit file. */ { struct twoBitFile *next; char *fileName; /* Name of this file, for error reporting. */ void *f; /* Open file. */ boolean isSwapped; /* Is byte-swapping needed. */ bits32 version; /* Version of .2bit file */ bits32 seqCount; /* Number of sequences. */ bits32 reserved; /* Reserved, always zero for now. */ struct twoBitIndex *indexList; /* List of sequence. */ struct hash *hash; /* Hash of sequences. */ struct bptFile *bpt; /* Alternative index. */ struct twoBit *seqCache; /* Cache information about last sequence accessed, including * nBlock and mask block. This doesn't include the data. * This speeds fragment reads. */ bits64 dataOffsetCache; /* file offset of data for seqCache sequence */ /* the routines we use to access the twoBit. 
* These may be UDC routines, or stdio */ void (*ourSeek)(void *file, bits64 offset); void (*ourSeekCur)(void *file, bits64 offset); bits64 (*ourTell)(void *file); bits32 (*ourReadBits32)(void *f, boolean isSwapped); + bits64 (*ourReadBits64)(void *f, boolean isSwapped); void (*ourClose)(void *pFile); boolean (*ourFastReadString)(void *f, char buf[256]); void (*ourMustRead)(void *file, void *buf, size_t size); }; struct twoBitSpec /* parsed .2bit file and sequence specs */ { char *fileName; /* path to file */ struct twoBitSeqSpec *seqs; /* list of sequences and subsequences */ }; struct twoBitSeqSpec /* specification for a seq or subsequence in a .2bit file */ { @@ -142,30 +143,34 @@ /* Free up a two bit structure. */ void twoBitFreeList(struct twoBit **pList); /* Free a list of dynamically allocated twoBit's */ void twoBitWriteOne(struct twoBit *twoBit, FILE *f); /* Write out one twoBit sequence to binary file. * Note this does not include the name, which is * stored only in index. */ void twoBitWriteHeader(struct twoBit *twoBitList, FILE *f); /* Write out header portion of twoBit file, including initial * index */ +void twoBitWriteHeaderExt(struct twoBit *twoBitList, FILE *f, boolean useLong); +/* Write out header portion of twoBit file, including initial + * index. If useLong is TRUE, use 64-bit quantities for the index offsets to support assemblies larger than 4Gb. */ + boolean twoBitIsFile(char *fileName); /* Return TRUE if file is in .2bit format. */ boolean twoBitParseRange(char *rangeSpec, char **retFile, char **retSeq, int *retStart, int *retEnd); /* Parse out something in format * file/path/name:seqName:start-end * or * file/path/name:seqName * This will destroy the input 'rangeSpec' in the process. * Returns FALSE if it doesn't fit this format. * If it is the shorter form then start and end will both * be returned as zero, which is ok by twoBitReadSeqFrag. */ boolean twoBitIsRange(char *rangeSpec);