965f6d70ac26f0fc37afe89305955c826edbe031
braney
  Mon Apr 10 15:03:30 2017 -0700
Allow a -long option to faToTwoBit, which builds a twoBit file with 64-bit
offsets in the index instead of 32-bit offsets.  This allows the total
amount of stored sequence to exceed 4Gb, the limit of a 32-bit offset.

diff --git src/lib/twoBit.c src/lib/twoBit.c
index 06cc295..5db09d2 100644
--- src/lib/twoBit.c
+++ src/lib/twoBit.c
@@ -40,30 +40,35 @@
 static void udcMustReadWrap(void *file, void *buf, size_t size)
 {
 udcMustRead((struct udcFile *)file, buf, size);
 }
 
 static void udcFileCloseWrap(void *pFile)
 {
 udcFileClose((struct udcFile **)pFile);
 }
 
 static bits32 udcReadBits32Wrap(void *f, boolean isSwapped)
 {
 return udcReadBits32((struct udcFile *)f, isSwapped);
 }
 
+static bits64 udcReadBits64Wrap(void *f, boolean isSwapped)
+{
+return udcReadBits64((struct udcFile *)f, isSwapped);
+}
+
 static boolean udcFastReadStringWrap(void *f, char buf[256])
 {
 return udcFastReadString((struct udcFile *)f, buf);
 }
 
 /* now the stdio wrappers */
 static void seekCurWrap(void *file, bits64 offset)
 {
 fseek((FILE *)file, offset, SEEK_CUR);
 }
 
 static void seekWrap(void *file, bits64 offset)
 {
 fseek((FILE *)file, offset, SEEK_SET);
 }
@@ -76,56 +81,63 @@
 static void mustReadWrap(void *file, void *buf, size_t size)
 {
 mustRead((FILE *)file, buf, size);
 }
 
 static void fileCloseWrap(void *pFile)
 {
 carefulClose((FILE **)pFile);
 }
 
 static bits32 readBits32Wrap(void *f, boolean isSwapped)
 {
 return readBits32((FILE *)f, isSwapped);
 }
 
+static bits64 readBits64Wrap(void *f, boolean isSwapped)
+{
+return readBits64((FILE *)f, isSwapped);
+}
+
 static boolean fastReadStringWrap(void *f, char buf[256])
 {
 return fastReadString((FILE *)f, buf);
 }
 
 static void setFileFuncs( struct twoBitFile *tbf, boolean useUdc)
 /* choose the proper function pointers depending on whether
  * this open twoBit is using stdio or UDC
  */
 {
 if (useUdc)
     {
     tbf->ourSeekCur = udcSeekCurWrap;
     tbf->ourSeek = udcSeekWrap;
     tbf->ourTell = udcTellWrap;
     tbf->ourReadBits32 = udcReadBits32Wrap;
+    tbf->ourReadBits64 = udcReadBits64Wrap;
     tbf->ourFastReadString = udcFastReadStringWrap;
     tbf->ourClose = udcFileCloseWrap;
     tbf->ourMustRead = udcMustReadWrap;
     }
 else
     {
     tbf->ourSeekCur = seekCurWrap;
     tbf->ourSeek = seekWrap;
     tbf->ourTell = tellWrap;
     tbf->ourReadBits32 = readBits32Wrap;
+    tbf->ourReadBits64 = readBits64Wrap;
     tbf->ourFastReadString = fastReadStringWrap;
     tbf->ourClose = fileCloseWrap;
     tbf->ourMustRead = mustReadWrap;
     }
 }
 
 static int countBlocksOfN(char *s, int size)
 /* Count number of blocks of N's (or n's) in s. */
 {
 int i;
 boolean isN, lastIsN = FALSE;
 char c;
 int blockCount = 0;
 
 for (i=0; i<size; ++i)
@@ -311,76 +323,98 @@
     fwrite(twoBit->nSizes, sizeof(twoBit->nSizes[0]), 
     	twoBit->nBlockCount, f);
     }
 writeOne(f, twoBit->maskBlockCount);
 if (twoBit->maskBlockCount > 0)
     {
     fwrite(twoBit->maskStarts, sizeof(twoBit->maskStarts[0]), 
     	twoBit->maskBlockCount, f);
     fwrite(twoBit->maskSizes, sizeof(twoBit->maskSizes[0]), 
     	twoBit->maskBlockCount, f);
     }
 writeOne(f, twoBit->reserved);
 mustWrite(f, twoBit->data, packedSize(twoBit->size));
 }
 
-void twoBitWriteHeader(struct twoBit *twoBitList, FILE *f)
+void twoBitWriteHeaderExt(struct twoBit *twoBitList, FILE *f, boolean useLong)
 /* Write out header portion of twoBit file, including initial
- * index */
+ * index. If useLong is TRUE, use 64-bit quantities for the index offsets to support >4Gb assemblies. */
 {
 bits32 sig = twoBitSig;
 bits32 version = 0;
+if (useLong)
+    version = 1;
+
 bits32 seqCount = slCount(twoBitList);
 bits32 reserved = 0;
 bits32 offset = 0;
+bits64 longOffset = 0;
 struct twoBit *twoBit;
 long long counter = 0; /* check for 32 bit overflow */
 
 /* Write out fixed parts of header. */
 writeOne(f, sig);
 writeOne(f, version);
 writeOne(f, seqCount);
 writeOne(f, reserved);
 
 /* Figure out location of first byte past index.
  * Each index entry contains 4 bytes of offset information
  * and the name of the sequence, which is variable length. */
-offset = sizeof(sig) + sizeof(version) + sizeof(seqCount) + sizeof(reserved);
+longOffset = offset = sizeof(sig) + sizeof(version) + sizeof(seqCount) + sizeof(reserved);
 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
     {
     int nameLen = strlen(twoBit->name);
     if (nameLen > 255)
         errAbort("name %s too long", twoBit->name);
+    if (useLong)
+        longOffset += nameLen + 1 + sizeof(bits64);
+    else
         offset += nameLen + 1 + sizeof(bits32);
     }
 
 /* Write out index. */
 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
     {
     int size = twoBitSizeInFile(twoBit);
     writeString(f, twoBit->name);
+    if (useLong)
+        {
+        writeOne(f, longOffset);
+        longOffset += size;
+        }
+    else
+        {
         writeOne(f, offset);
         offset += size;
+        }
     counter += (long long)size;
-    if (counter > UINT_MAX )
+    if (!useLong && (counter > UINT_MAX ))
         errAbort("Error in faToTwoBit, index overflow at %s. The 2bit format "
                 "does not support indexes larger than %dGb, \n"
-                "please split up into smaller files.\n", 
+                "please split up into smaller files, or use -long option.\n", 
                 twoBit->name, UINT_MAX/1000000000);
     }
 }
 
+void twoBitWriteHeader(struct twoBit *twoBitList, FILE *f)
+/* Write out header portion of twoBit file, including initial
+ * index */
+{
+twoBitWriteHeaderExt(twoBitList, f, FALSE);
+}
+
 void twoBitClose(struct twoBitFile **pTbf)
 /* Free up resources associated with twoBitFile. */
 {
 struct twoBitFile *tbf = *pTbf;
 if (tbf != NULL)
     {
     twoBitFree(&tbf->seqCache);
     freez(&tbf->fileName);
     (*tbf->ourClose)(&tbf->f);
     hashFree(&tbf->hash);
     /* The indexList is allocated out of the hash's memory pool. */
     bptFileClose(&tbf->bpt);
     freez(pTbf);
     }
 }
@@ -422,63 +456,66 @@
 {
 struct twoBitFile *tbf;
 boolean isSwapped = FALSE;
 
 tbf = getTbfAndOpen(fileName, useUdc);
 
 /* Allocate header verify signature, and read in
  * the constant-length bits. */
 
 if (!twoBitSigRead(tbf, &isSwapped))
     errAbort("%s doesn't have a valid twoBitSig", fileName);
 
 tbf->isSwapped = isSwapped;
 tbf->fileName = cloneString(fileName);
 tbf->version = (*tbf->ourReadBits32)(tbf->f, isSwapped);
-if (tbf->version != 0)
+if ((tbf->version != 0) && (tbf->version != 1))
     {
-    errAbort("Can only handle version 0 of this file. This is version %d",
+    errAbort("Can only handle version 0 or version 1 of this file. This is version %d",
     	(int)tbf->version);
     }
 tbf->seqCount = (*tbf->ourReadBits32)(tbf->f, isSwapped);
 tbf->reserved = (*tbf->ourReadBits32)(tbf->f, isSwapped);
 return tbf;
 }
 
 
 struct twoBitFile *twoBitOpen(char *fileName)
 /* Open file, read in header and index.  
  * Squawk and die if there is a problem. */
 {
 boolean useUdc = FALSE;
 if (hasProtocol(fileName))
     useUdc = TRUE;
 struct twoBitFile *tbf = twoBitOpenReadHeader(fileName, useUdc);
 struct twoBitIndex *index;
 boolean isSwapped = tbf->isSwapped;
 int i;
 struct hash *hash;
 void *f = tbf->f;
 
 /* Read in index. */
 hash = tbf->hash = hashNew(digitsBaseTwo(tbf->seqCount));
 for (i=0; i<tbf->seqCount; ++i)
     {
     char name[256];
     if (!(*tbf->ourFastReadString)(f, name))
         errAbort("%s is truncated", fileName);
     lmAllocVar(hash->lm, index);
+    if (tbf->version == 1)
+        index->offset = (*tbf->ourReadBits64)(f, isSwapped);
+    else
         index->offset = (*tbf->ourReadBits32)(f, isSwapped);
     hashAddSaveName(hash, name, index, &index->name);
     slAddHead(&tbf->indexList, index);
     }
 slReverse(&tbf->indexList);
 return tbf;
 }
 
 struct twoBitFile *twoBitOpenExternalBptIndex(char *twoBitName, char *bptName)
 /* Open file, read in header, but not regular index.  Instead use
  * bpt index.   Beware if you use this the indexList field will be NULL
  * as will the hash. */
 {
 struct twoBitFile *tbf = twoBitOpenReadHeader(twoBitName, FALSE);
 tbf->bpt = bptFileOpen(bptName);