965f6d70ac26f0fc37afe89305955c826edbe031
braney
  Mon Apr 10 15:03:30 2017 -0700
Allow a -long option to faToTwoBit, which builds a twoBit file with 64-bit
offsets in the index instead of 32-bit offsets.  This allows the total
amount of stored sequence to be greater than 2Gb.

diff --git src/utils/faToTwoBit/faToTwoBit.c src/utils/faToTwoBit/faToTwoBit.c
index 7c8dcf8..82deb96 100644
--- src/utils/faToTwoBit/faToTwoBit.c
+++ src/utils/faToTwoBit/faToTwoBit.c
@@ -8,45 +8,49 @@
 #include "options.h"
 #include "dnaseq.h"
 #include "dnautil.h"
 #include "fa.h"
 #include "twoBit.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "faToTwoBit - Convert DNA from fasta to 2bit format\n"
   "usage:\n"
   "   faToTwoBit in.fa [in2.fa in3.fa ...] out.2bit\n"
   "options:\n"
+  "   -long          use 64-bit offsets for index.   Allow for twoBit to contain more than 4Gb of sequence. \n"
+  "                  NOT COMPATIBLE WITH OLDER CODE.\n"
   "   -noMask        Ignore lower-case masking in fa file.\n"
   "   -stripVersion  Strip off version number after '.' for GenBank accessions.\n"
   "   -ignoreDups    Convert first sequence only if there are duplicate sequence\n"
   "                  names.  Use 'twoBitDup' to find duplicate sequences."
   );
 }
 
 boolean noMask = FALSE;
 boolean stripVersion = FALSE;
 boolean ignoreDups = FALSE;
+boolean useLong = FALSE;
 
 static struct optionSpec options[] = {
    {"noMask", OPTION_BOOLEAN},
    {"stripVersion", OPTION_BOOLEAN},
    {"ignoreDups", OPTION_BOOLEAN},
+   {"long", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 static void unknownToN(char *s, int size)
 /* Convert non ACGT characters to N. */
 {
 char c;
 int i;
 for (i=0; i<size; ++i)
     {
     c = s[i];
     if (ntChars[(int)c] == 0)
         {
 	if (isupper(c))
 	    s[i] = 'N';
@@ -96,36 +100,37 @@
             else
                 continue;
             }
 	hashAdd(uniqHash, seq.name, NULL);
 	if (noMask)
 	    faToDna(seq.dna, seq.size);
 	else
 	    unknownToN(seq.dna, seq.size);
 	twoBit = twoBitFromDnaSeq(&seq, !noMask);
 	slAddHead(&twoBitList, twoBit);
 	}
     lineFileClose(&lf);
     }
 slReverse(&twoBitList);
 f = mustOpen(outFile, "wb");
-twoBitWriteHeader(twoBitList, f);
+twoBitWriteHeaderExt(twoBitList, f, useLong);
 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
     {
     twoBitWriteOne(twoBit, f);
     }
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc < 3)
     usage();
 noMask = optionExists("noMask");
 stripVersion = optionExists("stripVersion");
 ignoreDups = optionExists("ignoreDups");
+useLong = optionExists("long");
 dnaUtilOpen();
 faToTwoBit(argv+1, argc-2, argv[argc-1]);
 return 0;
 }