93119a2588afc1810b9c6b819ce35cbb0a45bedc
angie
  Mon Jul 13 14:34:25 2020 -0700
bedToBigBed: add a -maxAlloc option to raise needLargeMem's ceiling for enormous bigDbSnp files.  refs #24500

diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index a278fa9..e0674f5 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -1,27 +1,28 @@
 /* bedToBigBed - Convert bed to bigBed.. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "obscure.h"
 #include "asParse.h"
 #include "basicBed.h"
+#include "memalloc.h"
 #include "sig.h"
 #include "rangeTree.h"
 #include "zlibFace.h"
 #include "sqlNum.h"
 #include "bPlusTree.h"
 #include "bigBed.h"
 #include "twoBit.h"
 
 char *version = "2.8";   // when changing, change in bedToBigBed, bedGraphToBigWig, and wigToBigWig
 /* Version history from 2.6 on at least -
  *   2.8 - Various changes where developer didn't increment version id
  *   2.7 - Added check for duplicate field names in asParse.c
  *   2.6 - Made it not crash on empty input.  
  *   */
 
@@ -69,45 +70,47 @@
   "                      optional P specifies the number of extra fields. Not required, but preferred.\n"
   "                      Examples: -type=bed6 or -type=bed6+ or -type=bed6+3 \n"
   "                      (see http://genome.ucsc.edu/FAQ/FAQformat.html#format1)\n"
   "   -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n"
   "                   of each field in a row in AutoSql format here.\n"
   "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
   "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
   "   -unc - If set, do not use compression.\n"
   "   -tab - If set, expect fields to be tab separated, normally\n"
   "           expects white space separator.\n"
   "   -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n"
   "           extraIndex=name and extraIndex=name,id are commonly used.\n"
   "   -sizesIs2Bit  -- If set, the chrom.sizes file is assumed to be a 2bit file.\n"
   "   -udcDir=/path/to/udcCacheDir  -- sets the UDC cache dir for caching of remote files.\n"
   "   -allow1bpOverlap  -- allow exons to overlap by at most one base pair\n"
+  "   -maxAlloc=N -- Set the maximum memory allocation size to N bytes\n"
   , version, bbiCurrentVersion, blockSize, itemsPerSlot
   );
 }
 
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},
    {"itemsPerSlot", OPTION_INT},
    {"type", OPTION_STRING},
    {"as", OPTION_STRING},
    {"unc", OPTION_BOOLEAN},
    {"tab", OPTION_BOOLEAN},
    {"sizesIs2Bit", OPTION_BOOLEAN},
    {"extraIndex", OPTION_STRING},
    {"udcDir", OPTION_STRING},
    {"allow1bpOverlap", OPTION_BOOLEAN},
+   {"maxAlloc", OPTION_LONG_LONG},
    {NULL, 0},
 };
 
 int bbNamedFileChunkCmpByName(const void *va, const void *vb)
 /* Compare two named offset object to facilitate qsorting by name. */
 {
 const struct bbNamedFileChunk *a = va, *b = vb;
 return strcmp(a->name, b->name);
 }
 
 static int maxBedNameSize;
 
 void bbNamedFileChunkKey(const void *va, char *keyBuf)
 /* Copy name to keyBuf for bPlusTree maker */
 {
@@ -815,33 +818,36 @@
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 blockSize = optionInt("blockSize", blockSize);
 itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
 asFile = optionVal("as", asFile);
 doCompress = !optionExists("unc");
 sizesIs2Bit = optionExists("sizesIs2Bit");
 extraIndex = optionVal("extraIndex", NULL);
 tabSep = optionExists("tab");
 allow1bpOverlap = optionExists("allow1bpOverlap");
 udcDir = optionVal("udcDir", udcDefaultDir());
+size_t maxAlloc = optionLongLong("maxAlloc", 0);
 if (argc != 4)
     usage();
 udcSetDefaultDir(udcDir);
+if (maxAlloc > 0)
+    setMaxAlloc(maxAlloc);
 
 if (optionExists("type"))
     {
     // parse type
     char *btype = cloneString(optionVal("type", ""));
     char *plus = strchr(btype, '+');
     if (plus)
 	{
 	*plus++ = 0;
 	if (isdigit(*plus))
 	    bedP = sqlUnsigned(plus);
 	}
     if (!startsWith("bed", btype))
 	errAbort("type must begin with \"bed\"");
     btype +=3;