766edf73c99da98b3935f42c234d14acf08c2c0e
markd
  Sat Jun 27 19:37:45 2020 -0700
move index building to a separate gfServer subcommand

diff --git src/gfServer/gfServer.c src/gfServer/gfServer.c
index 1723c85..3403c72 100644
--- src/gfServer/gfServer.c
+++ src/gfServer/gfServer.c
@@ -33,31 +33,30 @@
     {"maxDnaHits", OPTION_INT},
     {"maxGap", OPTION_INT},
     {"maxNtSize", OPTION_INT},
     {"maxTransHits", OPTION_INT},
     {"minMatch", OPTION_INT},
     {"repMatch", OPTION_INT},
     {"seqLog", OPTION_BOOLEAN},
     {"ipLog", OPTION_BOOLEAN},
     {"debugLog", OPTION_BOOLEAN},
     {"stepSize", OPTION_INT},
     {"tileSize", OPTION_INT},
     {"trans", OPTION_BOOLEAN},
     {"syslog", OPTION_BOOLEAN},
     {"perSeqMax", OPTION_STRING},
     {"noSimpRepMask", OPTION_BOOLEAN},
-    {"writeIndex", OPTION_BOOLEAN},
     {"indexFile", OPTION_STRING},
     {NULL, 0}
 };
 
 
 int maxNtSize = 40000;
 int maxAaSize = 8000;
 
 int minMatch = gfMinMatch;	/* Can be overridden from command line. */
 int tileSize = gfTileSize;	/* Can be overridden from command line. */
 int stepSize = 0;		/* Can be overridden from command line. */
 boolean doTrans = FALSE;	/* Do translation? */
 boolean allowOneMismatch = FALSE; 
 boolean noSimpRepMask = FALSE;
 int repMatch = 1024;    /* Can be overridden from command line. */
@@ -85,30 +84,36 @@
   "      gfServer query host port probe.fa\n"
   "   To query a server with protein sequence:\n"
   "      gfServer protQuery host port probe.fa\n"
   "   To query a server with translated DNA sequence:\n"
   "      gfServer transQuery host port probe.fa\n"
   "   To query server with PCR primers:\n"
   "      gfServer pcr host port fPrimer rPrimer maxDistance\n"
   "   To process one probe fa file against a .2bit format genome (not starting server):\n"
   "      gfServer direct probe.fa file(s).2bit\n"
   "   To test PCR without starting server:\n"
   "      gfServer pcrDirect fPrimer rPrimer file(s).2bit\n"
   "   To figure out usage level:\n"
   "      gfServer status host port\n"
   "   To get input file list:\n"
   "      gfServer files host port\n"
+  "   To generate a precomputed index:\n"
+  "      gfServer index gfidx file(s)\n"
+  "     where the files are .2bit or .nib format files.  Separate indexes must be created\n"
+  "     for untranslated and translated queries.  These can be used with a persistent server\n"
+  "     as with 'start -indexFile' or a dynamic server. They must follow the naming convention\n"
+  "     for dynamic servers.\n"
   "options:\n"
   "   -tileSize=N     Size of n-mers to index.  Default is 11 for nucleotides, 4 for\n"
   "                   proteins (or translated nucleotides).\n"
   "   -stepSize=N     Spacing between tiles. Default is tileSize.\n"
   "   -minMatch=N     Number of n-mer matches that trigger detailed alignment.\n"
   "                   Default is 2 for nucleotides, 3 for proteins.\n"
   "   -maxGap=N       Number of insertions or deletions allowed between n-mers.\n"
   "                   Default is 2 for nucleotides, 0 for proteins.\n"
   "   -trans          Translate database to protein in 6 frames.  Note: it is best\n"
   "                   to run this on RepeatMasked data in this case.\n"
   "   -log=logFile    Keep a log file that records server requests.\n"
   "   -seqLog         Include sequences in log file (not logged with -syslog).\n"
   "   -ipLog          Include user's IP in log file (not logged with -syslog).\n"
   "   -debugLog       Include debugging info in log file.\n"
   "   -syslog         Log to syslog.\n"
@@ -118,31 +123,30 @@
   "                   tile. Default is %d.\n"
   "   -noSimpRepMask  Suppresses simple repeat masking.\n"
   "   -maxDnaHits=N   Maximum number of hits for a DNA query that are sent from the server.\n"
   "                   Default is %d.\n"
   "   -maxTransHits=N Maximum number of hits for a translated query that are sent from the server.\n"
   "                   Default is %d.\n"
   "   -maxNtSize=N    Maximum size of untranslated DNA query sequence.\n"
   "                   Default is %d.\n"
   "   -maxAaSize=N    Maximum size of protein or translated DNA queries.\n"
   "                   Default is %d.\n"
   "   -perSeqMax=file File contains one seq filename (possibly with ':seq' suffix) per line.\n"
   "                   -maxDnaHits will be applied to each filename[:seq] separately: each may\n"
   "                   have at most maxDnaHits/2 hits.\n"
   "                   Useful for assemblies with many alternate/patch sequences.\n"
   "   -canStop        If set, a quit message will actually take down the server.\n"
-  "   -writeIndex     Write the in-memory index to indexFile after building and exit.\n"
   "   -indexFile      File for index.  If -writeIndex is specified, the file is created,\n"
   "                   otherwise it is loaded from this file.  Saving index can speed up\n"
   "                   gfServer startup by two orders of magnitude.  The parameters must\n"
   "                   exactly match the parameters when the file is written or bad things\n"
   "                   will happen.\n"
   ,	gfVersion, repMatch, maxDnaHits, maxTransHits, maxNtSize, maxAaSize
   );
 
 }
 /*
   Note about file(s) specified in the start command:
       The path(s) specified here are sent back exactly as-is
       to clients such as gfClient, hgBlat, webBlat.
       It is intended that relative paths are used.
       Absolute paths starting with '/' tend not to work
@@ -579,44 +583,38 @@
 time_t curtime;
 struct tm *loctime;
 char timestr[256];
 
 netBlockBrokenPipes();
 
 curtime = time (NULL);           /* Get the current time. */
 loctime = localtime (&curtime);  /* Convert it to local time representation. */
 strftime (timestr, sizeof(timestr), "%Y-%m-%d %H:%M", loctime); /* formate datetime as string */
 								
 logInfo("gfServer version %s on host %s, port %s  (%s)", gfVersion, 
 	hostName, portName, timestr);
 struct hash *perSeqMaxHash = maybePerSeqMax(fileCount, seqFiles);
 
 time_t startIndexTime = clock1000();
-if (writeIndex || (!writeIndex && (indexFile == NULL)))
+if (indexFile == NULL)
     {
     char *desc = doTrans ? "translated" : "untranslated";
     uglyf("starting %s server...\n", desc);
     logInfo("setting up %s index", desc);
     gfIdx = genoFindIndexBuild(fileCount, seqFiles, minMatch, maxGap, tileSize, repMatch, doTrans, NULL,
                                allowOneMismatch, doMask, stepSize, noSimpRepMask);
     logInfo("indexing building complete in  %4.3f seconds", 0.001 * (clock1000() - startIndexTime));
-    if (writeIndex)
-        {
-        genoFindIndexWrite(gfIdx, indexFile);
-        logInfo("index file built, exiting: %s", indexFile);
-        exit(0);
-        }
     }
 else
     {
     gfIdx = genoFindIndexLoad(indexFile, doTrans);
     logInfo("indexing loading complete in  %4.3f seconds", 0.001 * (clock1000() - startIndexTime));
     }
 
 /* Set up socket.  Get ready to listen to it. */
 socketHandle = netAcceptingSocket(port, 100);
 if (socketHandle < 0)
     errAbort("Fatal Error: Unable to open listening socket on port %d.", port);
 
 logInfo("Server ready for queries!");
 printf("Server ready for queries!\n");
 int connectFailCount = 0;
@@ -991,30 +989,39 @@
 sprintf(buf, "%sfiles", gfSignature());
 mustWriteFd(sd, buf, strlen(buf));
 
 /* Get count of files, and then each file name. */
 if (netGetString(sd, buf) != NULL)
     {
     fileCount = atoi(buf);
     for (i=0; i<fileCount; ++i)
 	{
 	printf("%s\n", netRecieveString(sd, buf));
 	}
     }
 close(sd);
 }
 
+static void buildIndex(char *gfxFile, int fileCount, char *seqFiles[])
+/* Build an index over the fileCount sequence files in seqFiles, using settings defined outside this function (minMatch, maxGap, tileSize, repMatch, doTrans, allowOneMismatch, doMask, stepSize, noSimpRepMask), and write it to gfxFile. */
+{
+struct genoFindIndex *gfIdx = genoFindIndexBuild(fileCount, seqFiles, minMatch, maxGap, tileSize,
+                                                 repMatch, doTrans, NULL, allowOneMismatch, doMask, stepSize, noSimpRepMask);
+genoFindIndexWrite(gfIdx, gfxFile);
+}
+
+
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 char *command;
 
 gfCatchPipes();
 dnaUtilOpen();
 optionInit(&argc, argv, optionSpecs);
 command = argv[1];
 if (optionExists("trans"))
     {
     doTrans = TRUE;
     tileSize = 4;
     minMatch = 3;
     maxGap = 0;
@@ -1100,21 +1107,27 @@
 else if (sameWord(command, "status"))
     {
     if (argc != 4)
 	usage();
     if (statusServer(argv[2], argv[3]))
 	{
 	exit(-1);
 	}
     }
 else if (sameWord(command, "files"))
     {
     if (argc != 4)
 	usage();
     getFileList(argv[2], argv[3]);
     }
+else if (sameWord(command, "index"))
+    {
+    if (argc < 4)
+        usage();
+    buildIndex(argv[2], argc-3, argv+3);
+    }
 else
     {
     usage();
     }
 return 0;
 }