6b482d67559bf0e2f7c27eed5182a18f85f0db2b
angie
  Fri Sep 13 13:55:38 2024 -0700
Add config setting enableOptimization (for small trees).  Since there are now too many configurable usher-sampled options to handle by truncating a list, instead we now add options if configured.

diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c
index 9a6976a..f4ad0e1 100644
--- src/hg/hgPhyloPlace/runUsher.c
+++ src/hg/hgPhyloPlace/runUsher.c
@@ -1222,72 +1222,87 @@
     reportTiming(pStartTime, "get socket");
     if (serverSocket > 0)
         {
         success = sendQuery(serverSocket, cmd, org, treeChoices, errFile, TRUE, anchorFile);
         close(serverSocket);
         if (success)
             reportTiming(pStartTime, "send query and get response (successful)");
         else
             reportTiming(pStartTime, "send query and get response (failed)");
         }
     carefulClose(&errFile);
     }
 return success;
 }
 
+static int indexOfNull(char *stringArray[])
+/* Return the index of the first NULL element of stringArray (the count of leading non-NULL
+ * entries).  The scan has no length limit, so stringArray must contain at least one NULL! */
+{
+int ix = 0;
+while (stringArray[ix] != NULL)
+    ix++;
+return ix;
+}
+
 #define MAX_SUBTREES 1000
 
 struct usherResults *runUsher(char *org, char *usherPath, char *usherAssignmentsPath, char *vcfFile,
                               int subtreeSize, struct slName **pUserSampleIds,
                               struct treeChoices *treeChoices, char *anchorFile, int *pStartTime)
 /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and
  * subtrees to trash files, return list of slRef to struct tempName for the trash files
  * and parse other results out of stderr output.  The usher-sampled version of usher might
  * modify userSampleIds, adding a prefix if a sample with the same name is already in the tree. */
 {
 struct usherResults *results = usherResultsNew();
 char subtreeSizeStr[16];
 safef(subtreeSizeStr, sizeof subtreeSizeStr, "%d", subtreeSize);
 struct tempName tnOutDir;
 trashDirFile(&tnOutDir, "ct", "usher_outdir", ".dir");
 char *cmd[] = { usherPath, "-v", vcfFile, "-i", usherAssignmentsPath, "-d", tnOutDir.forCgi,
                 "-k", subtreeSizeStr, "-K", SINGLE_SUBTREE_SIZE, "-u",
                 "-T", USHER_NUM_THREADS,       // Don't pass args from -T onward to server
-                "--optimization_radius", "0",  // Don't pass these to original usher, only -sampled
-                "--no-ignore-prefix", USHER_DEDUP_PREFIX,
-                "--anchor-samples", anchorFile,
+                // Room for extra arguments if using usher-sampled
+                NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL };
 struct tempName tnStderr;
 trashDirFile(&tnStderr, "ct", "usher_stderr", ".txt");
 struct tempName tnServerStderr;
 trashDirFile(&tnServerStderr, "ct", "usher_server_stderr", ".txt");
 char *stderrFile = tnServerStderr.forCgi;
 if (! runUsherServer(org, cmd, tnServerStderr.forCgi, treeChoices, anchorFile, pStartTime))
     {
-    if (!endsWith(usherPath, "-sampled"))
+    if (endsWith(usherPath, "-sampled"))
         {
-        // Truncate cmd for original usher: remove usher-sampled-specific option
-        int ix = stringArrayIx("--optimization_radius", cmd, ArraySize(cmd)-1);
-        if (ix > 0)
-            cmd[ix] = NULL;
+        // Add --no-ignore-prefix
+        int ix = indexOfNull(cmd);
+        cmd[ix++] = "--no-ignore-prefix";
+        cmd[ix++] = USHER_DEDUP_PREFIX;
+        // Add --anchor-samples if configured
+        if (isNotEmpty(anchorFile))
+            {
+            cmd[ix++] = "--anchor-samples";
+            cmd[ix++] = anchorFile;
             }
-    else if (isEmpty(anchorFile))
+        // Add --optimization_radius 0 unless optimization is explicitly enabled
+        char *enableOptimization = phyloPlaceOrgSetting(org, "enableOptimization");
+        if (SETTING_NOT_ON(enableOptimization))
             {
-        // Don't pass --anchor-samples option unless it's configured
-        int ix = stringArrayIx("--anchor-samples", cmd, ArraySize(cmd)-1);
-        if (ix > 0)
-            cmd[ix] = NULL;
+            cmd[ix++] = "--optimization_radius";
+            cmd[ix++] = "0";
+            }
         }
     runUsherCommand(cmd, tnStderr.forCgi, anchorFile, pStartTime);
     stderrFile = tnStderr.forCgi;
     }
 
 struct tempName *singleSubtreeTn = NULL, *subtreeTns[MAX_SUBTREES];
 struct variantPathNode *singleSubtreeMuts = NULL, *subtreeMuts[MAX_SUBTREES];
 parsePlacements(tnOutDir.forCgi, stderrFile, results->samplePlacements, pUserSampleIds);
 int subtreeCount = processOutDirFiles(results, tnOutDir.forCgi, &singleSubtreeTn,
                                       &singleSubtreeMuts, subtreeTns, subtreeMuts, MAX_SUBTREES);
 if (singleSubtreeTn)
     {
     results->subtreeInfoList = parseSubtrees(subtreeCount, singleSubtreeTn, singleSubtreeMuts,
                                              subtreeTns, subtreeMuts, *pUserSampleIds);
     results->singleSubtreeInfo = results->subtreeInfoList;