6b482d67559bf0e2f7c27eed5182a18f85f0db2b angie Fri Sep 13 13:55:38 2024 -0700 Add config setting enableOptimization (for small trees). Since there are now too many configurable usher-sampled options to handle by truncating a list, instead we now add options if configured. diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c index 9a6976a..f4ad0e1 100644 --- src/hg/hgPhyloPlace/runUsher.c +++ src/hg/hgPhyloPlace/runUsher.c @@ -1222,72 +1222,87 @@ reportTiming(pStartTime, "get socket"); if (serverSocket > 0) { success = sendQuery(serverSocket, cmd, org, treeChoices, errFile, TRUE, anchorFile); close(serverSocket); if (success) reportTiming(pStartTime, "send query and get response (successful)"); else reportTiming(pStartTime, "send query and get response (failed)"); } carefulClose(&errFile); } return success; } +static int indexOfNull(char *stringArray[]) +/* Return the index of the first NULL element of stringArray. + * Do not call this unless stringArray has at least one NULL! */ +{ +int ix = 0; +while (stringArray[ix] != NULL) + ix++; +return ix; +} + #define MAX_SUBTREES 1000 struct usherResults *runUsher(char *org, char *usherPath, char *usherAssignmentsPath, char *vcfFile, int subtreeSize, struct slName **pUserSampleIds, struct treeChoices *treeChoices, char *anchorFile, int *pStartTime) /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and * subtrees to trash files, return list of slRef to struct tempName for the trash files * and parse other results out of stderr output. The usher-sampled version of usher might * modify userSampleIds, adding a prefix if a sample with the same name is already in the tree. 
*/ { struct usherResults *results = usherResultsNew(); char subtreeSizeStr[16]; safef(subtreeSizeStr, sizeof subtreeSizeStr, "%d", subtreeSize); struct tempName tnOutDir; trashDirFile(&tnOutDir, "ct", "usher_outdir", ".dir"); char *cmd[] = { usherPath, "-v", vcfFile, "-i", usherAssignmentsPath, "-d", tnOutDir.forCgi, "-k", subtreeSizeStr, "-K", SINGLE_SUBTREE_SIZE, "-u", "-T", USHER_NUM_THREADS, // Don't pass args from -T onward to server - "--optimization_radius", "0", // Don't pass these to original usher, only -sampled - "--no-ignore-prefix", USHER_DEDUP_PREFIX, - "--anchor-samples", anchorFile, + // Room for extra arguments if using usher-sampled + NULL, NULL, NULL, NULL, NULL, NULL, NULL }; struct tempName tnStderr; trashDirFile(&tnStderr, "ct", "usher_stderr", ".txt"); struct tempName tnServerStderr; trashDirFile(&tnServerStderr, "ct", "usher_server_stderr", ".txt"); char *stderrFile = tnServerStderr.forCgi; if (! runUsherServer(org, cmd, tnServerStderr.forCgi, treeChoices, anchorFile, pStartTime)) { - if (!endsWith(usherPath, "-sampled")) + if (endsWith(usherPath, "-sampled")) { - // Truncate cmd for original usher: remove usher-sampled-specific option - int ix = stringArrayIx("--optimization_radius", cmd, ArraySize(cmd)-1); - if (ix > 0) - cmd[ix] = NULL; + // Add --no-ignore-prefix + int ix = indexOfNull(cmd); + cmd[ix++] = "--no-ignore-prefix"; + cmd[ix++] = USHER_DEDUP_PREFIX; + // Add --anchor-samples if configured + if (isNotEmpty(anchorFile)) + { + cmd[ix++] = "--anchor-samples"; + cmd[ix++] = anchorFile; } - else if (isEmpty(anchorFile)) + // Add --optimization_radius 0 unless optimization is explicitly enabled + char *enableOptimization = phyloPlaceOrgSetting(org, "enableOptimization"); + if (SETTING_NOT_ON(enableOptimization)) { - // Don't pass --anchor-samples option unless it's configured - int ix = stringArrayIx("--anchor-samples", cmd, ArraySize(cmd)-1); - if (ix > 0) - cmd[ix] = NULL; + cmd[ix++] = "--optimization_radius"; + cmd[ix++] = "0"; + 
} } runUsherCommand(cmd, tnStderr.forCgi, anchorFile, pStartTime); stderrFile = tnStderr.forCgi; } struct tempName *singleSubtreeTn = NULL, *subtreeTns[MAX_SUBTREES]; struct variantPathNode *singleSubtreeMuts = NULL, *subtreeMuts[MAX_SUBTREES]; parsePlacements(tnOutDir.forCgi, stderrFile, results->samplePlacements, pUserSampleIds); int subtreeCount = processOutDirFiles(results, tnOutDir.forCgi, &singleSubtreeTn, &singleSubtreeMuts, subtreeTns, subtreeMuts, MAX_SUBTREES); if (singleSubtreeTn) { results->subtreeInfoList = parseSubtrees(subtreeCount, singleSubtreeTn, singleSubtreeMuts, subtreeTns, subtreeMuts, *pUserSampleIds); results->singleSubtreeInfo = results->subtreeInfoList;