src/hg/hgPhyloPlace/runUsher.c f22f0b5f1d467e570dad2407813748889408d9bd

f22f0b5f1d467e570dad2407813748889408d9bd
angie
  Tue Sep 14 09:38:11 2021 -0700
Don't rely on output lines that usher prints only when it is compiled with -DDEBUG=1; those lines are going away in server mode.  The 'Best nodes' output was already not being displayed, so just remove the code that parsed it.  The sample mutations are already available in seqInfo->sncList, so just reformat that list into placementInfo->sampleMuts.

diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c
index 9689bd8..605894e 100644
--- src/hg/hgPhyloPlace/runUsher.c
+++ src/hg/hgPhyloPlace/runUsher.c
@@ -5,33 +5,30 @@
 #include "common.h"
 #include "dnautil.h"
 #include "hash.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "regexHelper.h"
 #include "pipeline.h"
 #include "trashDir.h"
 
 // Keywords in stderr output of usher:
 #define sampleIdPrefix "Sample name:"
 #define pScorePrefix "Parsimony score:"
 #define numPlacementsPrefix "Number of parsimony-optimal placements:"
-#define bestNodePrefix "Best node ("
-#define mutationsPrefix "Mutations: "
-#define sampleMutsPrefix "Sample mutations:"
 #define imputedMutsPrefix "Imputed mutations:"
 
 static void parseSampleIdAndParsimonyScore(char **words, char **retSampleId,
                                            struct hash *samplePlacements)
 /* If words[] seems to contain columns of the line that gives sample ID and parsimony score,
  * then parse out those values. */
 {
 // Example line:
 // words[0] = Current tree size (#nodes): 70775
 // words[1] = Sample name: MyLabSequence2
 // words[2] = Parsimony score: 1
 // words[3] = Number of parsimony-optimal placements: 1
 char *p = stringIn(sampleIdPrefix, words[1]);
 if (p)
     {
@@ -62,129 +59,30 @@
 
 static struct singleNucChange *parseSnc(char *sncStr)
 /* If sncStr looks like a <old><pos><new>-style single nucleotide change then parse out those
  * values & return singleNucChange (with parBase and newBase; no refBase), otherwise return NULL.  */
 {
 struct singleNucChange *snc = NULL;
 regmatch_t substrs[4];
 if (regexMatchSubstr(sncStr, "^([ACGT])([0-9]+)([ACGT])$", substrs, ArraySize(substrs)))
     {
     int chromStart = regexSubstringInt(sncStr, substrs[2]) - 1;
     snc = sncNew(chromStart, '\0', sncStr[0], sncStr[substrs[3].rm_so]);
     }
 return snc;
 }
 
-static struct variantPathNode *parsePipeyPath(char *pipeyPath)
-/* Parse something like "|C8782T|T28144C| > |C29095T|  > |G2494A|T11083G|C18501T|"
- * into a variant path with unknown node names. */
-{
-if (isEmpty(pipeyPath))
-    return NULL;
-struct variantPathNode *variantPath = NULL;
-char *words[strlen(pipeyPath) / 5];
-int wordCount = chopString(pipeyPath, " > ", words, ArraySize(words));
-int i;
-for (i = 0;  i < wordCount;  i++)
-    {
-    struct variantPathNode *vpn;
-    AllocVar(vpn);
-    vpn->nodeName = cloneString("?");
-    char *mutStr = words[i];
-    // Trim first and last pipes
-    if (mutStr[0] == '|')
-        mutStr++;
-    if (mutStr[strlen(mutStr)-1] == '|')
-        mutStr[strlen(mutStr)-1] = '\0';
-    // Split by pipe
-    char *muts[strlen(mutStr) / 4];
-    int sncCount = chopString(mutStr, "|", muts, ArraySize(muts));
-    int j;
-    for (j = 0;  j < sncCount;  j++)
-        {
-        struct singleNucChange *snc = parseSnc(muts[j]);
-        if (!snc)
-            errAbort("Expected single-nucleotide change but got '%s' when parsing pipe-separated "
-                     "best node path", muts[j]);
-        slAddHead(&vpn->sncList, snc);
-        }
-    slReverse(&vpn->sncList);
-    slAddHead(&variantPath, vpn);
-    }
-slReverse(&variantPath);
-return variantPath;
-}
-
-static boolean parseBestNode(char **words, struct placementInfo *info)
-/* If the line starts with "Best node", parse out the name and path of the node
- * (or one of the nodes) with the lowest parsimony distance from the sample being placed;
- * add to info->bestNodes and return TRUE. */
-{
-// Example line (* means this node was chosen for placement):
-// words[0] = Best node (child)*: 1239
-// words[1] = Mutations: |C8782T|T28144C| > |C29095T| > |T8782C| > |T29095C| > |C28144T|
-// or
-// words[0] = Best node (sibling): SomeLeafName
-// words[1] = Mutations: |C8782T|T28144C| > |C29095T| > |T8782C| > |T29095C| > |C28144T| > |G11083T| > |G2494A|T11083G|C18501T|
-boolean matches = FALSE;
-if (stringIn(bestNodePrefix, words[0]))
-    {
-    matches = TRUE;
-    struct bestNodeInfo *bn;
-    AllocVar(bn);
-    char *p = words[0] + strlen(bestNodePrefix);
-    if (startsWith("sibling", p))
-        bn->isSibling = TRUE;
-    boolean isChosen = (stringIn(")*:", words[0]) != NULL);
-    p = stringIn(": ", words[0]);
-    if (p)
-        bn->name = cloneString(p + strlen(": "));
-    else
-        errAbort("parseBestNode: expected first column to have ': ' followed by name, but got '%s'",
-                 words[0]);
-    if (startsWith(mutationsPrefix, words[1]))
-        bn->variantPath = parsePipeyPath(words[1] + strlen(mutationsPrefix));
-    else
-        errAbort("parseBestNode: expected second column to have '" mutationsPrefix"' followed by "
-                 "path, but got '%s'", words[1]);
-    if (isChosen)
-        slAddHead(&info->bestNodes, bn);
-    else
-        slAddTail(&info->bestNodes, bn);
-    }
-return matches;
-}
-
-
-static boolean parseSampleMutations(char **words, struct placementInfo *info)
-/* If words[] looks like it defines the sample mutations relative to the reference genome,
- * then parse out the list and add to info->sampleMuts and return TRUE. */
-{
-// Example line:
-// words[0] = Sample mutations:
-// words[1] = |C241T| |C3037T| |C14408T| |A23403G|
-boolean matches = FALSE;
-if (stringIn(sampleMutsPrefix, words[0]))
-    {
-    matches = TRUE;
-    char *mutStr = words[1];
-    stripChar(mutStr, '|');
-    info->sampleMuts = slNameListFromString(mutStr, ' ');
-    }
-return matches;
-}
-
 static boolean parseImputedMutations(char **words, struct placementInfo *info)
 /* If words[] looks like it defines imputed mutations of the most recently named sample,
  * then parse out the list and add to info->imputedBases and return TRUE. */
 {
 // Example line:
 // words[0] = Imputed mutations: 
 // words[1] = 6709:A;23403:G
 boolean matches = FALSE;
 if (stringIn(imputedMutsPrefix, words[0]))
     {
     matches = TRUE;
     char *muts[strlen(words[1]) / 4];
     int mutCount = chopString(words[1], ";", muts, ArraySize(muts));
     struct baseVal *bvList = NULL;
     int i;
@@ -234,32 +132,30 @@
     safencpy(lineCpy, sizeof lineCpy, line, size);
     char *words[16];
     int wordCount = chopTabs(lineCpy, words);
     if (wordCount == 4)
         parseSampleIdAndParsimonyScore(words, &sampleId, samplePlacements);
     else if (wordCount == 2)
         {
         if (! sampleId)
             errAbort("Problem parsing stderr output of usher: "
                      "Got line starting with '%s' that was not preceded by a line that "
                      "defines sample ID.:\n%s", words[0], line);
         struct placementInfo *info = hashFindVal(samplePlacements, sampleId);
         if (!info)
             errAbort("Problem parsing stderr output of usher: "
                      "Can't find placement info for sample '%s'", sampleId);
-        if (! parseBestNode(words, info) &&
-            ! parseSampleMutations(words, info))
         parseImputedMutations(words, info);
         }
     }
 }
 
 static void parseVariantPaths(char *filename, struct hash *samplePlacements)
 /* Parse out space-sep list of {node ID, ':', node-associated ,-sep variant list} into
  * variantPathNode list and associate with sample ID. */
 {
 // Example line (note the back-mutation at 28144T... may want to highlight those):
 // words[0] = MySeq
 // words[1] = 1:C8782T,T28144C 2309:C29095T 2340:T8782C 2342:T29095C 2588:C28144T MySeq:C29867T 
 struct lineFile *lf = lineFileOpen(filename, TRUE);
 char *line;
 while (lineFileNext(lf, &line, NULL))