2aa99ff0ca7a50af99ba59a3e8a54d7a819e7808
angie
  Thu Aug 26 12:30:42 2021 -0700
Yikes, I was forgetting to expand condensed nodes in the big tree when making custom track VCF for subtrees, resulting in some sequences not being found & having no alt alleles in VCF.

diff --git src/hg/hgPhyloPlace/writeCustomTracks.c src/hg/hgPhyloPlace/writeCustomTracks.c
index 70b7ff0..fd27450 100644
--- src/hg/hgPhyloPlace/writeCustomTracks.c
+++ src/hg/hgPhyloPlace/writeCustomTracks.c
@@ -303,74 +303,83 @@
 for (s = sampleNames;  s != NULL;  s = s->next)
     fprintf(f, "\t%s", s->name);
 fputc('\n', f);
 }
 
 static struct slRef *slRefListCopyReverse(struct slRef *inList)
 /* Return a clone of inList but in reverse order. */
 {
 struct slRef *outList = NULL;
 struct slRef *ref;
 for (ref = inList;  ref != NULL;  ref = ref->next)
     slAddHead(&outList, slRefNew(ref->val));
 return outList;
 }
 
-static void treeToBaseAlleles(struct phyloTree *node, char *refBases, char **sampleBases,
-                              struct hash *sampleToIx, struct slRef *parentMuts)
+static void treeToBaseAlleles(struct phyloTree *node, struct hash *condensedNodes, char *refBases,
+                              char **sampleBases, struct hash *sampleToIx, struct slRef *parentMuts)
 /* Traverse tree, building up an array of reference bases indexed by position, and an array
  * (indexed by pos) of sample allele arrays indexed by sampleToIx. */
 {
 struct slRef *nodeMuts = parentMuts;
 if (node->priv != NULL)
     {
     // Start out with a copy of parent node mutations in reverse order so we can add at head
     // and then reverse all.
     struct singleNucChange *sncs = node->priv;
     nodeMuts = slRefListCopyReverse(parentMuts);
     slAddHead(&nodeMuts, slRefNew(sncs));
     slReverse(&nodeMuts);
     }
 if (node->numEdges > 0)
     {
     int i;
     for (i = 0;  i < node->numEdges;  i++)
-        treeToBaseAlleles(node->edges[i], refBases, sampleBases, sampleToIx, nodeMuts);
+        treeToBaseAlleles(node->edges[i], condensedNodes, refBases, sampleBases, sampleToIx,
+                          nodeMuts);
     }
 else
     {
     // Leaf node: if in sampleToIx, then for each mutation (beware back-mutations),
     // set refBase if it has not already been set and set sample allele.
-    int ix = hashIntValDefault(sampleToIx, node->ident->name, -1);
+    struct slName *nameList = hashFindVal(condensedNodes, node->ident->name);
+    if (nameList == NULL)
+        nameList = slNameNew(node->ident->name);
+    struct slName *name;
+    for (name = nameList;  name != NULL;  name = name->next)
+        {
+        int ix = hashIntValDefault(sampleToIx, name->name, -1);
         if (ix >= 0)
             {
             struct slRef *mlRef;
             for (mlRef = nodeMuts;  mlRef != NULL;  mlRef = mlRef->next)
                 {
                 struct singleNucChange *snc, *sncs = mlRef->val;
                 for (snc = sncs;  snc != NULL;  snc = snc->next)
                     {
                     // Don't let a back-mutation overwrite the true ref value:
                     if (!refBases[snc->chromStart])
                         refBases[snc->chromStart] = snc->refBase;
                     if (sampleBases[snc->chromStart] == NULL)
                         AllocArray(sampleBases[snc->chromStart], sampleToIx->elCount);
                     sampleBases[snc->chromStart][ix] = snc->newBase;
                     }
                 }
             }
         }
+    slFreeList(&nameList);
+    }
 if (node->priv != NULL)
     slFreeList(&nodeMuts);
 }
 
 static void vcfToBaseAlleles(struct vcfFile *vcff, char *refBases, char **sampleBases,
                              struct hash *sampleToIx)
 /* Build up an array of reference bases indexed by position, and an array (indexed by pos)
  * of sample allele arrays indexed by sampleToIx, from variants & genotypes in vcff. */
 {
 int gtIxToSampleIx[vcff->genotypeCount];
 int gtIx;
 for (gtIx = 0;  gtIx < vcff->genotypeCount;  gtIx++)
     {
     int ix = hashIntValDefault(sampleToIx, vcff->genotypeIds[gtIx], -1);
     gtIxToSampleIx[gtIx] = ix;
@@ -498,76 +507,77 @@
         fprintf(f, ",%c", toupper(altAlleles[altIx]));
         fputs("\t.\t.\t", f);
         fprintf(f, "AC=%d", altCounts[0]);
         for (altIx = 1;  altIx < altCount;  altIx++)
             fprintf(f, ",%d", altCounts[altIx]);
         fprintf(f, ";AN=%d\tGT", sampleCount - noCallCount);
         int gtIx;
         for (gtIx = 0;  gtIx < sampleCount;  gtIx++)
             fprintf(f, "\t%c", genotypes[gtIx]);
         fputc('\n', f);
         }
     }
 }
 
 static void writeSubtreeVcf(FILE *f, struct hash *sampleToIx,
-                            struct vcfFile *userVcf, struct phyloTree *bigTree)
+                            struct vcfFile *userVcf, struct mutationAnnotatedTree *bigTree)
 /* Write rows of VCF with genotypes for samples in sampleToIx (with order determined by sampleToIx)
  * with some samples found in userVcf and the rest found in bigTree which has been annotated
  * with single-nucleotide variants. */
 {
 char refBases[chromSize];
 memset(refBases, 0, sizeof refBases);
 char *sampleBases[chromSize];
 memset(sampleBases, 0, sizeof sampleBases);
-treeToBaseAlleles(bigTree, refBases, sampleBases, sampleToIx, NULL);
+treeToBaseAlleles(bigTree->tree, bigTree->condensedNodes, refBases, sampleBases, sampleToIx, NULL);
 vcfToBaseAlleles(userVcf, refBases, sampleBases, sampleToIx);
 int sampleCount = sampleToIx->elCount;
 baseAllelesToVcf(refBases, sampleBases, chromSize, sampleCount, f);
 int i;
 for (i = 0;  i < chromSize;  i++)
     freeMem(sampleBases[i]);
 }
 
 static void addSubtreeCustomTracks(FILE *ctF, char *userVcfFile, struct subtreeInfo *subtreeInfoList,
-                                   struct hash *samplePlacements, struct phyloTree *bigTree,
+                                   struct hash *samplePlacements,
+                                   struct mutationAnnotatedTree *bigTree,
                                    char *source, int fontHeight, int *pStartTime)
 /* For each subtree trashfile, write VCF+treeFile custom track text to ctF. */
 {
 struct vcfFile *userVcf = parseUserVcf(userVcfFile, pStartTime);
 if (! userVcf)
     {
     warn("Problem parsing VCF file with user variants '%s'; can't make subtree subtracks.",
          userVcfFile);
     return;
     }
 if (!bigTree)
     return;
 struct subtreeInfo *ti;
 for (ti = subtreeInfoList;  ti != NULL;  ti = ti->next)
     {
     writeSubtreeTrackLine(ctF, ti, source, fontHeight);
     writeVcfHeader(ctF, ti->subtreeNameList);
     writeSubtreeVcf(ctF, ti->subtreeIdToIx, userVcf, bigTree);
     fputc('\n', ctF);
     }
 vcfFileFree(&userVcf);
 reportTiming(pStartTime, "write subtree custom tracks");
 }
 
 struct tempName *writeCustomTracks(struct tempName *vcfTn, struct usherResults *ur,
-                                   struct slName *sampleIds, struct phyloTree *bigTree,
+                                   struct slName *sampleIds, struct mutationAnnotatedTree *bigTree,
                                    char *source, int fontHeight, struct phyloTree **retSampleTree,
                                    int *pStartTime)
 /* Write one custom track per subtree, and one custom track with just the user's uploaded samples. */
 {
 struct tempName *ctVcfTn = userVcfWithImputedBases(vcfTn, ur->samplePlacements, sampleIds);
 struct tempName *ctTn;
 AllocVar(ctTn);
 trashDirFile(ctTn, "ct", "ct_pp", ".ct");
 FILE *ctF = mustOpen(ctTn->forCgi, "w");
 int subtreeCount = slCount(ur->subtreeInfoList);
 if (subtreeCount <= MAX_SUBTREE_CTS)
     addSubtreeCustomTracks(ctF, ctVcfTn->forCgi, ur->subtreeInfoList, ur->samplePlacements, bigTree,
                            source, fontHeight, pStartTime);
 else
     printf("<p>Subtree custom tracks are added when there are at most %d subtrees, "