eeac40956b3dd6611f58aeb847ff5c404d3ce883
angie
  Fri Dec 1 09:05:41 2023 -0800
Support trees whose reference/root is not from a db or hub, but rather a custom .2bit file.
This means that the selected pathogen may or may not also be a db/hub, so the selection interacts with the db cart variable but does not always match it.
Also, in phyloPlace.c, update RSV metadata column headers (RGCC lineages replace Ramaekers 2020 clades).

diff --git src/hg/hgPhyloPlace/writeCustomTracks.c src/hg/hgPhyloPlace/writeCustomTracks.c
index 292df91..89b6571 100644
--- src/hg/hgPhyloPlace/writeCustomTracks.c
+++ src/hg/hgPhyloPlace/writeCustomTracks.c
@@ -1,32 +1,29 @@
 /* Write custom tracks: one per subtree and one with user's uploaded samples. */
 
 /* Copyright (C) 2020 The Regents of the University of California */
 
 #include "common.h"
 #include "hash.h"
 #include "hdb.h"
 #include "iupac.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
 #include "trashDir.h"
 #include "vcf.h"
 
-// Parameter constants:
-int minSamplesForOwnTree = 3;  // If user uploads at least this many samples, show tree for them.
-
 static char ***imputedBasesByPosition(struct hash *samplePlacements, struct slName *sampleIds,
                                       int chromSize)
 /* Unpack imputedBases into an array of (a few) arrays indexed by 0-based position and sample index,
  * for speedy retrieval while rewriting user VCF. */
 {
 char ***impsByPos = needMem(chromSize * sizeof(char **));
 int sampleCount = slCount(sampleIds);
 struct slName *sample;
 int ix;
 for (ix = 0, sample = sampleIds;  sample != NULL; ix++, sample = sample->next)
     {
     struct placementInfo *info = hashFindVal(samplePlacements, sample->name);
     if (!info)
         errAbort("imputedBasesByPosition: can't find placementInfo for sample '%s'", sample->name);
     struct baseVal *bv;
@@ -169,79 +166,67 @@
             fputs(words[0], f);
             for (i = 1;  i < colCount;  i++)
                 {
                 fputc('\t', f);
                 fputs(words[i], f);
                 }
             }
             fputc('\n', f);
         }
     carefulClose(&f);
     reportTiming(pStartTime, "Make user VCF with imputed bases");
     }
 return ctVcfTn;
 }
 
-static struct phyloTree *uploadedSamplesTree(char *sampleTreeFile, char *bigTreeFile,
-                                             struct slName *sampleIds)
-/* Read in tree from bigTreeFile, prune all nodes that have no leaf descendants in sampleIds,
- * save pruned tree to sampleTreeFile. */
-{
-struct phyloTree *tree = phyloOpenTree(bigTreeFile);
-tree = phyloPruneToIds(tree, sampleIds);
-FILE *f = mustOpen(sampleTreeFile, "w");
-phyloPrintTree(tree, f);
-carefulClose(&f);
-return tree;
-}
-
 static int heightForSampleCount(int fontHeight, int count)
 /* Return a height sufficient for this many labels stacked vertically. */
 {
 return (fontHeight + 1) * count;
 }
 
-static struct phyloTree *addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn,
-                                                  char *bigTreePlusFile, struct slName *sampleIds,
+static void addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn,
+                                     struct slName *sampleIds, struct phyloTree *sampleTree,
                                      char *geneTrack, int fontHeight, int *pStartTime)
 /* Make custom track with uploaded VCF.  If there are enough samples to make a non-trivial tree,
  * then make the tree by pruning down the big tree plus samples to only the uploaded samples. */
 {
-struct phyloTree *sampleTree = NULL;
 fprintf(ctF, "track name=uploadedSamples description='Uploaded sample genotypes' "
         "type=vcf visibility=pack "
         "hapClusterEnabled=on hapClusterHeight=%d ",
         heightForSampleCount(fontHeight, slCount(sampleIds)));
-if (slCount(sampleIds) >= minSamplesForOwnTree)
+if (sampleTree != NULL)
     {
+    // Write tree of uploaded samples to a file so we can draw it in left label area.
     struct tempName sampleTreeTn;
     trashDirFile(&sampleTreeTn, "ct", "uploadedSamples", ".nwk");
-    sampleTree = uploadedSamplesTree(sampleTreeTn.forCgi, bigTreePlusFile, sampleIds);
+    FILE *f = mustOpen(sampleTreeTn.forCgi, "w");
+    phyloPrintTree(sampleTree, f);
+    carefulClose(&f);
     fprintf(ctF, "hapClusterMethod='treeFile %s' ", sampleTreeTn.forCgi);
     }
 else
     fprintf(ctF, "hapClusterMethod=fileOrder ");
 if (isNotEmpty(geneTrack))
     fprintf(ctF, "hapClusterColorBy=function geneTrack=%s", geneTrack);
 fputc('\n', ctF);
 struct lineFile *vcfLf = lineFileOpen(vcfTn->forCgi, TRUE);
 char *line;
 while (lineFileNext(vcfLf, &line, NULL))
     fprintf(ctF, "%s\n",line);
 lineFileClose(&vcfLf);
 reportTiming(pStartTime, "add custom track for uploaded samples");
-return sampleTree;
 }
 
 static struct vcfFile *parseUserVcf(char *userVcfFile, int chromSize, int *pStartTime)
 /* Read in user VCF file and parse genotypes. */
 {
 struct vcfFile *userVcf = vcfFileMayOpen(userVcfFile, NULL, 0, chromSize, 0, chromSize, TRUE);
 if (! userVcf)
     return NULL;
 struct vcfRecord *rec;
 for (rec = userVcf->records;  rec != NULL;  rec = rec->next)
     vcfParseGenotypesGtOnly(rec);
 reportTiming(pStartTime, "read userVcf and parse genotypes");
 return userVcf;
 }
 
@@ -525,47 +510,42 @@
 struct subtreeInfo *ti;
 int subtreeNum;
 for (subtreeNum = 1, ti = subtreeInfoList;  ti != NULL;  ti = ti->next, subtreeNum++)
     {
     writeSubtreeTrackLine(ctF, ti, subtreeNum, source, geneTrack, fontHeight);
     writeVcfHeader(ctF, ti->subtreeNameList);
     writeSubtreeVcf(ctF, ti->subtreeIdToIx, chrom, chromSize, userVcf, ti->subtree);
     fputc('\n', ctF);
     }
 vcfFileFree(&userVcf);
 reportTiming(pStartTime, "write subtree custom tracks");
 }
 
 struct tempName *writeCustomTracks(char *db, struct tempName *vcfTn, struct usherResults *ur,
                                    struct slName *sampleIds, char *source, int fontHeight,
-                                   struct phyloTree **retSampleTree, int *pStartTime)
+                                   struct phyloTree *sampleTree, int *pStartTime)
 /* Write one custom track per subtree, and one custom track with just the user's uploaded samples. */
 {
 char *chrom = hDefaultChrom(db);
 int chromSize = hChromSize(db, chrom);
 char *geneTrack = phyloPlaceDbSetting(db, "geneTrack");
 // Default to SARS-CoV-2 or hMPXV values if setting is missing from config.ra.
 if (isEmpty(geneTrack))
     geneTrack = sameString(db, "wuhCor1") ? "ncbiGeneBGP" : "ncbiGene";
 struct tempName *ctVcfTn = userVcfWithImputedBases(vcfTn, ur->samplePlacements, sampleIds,
                                                    chromSize, pStartTime);
 struct tempName *ctTn;
 AllocVar(ctTn);
 trashDirFile(ctTn, "ct", "ct_pp", ".ct");
 FILE *ctF = mustOpen(ctTn->forCgi, "w");
 int subtreeCount = slCount(ur->subtreeInfoList);
 if (subtreeCount <= MAX_SUBTREE_CTS)
     addSubtreeCustomTracks(ctF, ctVcfTn->forCgi, ur->subtreeInfoList, ur->samplePlacements,
                            chrom, chromSize, source, geneTrack, fontHeight, pStartTime);
 else
     printf("<p>Subtree custom tracks are added when there are at most %d subtrees, "
            "but %d subtrees were found.</p>\n",
            MAX_SUBTREE_CTS, subtreeCount);
-struct phyloTree *sampleTree = addSampleOnlyCustomTrack(ctF, ctVcfTn,
-                                                        ur->singleSubtreeInfo->subtreeTn->forCgi,
-                                                        sampleIds, geneTrack, fontHeight,
-                                                        pStartTime);
-if (retSampleTree)
-    *retSampleTree = sampleTree;
+addSampleOnlyCustomTrack(ctF, ctVcfTn, sampleIds, sampleTree, geneTrack, fontHeight, pStartTime);
 carefulClose(&ctF);
 return ctTn;
 }