7e58340888377874edaad1dbc5174e20295f890c
angie
  Mon Feb 22 14:17:33 2021 -0800
Support upload of more sequences, add TSV file summarizing sample variants and placements.
Requested by Joe de Risi (UCSF).  Increase timeout to 10 minutes; make TSV with each sample's
ID, nuc muts, AA muts, imputed bases and path from root to sample.  Also use Yatish's new
-K subtree algorithm in usher: one subtree encompassing all uploaded samples, plus the
specified number of samples randomly selected from the rest of the tree.
Don't show every single sample name in the title because there can be 1000 samples in the
same subtree now.  :)

diff --git src/hg/hgPhyloPlace/writeCustomTracks.c src/hg/hgPhyloPlace/writeCustomTracks.c
index 6e98f88..0f16bb1 100644
--- src/hg/hgPhyloPlace/writeCustomTracks.c
+++ src/hg/hgPhyloPlace/writeCustomTracks.c
@@ -1,569 +1,582 @@
 /* Write custom tracks: one per subtree and one with user's uploaded samples. */
 
 /* Copyright (C) 2020 The Regents of the University of California */
 
 #include "common.h"
 #include "hash.h"
 #include "iupac.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
 #include "trashDir.h"
 #include "vcf.h"
 
 // wuhCor1-specific:
 char *geneTrack = "ncbiGeneBGP";
 int maxVcfRows = 30000;
 
 // Globals
 extern char *chrom;
 extern int chromSize;
 
 // Parameter constants:
 int minSamplesForOwnTree = 3;  // If user uploads at least this many samples, show tree for them.
 
 static char ***imputedBasesByPosition(struct hash *samplePlacements, struct slName *sampleIds)
 /* Unpack imputedBases into an array of (a few) arrays indexed by 0-based position and sample index,
  * for speedy retrieval while rewriting user VCF. */
 {
 char ***impsByPos = needMem(chromSize * sizeof(char **));
 int sampleCount = slCount(sampleIds);
 struct slName *sample;
 int ix;
 for (ix = 0, sample = sampleIds;  sample != NULL; ix++, sample = sample->next)
     {
     struct placementInfo *info = hashFindVal(samplePlacements, sample->name);
     if (!info)
         errAbort("imputedBasesByPosition: can't find placementInfo for sample '%s'", sample->name);
     struct baseVal *bv;
     for (bv = info->imputedBases;  bv != NULL;  bv = bv->next)
         {
         if (!impsByPos[bv->chromStart])
             impsByPos[bv->chromStart] = needMem(sampleCount * sizeof(char *));
         impsByPos[bv->chromStart][ix] = bv->val;
         }
     }
 return impsByPos;
 }
 
 static boolean anyImputedBases(struct hash *samplePlacements, struct slName *sampleIds)
 /* Return TRUE if any sample has at least one imputed base. */
 {
 struct slName *sample;
 for (sample = sampleIds;  sample != NULL;  sample = sample->next)
     {
     struct placementInfo *info = hashFindVal(samplePlacements, sample->name);
     if (info && info->imputedBases)
         return TRUE;
     }
 return FALSE;
 }
 
 struct tempName *userVcfWithImputedBases(struct tempName *userVcfTn, struct hash *samplePlacements,
                                          struct slName *sampleIds)
 /* If usher imputed the values of ambiguous bases in user's VCF, then swap in
  * the imputed values and write a new VCF to use as the custom track.  Otherwise just use
  * the user's VCF for the custom track. */
 {
 struct tempName *ctVcfTn = userVcfTn;
 if (anyImputedBases(samplePlacements, sampleIds))
     {
     AllocVar(ctVcfTn);
     trashDirFile(ctVcfTn, "ct", "imputed", ".vcf");
     FILE *f = mustOpen(ctVcfTn->forCgi, "w");
     struct lineFile *lf = lineFileOpen(userVcfTn->forCgi, TRUE);
     int sampleCount = slCount(sampleIds);
     int colCount = VCF_NUM_COLS_BEFORE_GENOTYPES + sampleCount;
     char ***impsByPos = imputedBasesByPosition(samplePlacements, sampleIds);
     char *line;
     int size;
     while (lineFileNext(lf, &line, &size))
         {
         if (line[0] == '#')
             fputs(line, f);
         else
             {
             char *words[colCount];
             int wordCount = chopTabs(line, words);
             lineFileExpectWords(lf, colCount, wordCount);
             int chromStart = atoi(words[1]) - 1;
             if (impsByPos[chromStart])
                 {
                 verbose(3, "%d ref=%s alt=%s\n", chromStart+1, words[3], words[4]);
                 int gtIx;
                 for (gtIx = 0;  gtIx < sampleCount;  gtIx++)
                     if (impsByPos[chromStart][gtIx])
                         {
                         char *val = impsByPos[chromStart][gtIx];
                         // We can't just swap the value in; we need the numerical index.
                         // Is the imputed value ref, alt, or member of list of alts?
                         if (sameString(val, words[3]))
                             {
                             words[VCF_NUM_COLS_BEFORE_GENOTYPES+gtIx] = "0";
                             verbose(3, "gtIx %d: val matches ref (%s)\n", gtIx, val);
                             }
                         else if (sameString(val, words[4]))
                             {
                             words[VCF_NUM_COLS_BEFORE_GENOTYPES+gtIx] = "1";
                             verbose(3, "gtIx %d: val matches alt (%s)\n", gtIx, val);
                             }
                         else
                             {
                             // There may be multiple alts.  The alt itself might be ambiguous
                             // and require replacement.
                             boolean foundIt = FALSE;
                             struct slName *alt, *alts = slNameListFromComma(words[4]);
                             int colIx = VCF_NUM_COLS_BEFORE_GENOTYPES + gtIx;
                             int altIx;
                             for (altIx = 0, alt = alts;  alt != NULL;  altIx++, alt = alt->next)
                                 {
                                 if (sameString(val, alt->name))
                                     {
                                     // val is in the list of alts; substitute in its index.
                                     foundIt = TRUE;
                                     int len = strlen(words[colIx]);
                                     safef(words[colIx], len+1, "%d", altIx+1);
                                     verbose(3, "gtIx %d: val matches alts[%d] (%s)\n",
                                             gtIx, altIx, val);
                                     break;
                                     }
                                 }
                             if (!foundIt)
                                 {
                                 for (altIx = 0, alt = alts;  alt != NULL;  altIx++, alt = alt->next)
                                     {
                                     if (alt->name[1] == '\0' && val[1] == '\0' &&
                                         isIupacAmbiguous(alt->name[0]))
                                         {
                                         char lowerV = tolower(val[0]);
                                         if (strchr(iupacAmbiguousToString(alt->name[0]), lowerV))
                                             {
                                             verbose(3,
                                                   "gtIx %d: val (%s) matches ambig alts[%d] (%s)\n",
                                                     gtIx, val, altIx, alt->name);
                                             // The alt is an ambiguous character that includes val.
                                             // Replace this alt with val.
                                             foundIt = TRUE;
                                             alt->name[0] = val[0];
                                             // Update the alts column
                                             words[4] = slNameListToString(alts, '.');
                                             int len = strlen(words[colIx]);
                                             safef(words[colIx], len+1, "%d", altIx+1);
                                             verbose(3, "gtIx %d: swapped in %s\n",
                                                     gtIx, words[colIx]);
                                             break;
                                             }
                                         }
                                     }
                                 if (!foundIt)
                                     {
                                     // Add the imputed value as a new alt.
                                     int altCount = slCount(alts);
                                     verbose(3, "gtIx %d: adding val as new alts[%d] = %s\n",
                                             gtIx, altCount, val);
                                     int len = strlen(words[colIx]);
                                     safef(words[colIx], len+1, "%d", altCount+1);
                                     slAddTail(alts, slNameNew(val));
                                     words[4] = slNameListToString(alts, ',');
                                     verbose(3, "gtIx %d: swapped in %s\n", gtIx, words[colIx]);
                                     }
                                 }
                             }
                         }
                 }
             int i;
             fputs(words[0], f);
             for (i = 1;  i < colCount;  i++)
                 {
                 fputc('\t', f);
                 fputs(words[i], f);
                 }
             }
             fputc('\n', f);
         }
     carefulClose(&f);
     }
 return ctVcfTn;
 }
 
-static void uploadedSamplesTree(char *sampleTreeFile, char *bigTreeFile, struct slName *sampleIds)
+static struct phyloTree *uploadedSamplesTree(char *sampleTreeFile, char *bigTreeFile,
+                                             struct slName *sampleIds)
 /* Read in tree from bigTreeFile, prune all nodes that have no leaf descendants in sampleIds,
  * save pruned tree to sampleTreeFile. */
 {
 struct phyloTree *tree = phyloOpenTree(bigTreeFile);
 tree = phyloPruneToIds(tree, sampleIds);
 FILE *f = mustOpen(sampleTreeFile, "w");
 phyloPrintTree(tree, f);
 carefulClose(&f);
+return tree;
 }
 
 static int heightForSampleCount(int fontHeight, int count)
 /* Return a height sufficient for this many labels stacked vertically. */
 {
 return (fontHeight + 1) * count;
 }
 
-static void addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn, char *bigTreePlusFile,
-                                     struct slName *sampleIds,
+static struct phyloTree *addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn,
+                                                  char *bigTreePlusFile, struct slName *sampleIds,
                                                   int fontHeight, int *pStartTime)
 /* Make custom track with uploaded VCF.  If there are enough samples to make a non-trivial tree,
  * then make the tree by pruning down the big tree plus samples to only the uploaded samples. */
 {
+struct phyloTree *sampleTree = NULL;
 fprintf(ctF, "track name=uploadedSamples description='Uploaded sample genotypes' "
         "type=vcf visibility=pack "
         "hapClusterEnabled=on hapClusterHeight=%d ",
         heightForSampleCount(fontHeight, slCount(sampleIds)));
 if (slCount(sampleIds) >= minSamplesForOwnTree)
     {
     struct tempName sampleTreeTn;
     trashDirFile(&sampleTreeTn, "ct", "uploadedSamples", ".nwk");
-    uploadedSamplesTree(sampleTreeTn.forCgi, bigTreePlusFile, sampleIds);
+    sampleTree = uploadedSamplesTree(sampleTreeTn.forCgi, bigTreePlusFile, sampleIds);
     fprintf(ctF, "hapClusterMethod='treeFile %s' ", sampleTreeTn.forCgi);
     }
 else
     fprintf(ctF, "hapClusterMethod=fileOrder ");
 if (isNotEmpty(geneTrack))
     fprintf(ctF, "hapClusterColorBy=function geneTrack=%s", geneTrack);
 fputc('\n', ctF);
 struct lineFile *vcfLf = lineFileOpen(vcfTn->forCgi, TRUE);
 char *line;
 while (lineFileNext(vcfLf, &line, NULL))
     fprintf(ctF, "%s\n",line);
 lineFileClose(&vcfLf);
 reportTiming(pStartTime, "add custom track for uploaded samples");
+return sampleTree;
 }
 
 static struct vcfFile *parseUserVcf(char *userVcfFile, int *pStartTime)
 /* Read in user VCF file and parse genotypes. */
 {
 struct vcfFile *userVcf = vcfFileMayOpen(userVcfFile, chrom, 0, chromSize, 0, maxVcfRows, TRUE);
 if (! userVcf)
     return NULL;
 reportTiming(pStartTime, "read userVcf");
 struct vcfRecord *rec;
 for (rec = userVcf->records;  rec != NULL;  rec = rec->next)
     vcfParseGenotypesGtOnly(rec);
 reportTiming(pStartTime, "parse userVcf genotypes");
 return userVcf;
 }
 
 static char *trackNameFromSampleIds(struct slName *sampleIds)
 /* Make a name for our subtree custom track by concatenating sample names. */
 {
 struct dyString *dy = dyStringCreate("subtree_%s", sampleIds->name);
 struct slName *id;
 for (id = sampleIds->next;  id != NULL;  id = id->next)
     {
     dyStringPrintf(dy, "_%s", id->name);
     if (dy->stringSize > 200)
         break;
     }
 return dyStringCannibalize(&dy);
 }
 
 static void writeSubtreeTrackLine(FILE *ctF, struct subtreeInfo *ti, char *source, int fontHeight)
 /* Write a custom track line to ctF for one subtree. */
 {
 fprintf(ctF, "track name=%s", trackNameFromSampleIds(ti->subtreeUserSampleIds));
 // Keep the description from being too long in order to avoid buffer overflow in hgTrackUi
 int descLen = fprintf(ctF, " description='Uploaded sample%s %s",
                       (slCount(ti->subtreeUserSampleIds) > 1 ? "s" : ""),
                       ti->subtreeUserSampleIds->name);
 struct slName *id;
 for (id = ti->subtreeUserSampleIds->next;  id != NULL;  id = id->next)
     {
     if (descLen > 200)
         {
         fprintf(ctF, " and %d other samples", slCount(id));
         break;
         }
     descLen += fprintf(ctF, ", %s", id->name);
     }
 int height = heightForSampleCount(fontHeight, slCount(ti->subtreeNameList));
 fprintf(ctF, " and nearest neighboring %s sequences' type=vcf visibility=pack "
         "hapClusterEnabled=on hapClusterHeight=%d hapClusterMethod='treeFile %s' "
         "highlightIds=%s",
         source, height, ti->subtreeTn->forHtml,
         slNameListToString(ti->subtreeUserSampleIds, ','));
 if (isNotEmpty(geneTrack))
     fprintf(ctF, " hapClusterColorBy=function geneTrack=%s", geneTrack);
 fputc('\n', ctF);
 }
 
 static void writeVcfHeader(FILE *f, struct slName *sampleNames)
 /* Write a minimal VCF header with sample names for genotype columns. */
 {
 fprintf(f, "##fileformat=VCFv4.2\n");
 fprintf(f, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
 struct slName *s;
 for (s = sampleNames;  s != NULL;  s = s->next)
     fprintf(f, "\t%s", s->name);
 fputc('\n', f);
 }
 
 static struct slRef *slRefListCopyReverse(struct slRef *inList)
 /* Return a clone of inList but in reverse order. */
 {
 struct slRef *outList = NULL;
 struct slRef *ref;
 for (ref = inList;  ref != NULL;  ref = ref->next)
     slAddHead(&outList, slRefNew(ref->val));
 return outList;
 }
 
 static void treeToBaseAlleles(struct phyloTree *node, char *refBases, char **sampleBases,
                               struct hash *sampleToIx, struct slRef *parentMuts)
 /* Traverse tree, building up an array of reference bases indexed by position, and an array
  * (indexed by pos) of sample allele arrays indexed by sampleToIx. */
 {
 struct slRef *nodeMuts = parentMuts;
 if (node->priv != NULL)
     {
     // Start out with a copy of parent node mutations in reverse order so we can add at head
     // and then reverse all.
     struct singleNucChange *sncs = node->priv;
     nodeMuts = slRefListCopyReverse(parentMuts);
     slAddHead(&nodeMuts, slRefNew(sncs));
     slReverse(&nodeMuts);
     }
 if (node->numEdges > 0)
     {
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         treeToBaseAlleles(node->edges[i], refBases, sampleBases, sampleToIx, nodeMuts);
     }
 else
     {
     // Leaf node: if in sampleToIx, then for each mutation (beware back-mutations),
     // set refBase if it has not already been set and set sample allele.
     int ix = hashIntValDefault(sampleToIx, node->ident->name, -1);
     if (ix >= 0)
         {
         struct slRef *mlRef;
         for (mlRef = nodeMuts;  mlRef != NULL;  mlRef = mlRef->next)
             {
             struct singleNucChange *snc, *sncs = mlRef->val;
             for (snc = sncs;  snc != NULL;  snc = snc->next)
                 {
                 // Don't let a back-mutation overwrite the true ref value:
                 if (!refBases[snc->chromStart])
                     refBases[snc->chromStart] = snc->refBase;
                 if (sampleBases[snc->chromStart] == NULL)
                     AllocArray(sampleBases[snc->chromStart], sampleToIx->elCount);
                 sampleBases[snc->chromStart][ix] = snc->newBase;
                 }
             }
         }
     }
 if (node->priv != NULL)
     slFreeList(&nodeMuts);
 }
 
 static void vcfToBaseAlleles(struct vcfFile *vcff, char *refBases, char **sampleBases,
                              struct hash *sampleToIx)
 /* Build up an array of reference bases indexed by position, and an array (indexed by pos)
  * of sample allele arrays indexed by sampleToIx, from variants & genotypes in vcff. */
 {
 int gtIxToSampleIx[vcff->genotypeCount];
 int gtIx;
 for (gtIx = 0;  gtIx < vcff->genotypeCount;  gtIx++)
     {
     int ix = hashIntValDefault(sampleToIx, vcff->genotypeIds[gtIx], -1);
     gtIxToSampleIx[gtIx] = ix;
     }
 struct vcfRecord *rec;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
     for (gtIx = 0;  gtIx < vcff->genotypeCount;  gtIx++)
         {
         int ix = gtIxToSampleIx[gtIx];
         if (ix >= 0)
             {
             // If refBases was already set by bigTree, go with that.
             if (!refBases[rec->chromStart])
                 refBases[rec->chromStart] = rec->alleles[0][0];
             if (sampleBases[rec->chromStart] == NULL)
                 AllocArray(sampleBases[rec->chromStart], sampleToIx->elCount);
             int alIx = rec->genotypes[gtIx].hapIxA;
             if (alIx < 0)
                 sampleBases[rec->chromStart][ix] = '.';
             else if (alIx > 0)
                 sampleBases[rec->chromStart][ix] = rec->alleles[alIx][0];
             // If alIx == 0 (ref), leave the sample base at '\0', the default assumption.
             }
         }
     }
 }
 
 static int tallyAlts(char *sampleAlleles, char ref, int sampleCount, char *altAlleles,
                      int *altCounts, int maxAltCount, int *retNoCallCount)
 /* Scan through sampleAlleles looking for alternate alleles.  Return the number of alternate
  * alleles and fill in altAlleles and altCounts (number of observations of each alt allele).
  * Also simplify sampleAlleles by replacing ref alleles (from back-mutations) with '\0'. */
 {
 int altCount = 0;
 int noCallCount = 0;
 memset(altAlleles, 0, sizeof(*altAlleles) * maxAltCount);
 memset(altCounts, 0, sizeof(*altCounts) * maxAltCount);
 int ix;
 for (ix = 0;  ix < sampleCount;  ix++)
     {
     if (sampleAlleles[ix] == ref)
         sampleAlleles[ix] = '\0';
     char sampleAl = sampleAlleles[ix];
     if (sampleAl == '.')
         noCallCount++;
     else if (sampleAl != '\0')
         {
         int altIx;
         for (altIx = 0;  altIx < altCount;  altIx++)
             if (altAlleles[altIx] == sampleAl)
                 break;
         if (altIx == altCount)
             {
             if (altCount == maxAltCount)
                 errAbort("Too many alternate alleles for maxAltCount=%d; increase it.",
                          maxAltCount);
             // Add new alt allele.
             altCount++;
             altAlleles[altIx] = sampleAl;
             }
         altCounts[altIx]++;
         }
     }
 *retNoCallCount = noCallCount;
 return altCount;
 }
 
 static void altsToGenotypes(char *sampleAlleles, int sampleCount, char *altAlleles, int altCount,
                             char *genotypes)
 /* Fill in genotypes with '0' for ref allele, '1' for first alternate allele, etc. */
 {
 int ix;
 for (ix = 0;  ix < sampleCount;  ix++)
     {
     char sampleAl = sampleAlleles[ix];
     if (sampleAl == '\0')
         genotypes[ix] = '0';
     else if (sampleAl == '.')
         genotypes[ix] = '.';
     else
         {
         int altIx;
         for (altIx = 0;  altIx < altCount;  altIx++)
             if (altAlleles[altIx] == sampleAl)
                 break;
         if (altIx == altCount)
             errAbort("altsToGenotypes: sample %d allele '%c' not found in array of %d alts.",
                      ix, sampleAl, altCount);
         genotypes[ix] = '1' + altIx;
         }
     }
 }
 
 static void baseAllelesToVcf(char *refBases, char **sampleBases, int baseCount, int sampleCount,
                              FILE *f)
 /* Write VCF rows with per-sample genotypes from refBases and sampleBases. */
 {
 int chromStart;
 for (chromStart = 0;  chromStart < baseCount;  chromStart++)
     {
     char ref = refBases[chromStart];
     if (ref != '\0')
         {
         // This position has a variant.  Tally up alternate alleles and counts and determine
         // allele indices to use when printing out genotypes.
         int maxAlts = 16;
         char altAlleles[maxAlts];
         int altCounts[maxAlts];
         int noCallCount;
         int altCount = tallyAlts(sampleBases[chromStart], ref, sampleCount,
                                  altAlleles, altCounts, maxAlts, &noCallCount);
         if (altCount == 0)
             continue;
         char genotypes[sampleCount];
         altsToGenotypes(sampleBases[chromStart], sampleCount, altAlleles, altCount, genotypes);
         int pos = chromStart+1;
         fprintf(f, "%s\t%d\t%c%d%c",
                 chrom, pos, ref, pos, altAlleles[0]);
         int altIx;
         for (altIx = 1;  altIx < altCount;  altIx++)
             fprintf(f, ",%c%d%c", ref, pos, altAlleles[altIx]);
         fprintf(f, "\t%c\t%c", ref, altAlleles[0]);
         for (altIx = 1;  altIx < altCount;  altIx++)
         fprintf(f, ",%c", toupper(altAlleles[altIx]));
         fputs("\t.\t.\t", f);
         fprintf(f, "AC=%d", altCounts[0]);
         for (altIx = 1;  altIx < altCount;  altIx++)
             fprintf(f, ",%d", altCounts[altIx]);
         fprintf(f, ";AN=%d\tGT", sampleCount - noCallCount);
         int gtIx;
         for (gtIx = 0;  gtIx < sampleCount;  gtIx++)
             fprintf(f, "\t%c", genotypes[gtIx]);
         fputc('\n', f);
         }
     }
 }
 
 static void writeSubtreeVcf(FILE *f, struct hash *sampleToIx,
                             struct vcfFile *userVcf, struct phyloTree *bigTree)
 /* Write rows of VCF with genotypes for samples in sampleToIx (with order determined by sampleToIx)
  * with some samples found in userVcf and the rest found in bigTree which has been annotated
  * with single-nucleotide variants. */
 {
 char refBases[chromSize];
 memset(refBases, 0, sizeof refBases);
 char *sampleBases[chromSize];
 memset(sampleBases, 0, sizeof sampleBases);
 treeToBaseAlleles(bigTree, refBases, sampleBases, sampleToIx, NULL);
 vcfToBaseAlleles(userVcf, refBases, sampleBases, sampleToIx);
 int sampleCount = sampleToIx->elCount;
 baseAllelesToVcf(refBases, sampleBases, chromSize, sampleCount, f);
 int i;
 for (i = 0;  i < chromSize;  i++)
     freeMem(sampleBases[i]);
 }
 
 static void addSubtreeCustomTracks(FILE *ctF, char *userVcfFile, struct subtreeInfo *subtreeInfoList,
                                    struct hash *samplePlacements, struct phyloTree *bigTree,
                                    char *source, int fontHeight, int *pStartTime)
 /* For each subtree trashfile, write VCF+treeFile custom track text to ctF. */
 {
 struct vcfFile *userVcf = parseUserVcf(userVcfFile, pStartTime);
 if (! userVcf)
     {
     warn("Problem parsing VCF file with user variants '%s'; can't make subtree subtracks.",
          userVcfFile);
     return;
     }
 if (!bigTree)
     return;
 struct subtreeInfo *ti;
 for (ti = subtreeInfoList;  ti != NULL;  ti = ti->next)
     {
     writeSubtreeTrackLine(ctF, ti, source, fontHeight);
     writeVcfHeader(ctF, ti->subtreeNameList);
     writeSubtreeVcf(ctF, ti->subtreeIdToIx, userVcf, bigTree);
     fputc('\n', ctF);
     }
 vcfFileFree(&userVcf);
 reportTiming(pStartTime, "write subtree custom tracks");
 }
 
 struct tempName *writeCustomTracks(struct tempName *vcfTn, struct usherResults *ur,
                                    struct slName *sampleIds, struct phyloTree *bigTree,
-                                   char *source, int fontHeight, int *pStartTime)
+                                   char *source, int fontHeight, struct phyloTree **retSampleTree,
+                                   int *pStartTime)
 /* Write one custom track per subtree, and one custom track with just the user's uploaded samples. */
 {
 struct tempName *ctVcfTn = userVcfWithImputedBases(vcfTn, ur->samplePlacements, sampleIds);
 struct tempName *ctTn;
 AllocVar(ctTn);
 trashDirFile(ctTn, "ct", "ct_pp", ".ct");
 FILE *ctF = mustOpen(ctTn->forCgi, "w");
+int subtreeCount = slCount(ur->subtreeInfoList);
+if (subtreeCount <= MAX_SUBTREE_CTS)
     addSubtreeCustomTracks(ctF, ctVcfTn->forCgi, ur->subtreeInfoList, ur->samplePlacements, bigTree,
                            source, fontHeight, pStartTime);
-addSampleOnlyCustomTrack(ctF, ctVcfTn, ur->bigTreePlusTn->forCgi, sampleIds, fontHeight,
-                         pStartTime);
+else
+    printf("<p>Subtree custom tracks are added when there are at most %d subtrees, "
+           "but %d subtrees were found.</p>\n",
+           MAX_SUBTREE_CTS, subtreeCount);
+struct phyloTree *sampleTree = addSampleOnlyCustomTrack(ctF, ctVcfTn, ur->bigTreePlusTn->forCgi,
+                                                        sampleIds, fontHeight, pStartTime);
+if (retSampleTree)
+    *retSampleTree = sampleTree;
 carefulClose(&ctF);
 return ctTn;
 }