eeac40956b3dd6611f58aeb847ff5c404d3ce883 angie Fri Dec 1 09:05:41 2023 -0800 Support trees whose reference/root is not from a db or hub, but rather a custom .2bit file. This means that the selected pathogen may or may not also be a db/hub, so the selection interacts with the db cart variable but does not always match it. Also, in phyloPlace.c, update RSV metadata column headers (RGCC lineages replace Ramaekers 2020 clades). diff --git src/hg/hgPhyloPlace/writeCustomTracks.c src/hg/hgPhyloPlace/writeCustomTracks.c index 292df91..89b6571 100644 --- src/hg/hgPhyloPlace/writeCustomTracks.c +++ src/hg/hgPhyloPlace/writeCustomTracks.c @@ -1,32 +1,29 @@ /* Write custom tracks: one per subtree and one with user's uploaded samples. */ /* Copyright (C) 2020 The Regents of the University of California */ #include "common.h" #include "hash.h" #include "hdb.h" #include "iupac.h" #include "parsimonyProto.h" #include "phyloPlace.h" #include "phyloTree.h" #include "trashDir.h" #include "vcf.h" -// Parameter constants: -int minSamplesForOwnTree = 3; // If user uploads at least this many samples, show tree for them. - static char ***imputedBasesByPosition(struct hash *samplePlacements, struct slName *sampleIds, int chromSize) /* Unpack imputedBases into an array of (a few) arrays indexed by 0-based position and sample index, * for speedy retrieval while rewriting user VCF. */ { char ***impsByPos = needMem(chromSize * sizeof(char **)); int sampleCount = slCount(sampleIds); struct slName *sample; int ix; for (ix = 0, sample = sampleIds; sample != NULL; ix++, sample = sample->next) { struct placementInfo *info = hashFindVal(samplePlacements, sample->name); if (!info) errAbort("imputedBasesByPosition: can't find placementInfo for sample '%s'", sample->name); struct baseVal *bv; @@ -169,79 +166,67 @@ fputs(words[0], f); for (i = 1; i < colCount; i++) { fputc('\t', f); fputs(words[i], f); } } fputc('\n', f); } carefulClose(&f); reportTiming(pStartTime, "Make user VCF with imputed bases"); } return ctVcfTn; } -static struct phyloTree *uploadedSamplesTree(char *sampleTreeFile, char *bigTreeFile, - struct slName *sampleIds) -/* Read in tree from bigTreeFile, prune all nodes that have no leaf descendants in sampleIds, - * save pruned tree to sampleTreeFile. */ -{ -struct phyloTree *tree = phyloOpenTree(bigTreeFile); -tree = phyloPruneToIds(tree, sampleIds); -FILE *f = mustOpen(sampleTreeFile, "w"); -phyloPrintTree(tree, f); -carefulClose(&f); -return tree; -} - static int heightForSampleCount(int fontHeight, int count) /* Return a height sufficient for this many labels stacked vertically. */ { return (fontHeight + 1) * count; } -static struct phyloTree *addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn, - char *bigTreePlusFile, struct slName *sampleIds, +static void addSampleOnlyCustomTrack(FILE *ctF, struct tempName *vcfTn, + struct slName *sampleIds, struct phyloTree *sampleTree, char *geneTrack, int fontHeight, int *pStartTime) /* Make custom track with uploaded VCF. If there are enough samples to make a non-trivial tree, * then make the tree by pruning down the big tree plus samples to only the uploaded samples. */ { -struct phyloTree *sampleTree = NULL; fprintf(ctF, "track name=uploadedSamples description='Uploaded sample genotypes' " "type=vcf visibility=pack " "hapClusterEnabled=on hapClusterHeight=%d ", heightForSampleCount(fontHeight, slCount(sampleIds))); -if (slCount(sampleIds) >= minSamplesForOwnTree) +if (sampleTree != NULL) { + // Write tree of uploaded samples to a file so we can draw it in left label area. struct tempName sampleTreeTn; trashDirFile(&sampleTreeTn, "ct", "uploadedSamples", ".nwk"); - sampleTree = uploadedSamplesTree(sampleTreeTn.forCgi, bigTreePlusFile, sampleIds); + FILE *f = mustOpen(sampleTreeTn.forCgi, "w"); + phyloPrintTree(sampleTree, f); + carefulClose(&f); fprintf(ctF, "hapClusterMethod='treeFile %s' ", sampleTreeTn.forCgi); } else fprintf(ctF, "hapClusterMethod=fileOrder "); if (isNotEmpty(geneTrack)) fprintf(ctF, "hapClusterColorBy=function geneTrack=%s", geneTrack); fputc('\n', ctF); struct lineFile *vcfLf = lineFileOpen(vcfTn->forCgi, TRUE); char *line; while (lineFileNext(vcfLf, &line, NULL)) fprintf(ctF, "%s\n",line); lineFileClose(&vcfLf); reportTiming(pStartTime, "add custom track for uploaded samples"); -return sampleTree; } static struct vcfFile *parseUserVcf(char *userVcfFile, int chromSize, int *pStartTime) /* Read in user VCF file and parse genotypes. */ { struct vcfFile *userVcf = vcfFileMayOpen(userVcfFile, NULL, 0, chromSize, 0, chromSize, TRUE); if (! userVcf) return NULL; struct vcfRecord *rec; for (rec = userVcf->records; rec != NULL; rec = rec->next) vcfParseGenotypesGtOnly(rec); reportTiming(pStartTime, "read userVcf and parse genotypes"); return userVcf; } @@ -525,47 +510,42 @@ struct subtreeInfo *ti; int subtreeNum; for (subtreeNum = 1, ti = subtreeInfoList; ti != NULL; ti = ti->next, subtreeNum++) { writeSubtreeTrackLine(ctF, ti, subtreeNum, source, geneTrack, fontHeight); writeVcfHeader(ctF, ti->subtreeNameList); writeSubtreeVcf(ctF, ti->subtreeIdToIx, chrom, chromSize, userVcf, ti->subtree); fputc('\n', ctF); } vcfFileFree(&userVcf); reportTiming(pStartTime, "write subtree custom tracks"); } struct tempName *writeCustomTracks(char *db, struct tempName *vcfTn, struct usherResults *ur, struct slName *sampleIds, char *source, int fontHeight, - struct phyloTree **retSampleTree, int *pStartTime) + struct phyloTree *sampleTree, int *pStartTime) /* Write one custom track per subtree, and one custom track with just the user's uploaded samples. */ { char *chrom = hDefaultChrom(db); int chromSize = hChromSize(db, chrom); char *geneTrack = phyloPlaceDbSetting(db, "geneTrack"); // Default to SARS-CoV-2 or hMPXV values if setting is missing from config.ra. if (isEmpty(geneTrack)) geneTrack = sameString(db, "wuhCor1") ? "ncbiGeneBGP" : "ncbiGene"; struct tempName *ctVcfTn = userVcfWithImputedBases(vcfTn, ur->samplePlacements, sampleIds, chromSize, pStartTime); struct tempName *ctTn; AllocVar(ctTn); trashDirFile(ctTn, "ct", "ct_pp", ".ct"); FILE *ctF = mustOpen(ctTn->forCgi, "w"); int subtreeCount = slCount(ur->subtreeInfoList); if (subtreeCount <= MAX_SUBTREE_CTS) addSubtreeCustomTracks(ctF, ctVcfTn->forCgi, ur->subtreeInfoList, ur->samplePlacements, chrom, chromSize, source, geneTrack, fontHeight, pStartTime); else printf("<p>Subtree custom tracks are added when there are at most %d subtrees, " "but %d subtrees were found.</p>\n", MAX_SUBTREE_CTS, subtreeCount); -struct phyloTree *sampleTree = addSampleOnlyCustomTrack(ctF, ctVcfTn, - ur->singleSubtreeInfo->subtreeTn->forCgi, - sampleIds, geneTrack, fontHeight, - pStartTime); -if (retSampleTree) - *retSampleTree = sampleTree; +addSampleOnlyCustomTrack(ctF, ctVcfTn, sampleIds, sampleTree, geneTrack, fontHeight, pStartTime); carefulClose(&ctF); return ctTn; }