7e58340888377874edaad1dbc5174e20295f890c angie Mon Feb 22 14:17:33 2021 -0800 Support upload of more sequences, add TSV file summarizing sample variants and placements. Requested by Joe de Risi (UCSF). Increase timeout to 10 minutes; make TSV with each sample's ID, nuc muts, AA muts, imputed bases and path from root to sample. Also use Yatish's new -K subtree algorithm in usher: one subtree encompassing all uploaded samples, plus the specified number of samples randomly selected from the rest of the tree. Don't show every single sample name in the title because there can be 1000 samples in the same subtree now. :) diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c index 6a5c593..5cd7aa7 100644 --- src/hg/hgPhyloPlace/runUsher.c +++ src/hg/hgPhyloPlace/runUsher.c @@ -698,30 +698,66 @@ subtreeCount = subtreeIx + 1; if (sameString(parts[2], "mutations.txt")) { subtreeMuts[subtreeIx] = parseSubtreeMutations(path); } else if (sameString(parts[2], "expanded.txt")) { // Don't need this, just remove it } else warn("Unexpected filename '%s' from usher, ignoring", file->name); } else warn("Unexpected filename '%s' from usher, ignoring", file->name); } + else if (startsWith("single-subtree", file->name)) + { + // We have a single subtree, not subtree-N-* files + int subtreeIx = 0; + subtreeCount = 1; + char fnameCpy[strlen(file->name)+1]; + safecpy(fnameCpy, sizeof fnameCpy, file->name); + char *parts[4]; + int partCount = chopString(fnameCpy, "-", parts, ArraySize(parts)); + if (partCount == 2) + { + // Expect single-subtree.nh + if (!endsWith(parts[1], ".nh")) + warn("Unexpected filename '%s' from usher, ignoring", file->name); + else + { + AllocVar(subtreeTns[subtreeIx]); + trashDirFile(subtreeTns[subtreeIx], "ct", "subtree", ".nwk"); + mustRename(path, subtreeTns[subtreeIx]->forCgi); + } + } + else if (partCount == 3) + { + // Expect single-subtree-mutations.txt or single-subtree-expanded.txt + if (sameString(parts[2], "mutations.txt")) + { + subtreeMuts[subtreeIx] = parseSubtreeMutations(path); + } + else if (sameString(parts[2], "expanded.txt")) + { + // Don't need this, just remove it + } + } + else + warn("Unexpected filename '%s' from usher, ignoring", file->name); + } else if (sameString(file->name, "final-tree.nh")) { // Don't need this, just remove it. } else warn("Unexpected filename '%s' from usher, ignoring", file->name); unlink(path); } rmdir(outDir); // Make sure we got a complete range of subtrees [0..subtreeCount-1] int i; for (i = 0; i < subtreeCount; i++) { if (subtreeTns[i] == NULL) errAbort("Missing file subtree-%d.nh in usher results", i+1); @@ -735,31 +771,31 @@ struct usherResults *runUsher(char *usherPath, char *usherAssignmentsPath, char *vcfFile, int subtreeSize, struct slName *userSampleIds, struct hash *condensedNodes, int *pStartTime) /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and * subtrees to trash files, return list of slRef to struct tempName for the trash files * and parse other results out of stderr output. */ { struct usherResults *results = usherResultsNew(); char subtreeSizeStr[16]; safef(subtreeSizeStr, sizeof subtreeSizeStr, "%d", subtreeSize); char *numThreadsStr = "16"; struct tempName tnOutDir; trashDirFile(&tnOutDir, "ct", "usher_outdir", ".dir"); char *cmd[] = { usherPath, "-v", vcfFile, "-i", usherAssignmentsPath, "-d", tnOutDir.forCgi, - "-k", subtreeSizeStr, "-T", numThreadsStr, "-u", "-l", NULL }; + "-K", subtreeSizeStr, "-T", numThreadsStr, "-u", "-l", NULL }; char **cmds[] = { cmd, NULL }; struct tempName tnStderr; trashDirFile(&tnStderr, "ct", "usher_stderr", ".txt"); struct pipeline *pl = pipelineOpen(cmds, pipelineRead, NULL, tnStderr.forCgi); pipelineClose(&pl); reportTiming(pStartTime, "run usher"); parseStderr(tnStderr.forCgi, results->samplePlacements); struct tempName *subtreeTns[MAX_SUBTREES]; struct variantPathNode *subtreeMuts[MAX_SUBTREES]; int subtreeCount = processOutDirFiles(results, tnOutDir.forCgi, subtreeTns, subtreeMuts, MAX_SUBTREES); results->subtreeInfoList = parseSubtrees(subtreeCount, subtreeTns, subtreeMuts, userSampleIds, condensedNodes); return results;