2a9d39b83afd2a71f4a0d57042b4a6fcee81a556 angie Thu Jan 26 14:54:41 2023 -0800 Added two RSV clade systems (Goya et al. and Ramaekers et al.) and different sources (nextclade vs. tree vs. direct assignments) as coloring options in Nextstrain JSON output for RSV. diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c index 19602d5..09d35ed 100644 --- src/hg/hgPhyloPlace/phyloPlace.c +++ src/hg/hgPhyloPlace/phyloPlace.c @@ -771,30 +771,35 @@ if (lineageIx < 0) lineageIx = stringArrayIx("pango_lineage", headerWords, headerWordCount); int countryIx = stringArrayIx("country", headerWords, headerWordCount); int divisionIx = stringArrayIx("division", headerWords, headerWordCount); int locationIx = stringArrayIx("location", headerWords, headerWordCount); int countryExpIx = stringArrayIx("country_exposure", headerWords, headerWordCount); int divExpIx = stringArrayIx("division_exposure", headerWords, headerWordCount); int origLabIx = stringArrayIx("originating_lab", headerWords, headerWordCount); int subLabIx = stringArrayIx("submitting_lab", headerWords, headerWordCount); int regionIx = stringArrayIx("region", headerWords, headerWordCount); int nCladeUsherIx = stringArrayIx("Nextstrain_clade_usher", headerWords, headerWordCount); int lineageUsherIx = stringArrayIx("pango_lineage_usher", headerWords, headerWordCount); int authorsIx = stringArrayIx("authors", headerWords, headerWordCount); int pubsIx = stringArrayIx("publications", headerWords, headerWordCount); int nLineageIx = stringArrayIx("Nextstrain_lineage", headerWords, headerWordCount); + int gnCladeIx = stringArrayIx("goya_nextclade", headerWords, headerWordCount); + int rnCladeIx = stringArrayIx("ramaekers_nextclade", headerWords, headerWordCount); + int guCladeIx = stringArrayIx("goya_usher", headerWords, headerWordCount); + int ruCladeIx = stringArrayIx("ramaekers_usher", headerWords, headerWordCount); + int rtCladeIx = stringArrayIx("ramaekers_tableS1", headerWords, headerWordCount); while (lineFileNext(lf, &line, NULL)) { char *words[headerWordCount]; int wordCount = chopTabs(line, words); lineFileExpectWords(lf, headerWordCount, wordCount); struct sampleMetadata *met; AllocVar(met); if (strainIx >= 0) met->strain = cloneString(words[strainIx]); if (epiIdIx >= 0) met->epiId = cloneString(words[epiIdIx]); if (genbankIx >= 0 && !sameString("?", words[genbankIx])) met->gbAcc = cloneString(words[genbankIx]); if (dateIx >= 0) met->date = cloneString(words[dateIx]); @@ -820,30 +825,43 @@ met->origLab = cloneString(words[origLabIx]); if (subLabIx >= 0) met->subLab = cloneString(words[subLabIx]); if (regionIx >= 0) met->region = cloneString(words[regionIx]); if (nCladeUsherIx >= 0) met->nCladeUsher = cloneString(words[nCladeUsherIx]); if (lineageUsherIx >= 0) met->lineageUsher = cloneString(words[lineageUsherIx]); if (authorsIx >= 0) met->authors = cloneString(words[authorsIx]); if (pubsIx >= 0) met->pubs = cloneString(words[pubsIx]); if (nLineageIx >= 0) met->nLineage = cloneString(words[nLineageIx]); + // For RSV, use lineage for Ramaekers clades and nClade for Goya clades. + // This is getting ugly and we really should specify metadata columns in config.ra files. + if (gnCladeIx >= 0) + met->nClade = cloneString(words[gnCladeIx]); + if (rnCladeIx >= 0) + met->lineage = cloneString(words[rnCladeIx]); + if (guCladeIx >= 0) + met->nCladeUsher = cloneString(words[guCladeIx]); + if (ruCladeIx >= 0) + met->lineageUsher = cloneString(words[ruCladeIx]); + // Uglier still, use gClade to store Ramaekers Table S1 designations because it's left over. + if (rtCladeIx >= 0) + met->gClade = cloneString(words[rtCladeIx]); // If epiId and/or genbank ID is included, we'll probably be using that to look up items. if (epiIdIx >= 0 && !isEmpty(words[epiIdIx])) hashAdd(sampleMetadata, words[epiIdIx], met); if (genbankIx >= 0 && !isEmpty(words[genbankIx]) && !sameString("?", words[genbankIx])) { if (strchr(words[genbankIx], '.')) { // Index by versionless accession char copy[strlen(words[genbankIx])+1]; safecpy(copy, sizeof copy, words[genbankIx]); char *dot = strchr(copy, '.'); *dot = '\0'; hashAdd(sampleMetadata, copy, met); } else @@ -1399,46 +1417,67 @@ TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in " "reference %s") "\n