1eb02994652b9a06a33024bc629520aeeeb0a2f1 chmalee Fri Aug 6 11:18:49 2021 -0700 When a user has searched for a pseudo hgvs term, if the term does not map to any transcripts, warn the user but head to the general location around where they meant to go, refs #15554, #27942 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 2b9d417..424f138 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -2740,71 +2740,71 @@ } return foundIt; } // a little data structure for combining multiple transcripts that resolve // to the same hgvs change. This struct can be used to fill out a struct hgPos struct hgvsHelper { struct hgvsHelper *next; char *chrom; // chromosome name of position int chromStart; // start of position int chromEnd; // end of position struct slName *validTranscripts; // valid transcripts/protein accessions for this position char *label; // corresponding hgvs term char *table; // type of match, LRG, NCBI, etc + boolean mapError; // does this hgvs mapping result in a map error? 
}; static boolean matchesHgvs(struct cart *cart, char *db, char *term, struct hgPositions *hgp) /* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature * See http://varnomen.hgvs.org/ * If search term is a pseudo hgvs term like GeneName AminoAcidPosition (RUNX2 Arg155) and * matches more than one transcript, fill out the hgp with the potential matches so the user * can choose where to go, otherwise return a singlePos */ { boolean foundIt = FALSE; struct hgvsVariant *hgvsList = hgvsParseTerm(term); if (hgvsList == NULL) hgvsList = hgvsParsePseudoHgvs(db, term); if (hgvsList) { struct hgvsVariant *hgvs = NULL; - int hgvsListLen = slCount(hgvs); + int hgvsListLen = slCount(hgvsList); struct hgPosTable *table; AllocVar(table); table->description = "HGVS"; int padding = 5; + int mapErrCnt = 0; struct dyString *dyWarn = dyStringNew(0); struct hgvsHelper *helper = NULL; struct hash *uniqHgvsPos = hashNew(0); struct dyString *chromPosIndex = dyStringNew(0); + struct dyString *allWarnings = dyStringNew(0); for (hgvs = hgvsList; hgvs != NULL; hgvs = hgvs->next) { dyStringClear(dyWarn); dyStringClear(chromPosIndex); char *pslTable = NULL; struct bed *mapping = hgvsValidateAndMap(hgvs, db, term, dyWarn, &pslTable); if (dyStringLen(dyWarn) > 0) { + mapErrCnt++; if (hgvsListLen == 1) { warn("%s", dyStringContents(dyWarn)); } - else - { - continue; - } } if (mapping) { char *trackTable; if (isEmpty(pslTable)) trackTable = "chromInfo"; else if (startsWith("lrg", pslTable)) trackTable = "lrgTranscriptAli"; else if (startsWith("wgEncodeGencode", pslTable)) trackTable = pslTable; else if (startsWith("ncbiRefSeqPsl", pslTable)) { if (startsWith("NM_", hgvs->seqAcc) || startsWith("NR_", hgvs->seqAcc) || startsWith("NP_", hgvs->seqAcc) || startsWith("YP_", hgvs->seqAcc)) trackTable = "ncbiRefSeqCurated"; @@ -2821,54 +2821,101 @@ if ((helper = hashFindVal(uniqHgvsPos, chromPosIndex->string)) != NULL) { slNameAddHead(&helper->validTranscripts, 
hgvs->seqAcc); } else { AllocVar(helper); helper->chrom = mapping->chrom; helper->chromStart = mapping->chromStart; helper->chromEnd = mapping->chromEnd; helper->validTranscripts = slNameNew(hgvs->seqAcc); helper->label = cloneString(term); helper->table = trackTable; hashAdd(uniqHgvsPos, chromPosIndex->string, helper); } + if (dyStringLen(dyWarn) > 0) + { + helper->mapError = TRUE; + dyStringPrintf(allWarnings, "%s%s", dyStringLen(allWarnings) > 0 ? "\n" : "", dyStringContents(dyWarn)); + } } } + if (mapErrCnt < hgvsListLen) + // at least one of the hgvs terms mapped sucessfully, so we can go to that spot + // or let the user pick a location + { struct hashEl *hel, *helList= hashElListHash(uniqHgvsPos); for (hel = helList; hel != NULL; hel = hel->next) { + helper = (struct hgvsHelper *)hel->val; + if (!helper->mapError) + { if (hgp->tableList == NULL) hgp->tableList = table; - helper = (struct hgvsHelper *)hel->val; + foundIt = TRUE; table->name = helper->table; struct hgPos *pos; AllocVar(pos); pos->chrom = helper->chrom; pos->chromStart = helper->chromStart - padding; pos->chromEnd = helper->chromEnd + padding; pos->name = slNameListToString(helper->validTranscripts, '/'); pos->description = cloneString(helper->label); pos->browserName = ""; slAddHead(&table->posList, pos); // highlight the mapped bases to distinguish from padding hgp->tableList->posList->highlight = addHighlight(db, helper->chrom, helper->chromStart, helper->chromEnd); + } + } + } + else + // all of the positions mapped incorrectly, so the term was bad. 
However, we may + // be able to still go to a general area around the term, so build that, warn the + // user about their bad search term, and warn that this is not an exactly correct position + { + struct hashEl *hel, *helList= hashElListHash(uniqHgvsPos); + if (hgp->tableList == NULL) + hgp->tableList = table; foundIt = TRUE; + table->name = helper->table; + struct hgPos *pos; + AllocVar(pos); + char *chrom; + int spanStart = INT_MAX, spanEnd = 0; + for (hel = helList; hel != NULL; hel = hel->next) + { + helper = (struct hgvsHelper *)hel->val; + chrom = helper->chrom; + spanStart = helper->chromStart < spanStart ? helper->chromStart : spanStart; + spanEnd = helper->chromEnd > spanEnd ? helper->chromEnd : spanEnd; + } + pos->chrom = cloneString(chrom); + pos->chromStart = spanStart-padding; + pos->chromEnd = spanEnd + padding; + pos->name = "Approximate area"; + pos->description = term; + pos->browserName = term; + slAddHead(&table->posList, pos); + // highlight the 'mapped' bases to distinguish from padding + hgp->tableList->posList->highlight = addHighlight(db, helper->chrom, spanStart, spanEnd); + warn("%s", dyStringContents(allWarnings)); + warn("Sorry, couldn't locate %s, moving to general location", term); } dyStringFree(&dyWarn); + dyStringFree(&allWarnings); } return foundIt; } struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, char *hgAppNameIn, struct cart *cart, boolean multiTerm) /* Return container of tracks and positions (if any) that match term. */ { struct hgPositions *hgp = NULL, *hgpItem = NULL; regmatch_t substrs[4]; boolean canonicalSpec = FALSE; boolean gbrowserSpec = FALSE; boolean lengthSpec = FALSE; boolean singleBaseSpec = FALSE; boolean relativeFlag = FALSE;