9ee0dbb8ed4ffb54cdd9fb8bb473548ae9cf276b angie Wed Mar 20 15:16:13 2024 -0700 M. tuberculosis gets lots of BLAT alignments; tweak warning message, make sure we keep only the first. diff --git src/hg/hgPhyloPlace/vcfFromFasta.c src/hg/hgPhyloPlace/vcfFromFasta.c index 067a5a2..1cabca6 100644 --- src/hg/hgPhyloPlace/vcfFromFasta.c +++ src/hg/hgPhyloPlace/vcfFromFasta.c @@ -202,40 +202,39 @@ struct psl *filteredPsls = NULL; *retFailedPsls = NULL; struct hash *userSeqsByName = hashNew(0); struct seqInfo *si; for (si = userSeqs; si != NULL; si = si->next) hashAdd(userSeqsByName, si->seq->name, si); struct hash *alignedSeqs = hashNew(0); struct psl *psl, *nextPsl; for (psl = psls; psl != NULL; psl = nextPsl) { nextPsl = psl->next; boolean passes = TRUE; struct psl *otherPsl = hashFindVal(alignedSeqs, psl->qName); if (otherPsl) { - //#*** Is this ever going to happen? Maybe if there's a large dup??? - //#*** Is there any condition under which we would want to try to merge? (Would expect blat - //#*** to have done that) - struct dyString *dy = dyStringCreate("Warning: multiple alignments found for sequence %s " - "(%d-%d and %d-%d). Skipping alignment of %d-%d", + struct dyString *dy = dyStringCreate("Warning: multiple alignments to reference found for " + "sequence %s (%d-%d and %d-%d). " + "Skipping alignment of %d-%d", psl->qName, otherPsl->qStart, otherPsl->qEnd, psl->qStart, psl->qEnd, psl->qStart, psl->qEnd); slPairAdd(retFailedPsls, dyStringCannibalize(&dy), psl); passes = FALSE; } + else hashAdd(alignedSeqs, psl->qName, psl); if (passes) { si = hashFindVal(userSeqsByName, psl->qName); if (si) si->psl = psl; else warn("Aligned sequence name '%s' does not match any input sequence name", psl->qName); slAddHead(&filteredPsls, psl); } } hashFree(&alignedSeqs); hashFree(&userSeqsByName); slReverse(&filteredPsls); slReverse(retFailedPsls);