51aef9631658a9ec173d7c5aa8989738ae4a71ba braney Thu Oct 7 17:01:53 2021 -0700 add a fix to a bug where liftOver elements with zero size that did *not* fit into a chain were flagges as an error rather than just marked as missing diff --git src/hg/lib/liftOver.c src/hg/lib/liftOver.c index 7915bf6..1c7ba61 100644 --- src/hg/lib/liftOver.c +++ src/hg/lib/liftOver.c @@ -154,49 +154,59 @@ * and results in a BED (or a list of BED's, if regionName is set (-multiple). * Otherwise return a string describing the problem. * note: minSizeT is currently NOT implemented. feel free to add it. * (its not e-s, its the boundaries of what maps of chrom:s-e to Q) */ { struct binElement *list = findRange(chainHash, chrom, s, e), *el; struct chain *chainsHit = NULL, *chainsPartial = NULL, *chainsMissed = NULL, *chain; struct bed *bedList = NULL, *unmappedBedList = NULL; struct bed *bed = NULL; char strand = qStrand; /* initialize for single region case */ int start = s, end = e; + +// for chain intersection we need a non-zero range because we need to +// distinquish a zero width query from a gap in the chain (i.e. minMatchSize needs to be +// non-zero). +if (start == end) end++; + double minMatchSize = minMatch * (end - start); int intersectSize; int tStart; bool multiple = (regionName != NULL); verbose(2, "%s:%d-%d", chrom, s, e); if (multiple) verbose(2, "\t%s", regionName); verbose(2, "\n"); for (el = list; el != NULL; el = el->next) { chain = el->val; if (multiple) { if (chain->qEnd - chain->qStart < minChainSizeQ || chain->tEnd - chain->tStart < minChainSizeT) continue; /* limit required match to chain range on target */ end = min(e, chain->tEnd); start = max(s, chain->tStart); + + // see above + if (start == end) end++; + minMatchSize = minMatch * (end - start); } intersectSize = aliIntersectSize(chain, start, end); if (intersectSize >= minMatchSize) slAddHead(&chainsHit, chain); else if (intersectSize > 0) { slAddHead(&chainsPartial, chain); } else { /* shouldn't happen ? */ slAddHead(&chainsMissed, chain); } } @@ -213,54 +223,61 @@ } else if (chainsHit->next != NULL && !multiple) { return "Duplicated in new"; } /* sort chains by position in target to order subregions by orthology */ slSort(&chainsHit, chainCmpTarget); tStart = s; struct chain *next = chainsHit->next; for (chain = chainsHit; chain != NULL; chain = next) { struct chain *subChain = NULL; struct chain *toFree = NULL; int start=s, end=e; + boolean expanded = FALSE; + + // see above. Add fudge factor and remember that we did so. + if (start == end) end++; + next = chain->next; verbose(3,"hit chain %s:%d %s:%d-%d %c (%d)\n", chain->tName, chain->tStart, chain->qName, chain->qStart, chain->qEnd, chain->qStrand, chain->id); if (multiple) { /* no real need to verify ratio again (it would require * adjusting coords again). */ minRatio = 0; if (db) { /* use full chain, not the possibly truncated chain * from the net */ struct chain *next = chain->next; chain = chainLoadIdRange(db, chainTableName, chrom, s, e, chain->id); chain->next = next; verbose(3,"chain from db %s:%d %s:%d-%d %c (%d)\n", chain->tName, chain->tStart, chain->qName, chain->qStart, chain->qEnd, chain->qStrand, chain->id); } } if (!mapThroughChain(chain, minRatio, &start, &end, &subChain, &toFree)) errAbort("Chain mapping error: %s:%d-%d\n", chain->qName, start, end); + if (expanded) // correct if we made a zero width item into a 1bp item + end = start; if (chain->qStrand == '-') strand = otherStrand(qStrand); else strand = qStrand; int mappedThickStart = thickStart, mappedThickEnd = thickEnd; if (useThick) { struct chain *subChain2 = NULL; struct chain *toFree2 = NULL; if (!mapThroughChain(chain, minRatio, &mappedThickStart, &mappedThickEnd, &subChain2, &toFree2)) mappedThickStart = mappedThickEnd = start; chainFree(&toFree2); } verbose(3, "mapped %s:%d-%d\n", chain->qName, start, end);