e60f133b865553d0a024b5b0dbf16aaf31e47645 markd Wed Aug 5 13:01:44 2015 -0700 fixed bug converting GFF3 alignments to PSL when alignments started or ended with indels (no redmine) diff --git src/lib/psl.c src/lib/psl.c index 0811ca4..e7ac689 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1835,48 +1835,47 @@ } else { char *end = str + 1; for(;*end ; end++) { if (! (isdigit(*end) || (*end == ' ') || (*end == '+'))) break; } *ptr = end; } *op = *str++; *size = atoi(str); - return TRUE; } struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd, char *tName, int tSize, int tStart, int tEnd, char* strand, char *cigar) /* create a PSL from a GFF3-style cigar formatted alignment */ { // this is currently tested by the gff3ToPsl int blocksAlloced = 4; struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0); char op; int size; -int qNext = qStart, qBlkEnd = qEnd; int totalSize = 0; +int qNext = qStart, qBlkEnd = qEnd; if (strand[0] == '-') reverseIntRange(&qNext, &qBlkEnd, qSize); int tNext = tStart, tBlkEnd = tEnd; if (strand[1] == '-') reverseIntRange(&tNext, &tBlkEnd, tSize); if (cigar == NULL) { // no cigar means one block size = qEnd - qStart; totalSize += size; psl->blockSizes[psl->blockCount] = size; psl->qStarts[psl->blockCount] = qNext; psl->tStarts[psl->blockCount] = tNext; psl->blockCount++; @@ -1908,30 +1907,42 @@ qNext += size; break; case 'I': // inserted in target tNext += size; break; case 'D': // deleted from target qNext += size; break; default: errAbort("unrecognized CIGAR op %c in %s", op, cigar); } } } +/* CIGARs starting/ending with indels require adjusting of query/target ranges, + * as PSL starts/ends with matches */ +psl->qStart = psl->qStarts[0]; +psl->qEnd = pslQEnd(psl, psl->blockCount-1); +if (strand[0] == '-') + reverseIntRange(&psl->qStart, &psl->qEnd, qSize); +psl->tStart = psl->tStarts[0]; +psl->tEnd = pslTEnd(psl, psl->blockCount-1); +if (strand[1] == '-') + reverseIntRange(&psl->tStart, &psl->tEnd, tSize); + +/* sanity check */ if (qNext != qBlkEnd) errAbort("CIGAR query length does not match specified query range %s:%d-%d", qName, qStart, qEnd); if (tNext != tBlkEnd) errAbort("CIGAR target length does not match specified target range %s:%d-%d", tName, tStart, tEnd); psl->match = totalSize; return psl; } int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree) /* Return amount that psl overlaps (on target side) with rangeTree. */ { int i; int overlap = 0; boolean isRc = (psl->strand[1] == '-'); for (i=0; i<psl->blockCount; ++i)