4bd6f75468248235ec114db43709354d600fd65b braney Tue Jan 21 09:21:35 2014 -0800 fix gff3ToPsl to work on the negative strand diff --git src/lib/psl.c src/lib/psl.c index ca75f20..8e4611a 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1798,104 +1798,139 @@ * updated to with the new amount of space. */ { int blockSpace = *blockSpacePtr; int newSpace = 2 * blockSpace; ExpandArray(psl->blockSizes, blockSpace, newSpace); ExpandArray(psl->qStarts, blockSpace, newSpace); ExpandArray(psl->tStarts, blockSpace, newSpace); if (psl->qSequence != NULL) { ExpandArray(psl->qSequence, blockSpace, newSpace); ExpandArray(psl->tSequence, blockSpace, newSpace); } *blockSpacePtr = newSpace; } -static boolean getNextCigarOp(char **ptr, char *op, int *size) -/* parts the next cigar op */ +static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size) +/* gets one cigar op out of the CIGAR string. Reverse the order if asked */ { char *str = *ptr; -if ((str == NULL) || (*str == 0)) +if (str == NULL) + return FALSE; + +if ((!reverse && (*str == 0)) || (reverse && (str == startPtr))) return FALSE; // between each cigar op there could be nothing, or a space, or a plus +if (reverse) + { + char *end = str - 1; + for(;*end ; end--) + { + if (isalpha(*end)) + break; + } + str = end; + *ptr = end; + } +else + { char *end = str + 1; for(;*end ; end++) { if (! (isdigit(*end) || (*end == ' ') || (*end == '+'))) break; } *ptr = end; + } *op = *str++; *size = atoi(str); return TRUE; } struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd, char *tName, int tSize, int tStart, int tEnd, char* strand, char *cigar) /* create a PSL from a GFF3-style cigar formatted alignment */ { // this is currently tested by the gff3ToPsl int blocksAlloced = 4; struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0); -char cigarSpec[strlen(cigar+1)]; // copy since parsing is destructive -strcpy(cigarSpec, cigar); -char *cigarNext = cigarSpec; char op; int size; int qNext = qStart, qBlkEnd = qEnd; int totalSize = 0; if (strand[0] == '-') reverseIntRange(&qNext, &qBlkEnd, qSize); int tNext = tStart, tBlkEnd = tEnd; if (strand[1] == '-') reverseIntRange(&tNext, &tBlkEnd, tSize); -while(getNextCigarOp(&cigarNext, &op, &size)) +if (cigar == NULL) + { + // no cigar means one block + size = qEnd - qStart; + totalSize += size; + psl->blockSizes[psl->blockCount] = size; + psl->qStarts[psl->blockCount] = qNext; + psl->tStarts[psl->blockCount] = tNext; + psl->blockCount++; + tNext += size; + qNext += size; + } +else + { + char cigarSpec[strlen(cigar+1)]; // copy since parsing is destructive + strcpy(cigarSpec, cigar); + char *cigarNext = cigarSpec; + if (strand[0] == '-') + for(; *cigarNext; cigarNext++) + ; + while(getNextCigarOp(cigarSpec, (strand[0] == '-'), &cigarNext, &op, &size)) { switch (op) { case 'M': // match or mismatch (gapless aligned block) if (psl->blockCount == blocksAlloced) pslGrow(psl, &blocksAlloced); totalSize += size; psl->blockSizes[psl->blockCount] = size; psl->qStarts[psl->blockCount] = qNext; psl->tStarts[psl->blockCount] = tNext; psl->blockCount++; tNext += size; qNext += size; break; case 'I': // inserted in target tNext += size; break; case 'D': // deleted from target qNext += size; break; default: errAbort("unrecognized CIGAR op %c in %s", op, cigar); } } + } assert(qNext == qBlkEnd); assert(tNext == tBlkEnd); psl->match = totalSize; return psl; } int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree) /* Return amount that psl overlaps (on target side) with rangeTree. */ { int i; int overlap = 0; boolean isRc = (psl->strand[1] == '-'); for (i=0; i<psl->blockCount; ++i) { int start = psl->tStarts[i];