54335a78fe09d6ff4aa2559809a7fec47426f315 braney Sat Jan 18 08:40:23 2014 -0800 fix up gff3ToPsl to grok NCBI's mappings from the haplotypes to thereference chroms diff --git src/lib/psl.c src/lib/psl.c index 13a063d..ca75f20 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1806,94 +1806,99 @@ { ExpandArray(psl->qSequence, blockSpace, newSpace); ExpandArray(psl->tSequence, blockSpace, newSpace); } *blockSpacePtr = newSpace; } static boolean getNextCigarOp(char **ptr, char *op, int *size) /* parts the next cigar op */ { char *str = *ptr; if ((str == NULL) || (*str == 0)) return FALSE; -char *end = strchr(str, '+'); -if (end) +// between each cigar op there could be nothing, or a space, or a plus +char *end = str + 1; +for(;*end ; end++) { - *end = 0; - *ptr = end + 1; + if (! (isdigit(*end) || (*end == ' ') || (*end == '+'))) + break; } -else - *ptr = NULL; + +*ptr = end; *op = *str++; *size = atoi(str); return TRUE; } struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd, char *tName, int tSize, int tStart, int tEnd, char* strand, char *cigar) /* create a PSL from a GFF3-style cigar formatted alignment */ { // this is currently tested by the gff3ToPsl int blocksAlloced = 4; struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0); char cigarSpec[strlen(cigar+1)]; // copy since parsing is destructive strcpy(cigarSpec, cigar); char *cigarNext = cigarSpec; char op; int size; int qNext = qStart, qBlkEnd = qEnd; +int totalSize = 0; + if (strand[0] == '-') reverseIntRange(&qNext, &qBlkEnd, qSize); int tNext = tStart, tBlkEnd = tEnd; if (strand[1] == '-') reverseIntRange(&tNext, &tBlkEnd, tSize); while(getNextCigarOp(&cigarNext, &op, &size)) { switch (op) { case 'M': // match or mismatch (gapless aligned block) if (psl->blockCount == blocksAlloced) pslGrow(psl, &blocksAlloced); + totalSize += size; psl->blockSizes[psl->blockCount] = size; psl->qStarts[psl->blockCount] = qNext; psl->tStarts[psl->blockCount] = tNext; psl->blockCount++; tNext += size; qNext += size; break; case 'I': // inserted in target tNext += size; break; case 'D': // deleted from target qNext += size; break; default: errAbort("unrecognized CIGAR op %c in %s", op, cigar); } } assert(qNext == qBlkEnd); assert(tNext == tBlkEnd); +psl->match = totalSize; return psl; } int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree) /* Return amount that psl overlaps (on target side) with rangeTree. */ { int i; int overlap = 0; boolean isRc = (psl->strand[1] == '-'); for (i=0; i<psl->blockCount; ++i) { int start = psl->tStarts[i]; int end = start + psl->blockSizes[i]; if (isRc) reverseIntRange(&start, &end, psl->tSize);