474b9ac55dcb8e3c4cc56b00d52bd210b7989038 markd Thu Mar 7 22:59:45 2024 -0800 create PSLs for supplementary alignments that correct represent the query alignment. RM: 33209 diff --git src/lib/bamFile.c src/lib/bamFile.c index 164b1cc..2a7e339 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -704,33 +704,36 @@ else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; } else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; } else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; } else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; } else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; } else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { dyStringAppend(dy, (char *)s); s += strlen((char *)s) + 1; } } } -struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr) +struct psl *bamToPslUnscored2(const bam1_t *bam, const bam_hdr_t *hdr, int inclHardClipped) /* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords, - * no scoring info) */ + * no scoring info). If inclHardClipped is True, the size of the hard-clipped regions + * are include in the PSL. This is required for supplementary alignments to have the + * correct query sizes and offsets. + */ { const bam1_core_t *core = &bam->core; if (core->tid == -1) return NULL; struct psl *psl; AllocVar(psl); boolean isRc = (core->flag & BAM_FREVERSE); psl->strand[0] = isRc ? '-' : '+'; psl->qName = cloneString(bam1_qname(bam)); psl->tName = cloneString(hdr->target_name[core->tid]); unsigned blockCount = 0; unsigned *blockSizes, *qStarts, *tStarts; AllocArray(blockSizes, core->n_cigar); @@ -760,45 +763,57 @@ else psl->match += n; break; case 'I': // inserted in query qPos += n; qLength += n; break; case 'D': // deleted from query case 'N': // long deletion from query (intron as opposed to small del) tPos += n; break; case 'S': // skipped query bases at beginning or end ("soft clipping") qPos += n; qLength += n; break; - case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") + case 'H': // note query bases not stored in record's query sequence ("hard clipping") + if (inclHardClipped) + { + qPos += n; + qLength += n; + } case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("bamToPsl: unrecognized CIGAR op %c -- update me", op); } } if (blockCount == 0) { pslFree(&psl); return NULL; } psl->tSize = hdr->target_len[core->tid]; psl->tStart = tStarts[0]; psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1]; psl->qSize = qLength; psl->qStart = qStarts[0]; psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1]; if (isRc) reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize); psl->blockCount = blockCount; psl->blockSizes = blockSizes; psl->qStarts = qStarts; psl->tStarts = tStarts; pslComputeInsertCounts(psl); return psl; } +struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr) +/* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords, + * no scoring info) */ +{ +return bamToPslUnscored2(bam, hdr, FALSE); +} +