474b9ac55dcb8e3c4cc56b00d52bd210b7989038
markd
  Thu Mar 7 22:59:45 2024 -0800
create PSLs for supplementary alignments that correct represent the query alignment.  RM: 33209

diff --git src/lib/bamFile.c src/lib/bamFile.c
index 164b1cc..2a7e339 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -704,33 +704,36 @@
     else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; }
     else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; }
     else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; }
     else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; }
     else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; }
     else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; }
     else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; }
     else if (type == 'Z' || type == 'H')
 	{
 	dyStringAppend(dy, (char *)s);
 	s += strlen((char *)s) + 1;
 	}
     }
 }
 
-struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr)
+struct psl *bamToPslUnscored2(const bam1_t *bam, const bam_hdr_t *hdr, int inclHardClipped)
 /* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
- * no scoring info) */
+ * no scoring info).  If inclHardClipped is True, the size of the hard-clipped regions
+ * are include in the PSL.  This is required for supplementary alignments to have the
+ * correct query sizes and offsets.
+ */
 {
 const bam1_core_t *core = &bam->core;
 
 if (core->tid == -1)
     return NULL;
 
 struct psl *psl;
 AllocVar(psl);
 boolean isRc = (core->flag & BAM_FREVERSE);
 psl->strand[0] = isRc ? '-' : '+';
 psl->qName = cloneString(bam1_qname(bam));
 psl->tName = cloneString(hdr->target_name[core->tid]);
 unsigned blockCount = 0;
 unsigned *blockSizes, *qStarts, *tStarts;
 AllocArray(blockSizes, core->n_cigar);
@@ -760,45 +763,57 @@
             else
                psl->match += n;
 	    break;
 	case 'I': // inserted in query
 	    qPos += n;
 	    qLength += n;
 	    break;
 	case 'D': // deleted from query
 	case 'N': // long deletion from query (intron as opposed to small del)
 	    tPos += n;
 	    break;
 	case 'S': // skipped query bases at beginning or end ("soft clipping")
 	    qPos += n;
 	    qLength += n;
 	    break;
-	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+	case 'H': // note query bases not stored in record's query sequence ("hard clipping")
+            if (inclHardClipped)
+                {
+                qPos += n;
+                qLength += n;
+                }
 	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
 	    break;
 	default:
 	    errAbort("bamToPsl: unrecognized CIGAR op %c -- update me", op);
 	}
     }
 
 if (blockCount == 0)
     {
     pslFree(&psl);
     return NULL;
     }
 
 psl->tSize = hdr->target_len[core->tid];
 psl->tStart = tStarts[0];
 psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1];
 psl->qSize = qLength;
 psl->qStart = qStarts[0];
 psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1];
 if (isRc)
     reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
 psl->blockCount = blockCount;
 psl->blockSizes = blockSizes;
 psl->qStarts = qStarts;
 psl->tStarts = tStarts;
 pslComputeInsertCounts(psl);
 return psl;
 }
 
+struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr)
+/* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
+ * no scoring info) */
+{
+return bamToPslUnscored2(bam, hdr, FALSE);
+}
+