src/hg/hgTracks/bamTrack.c 1.28

1.28 2010/03/03 19:30:04 angie
Bam track improvements: 1. Enable indel-coloring. 2. Display soft-clipped sequence instead of pretending it doesn't exist.
Index: src/hg/hgTracks/bamTrack.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTracks/bamTrack.c,v
retrieving revision 1.27
retrieving revision 1.28
diff -b -B -U 4 -r1.27 -r1.28
--- src/hg/hgTracks/bamTrack.c	26 Feb 2010 06:54:57 -0000	1.27
+++ src/hg/hgTracks/bamTrack.c	3 Mar 2010 19:30:04 -0000	1.28
@@ -29,8 +29,76 @@
     char *grayMode;
     char *userTag;
     };
 
+struct psl *pslFromBam(const bam1_t *bam)
+/* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
+ * no scoring info) */
+{
+const bam1_core_t *core = &bam->core;
+struct psl *psl;
+AllocVar(psl);
+boolean isRc = (core->flag & BAM_FREVERSE);
+psl->strand[0] = isRc ? '-' : '+';
+psl->qName = cloneString(bam1_qname(bam));
+psl->tName = cloneString(chromName);
+unsigned blockCount = 0;
+unsigned *blockSizes, *qStarts, *tStarts;
+AllocArray(blockSizes, core->n_cigar);
+AllocArray(qStarts, core->n_cigar);
+AllocArray(tStarts, core->n_cigar);
+int tPos = core->pos, qPos = 0, qLength = 0;
+unsigned int *cigar = bam1_cigar(bam);
+int i;
+for (i = 0;  i < core->n_cigar;  i++)
+    {
+    char op;
+    int n = bamUnpackCigarElement(cigar[i], &op);
+    switch (op)
+	{
+	case 'M': // match or mismatch (gapless aligned block)
+	    blockSizes[blockCount] = n;
+	    qStarts[blockCount] = qPos;
+	    tStarts[blockCount] = tPos;
+	    blockCount++;
+	    tPos += n;
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'I': // inserted in query
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'D': // deleted from query
+	case 'N': // long deletion from query (intron as opposed to small del)
+	    tPos += n;
+	    break;
+	case 'S': // skipped query bases at beginning or end ("soft clipping")
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
+	    break;
+	default:
+	    errAbort("pslFromBam: unrecognized CIGAR op %c -- update me", op);
+	}
+    }
+psl->tSize = hChromSize(database, chromName);
+psl->tStart = tStarts[0];
+psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1];
+psl->qSize = qLength;
+psl->qStart = qStarts[0];
+psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1];
+if (isRc)
+    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
+psl->blockCount = blockCount;
+psl->blockSizes = blockSizes;
+psl->qStarts = qStarts;
+psl->tStarts = tStarts;
+return psl;
+}
+
 struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
 /* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures, 
  * and tally up length on reference sequence while we're at it. */
 {
@@ -71,8 +139,9 @@
 	}
     }
 if (retLength != NULL)
     *retLength = tLength;
+slReverse(&sfList);
 return sfList;
 }
 
 INLINE int shadeTransform(int min, int max, int score)
@@ -91,9 +160,9 @@
 					   int orientation, int qLen)
 /* Chop up blocksIn into one sf per query base, with sf->grayIx set according to
  * base quality score. */
 {
-struct simpleFeature *sf, *blocksOut = NULL;
+struct simpleFeature *sf, *blocksOut = NULL, *tail = NULL;
 for (sf = blocksIn;  sf != NULL;  sf = sf->next)
     {
     struct simpleFeature *newSf;
     int i;
@@ -106,12 +175,17 @@
 	newSf->qEnd = i + 1;
 	int offset = (orientation < 0) ? (qLen - i - 1) : i;
 	// hardcode min & max for now; if user demand, make into tdb/cart vars.
 	newSf->grayIx = shadeTransform(0, 40, quals[offset]);
-	slAddHead(&blocksOut, newSf);
+	if (blocksOut == NULL)
+	    blocksOut = tail = newSf;
+	else
+	    {
+	    tail->next = newSf;
+	    tail = tail->next;
+	    }
 	}
     }
-slReverse(&blocksOut);
 return blocksOut;
 }
 
 // similar but not identical to customFactory.c's parseRgb:
@@ -141,9 +215,10 @@
 int length;
 lf->components = sfFromNumericCigar(bam, &length);
 lf->start = lf->tallStart = core->pos;
 lf->end = lf->tallEnd = core->pos + length;
-lf->extra = bamGetQuerySequence(bam, TRUE);
+lf->extra = bamGetQuerySequence(bam, FALSE); // cds.c reverses if psl != NULL
+lf->original = pslFromBam(bam);
 int clippedQLen;
 bamGetSoftClipping(bam, NULL, NULL, &clippedQLen);
 if (sameString(btd->colorMode, BAM_COLOR_MODE_GRAY) &&
     sameString(btd->grayMode, BAM_GRAY_MODE_ALI_QUAL))
@@ -434,9 +509,9 @@
 		      (vis != tvDense || exonArrowsEvenWhenDense));
 struct dnaSeq *mrnaSeq = NULL;
 enum baseColorDrawOpt drawOpt = baseColorDrawOff;
 boolean indelShowDoubleInsert, indelShowQueryInsert, indelShowPolyA;
-struct psl *psl = NULL;
+struct psl *psl = (struct psl *)(lf->original);
 char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT);
 char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT);
 bool baseQualMode = (sameString(colorMode, BAM_COLOR_MODE_GRAY) &&
 		     sameString(grayMode, BAM_GRAY_MODE_BASE_QUAL));
@@ -494,9 +569,9 @@
 	clippedBarbs(hvg, x1+1, midY, x2-x1-2, tl.barbHeight, tl.barbSpacing, lf->orientation,
 		     barbColor, TRUE);
 	}
     }
-if (indelShowDoubleInsert)
+if (indelShowDoubleInsert && psl)
     {
     int intronGap = 0;
     if (vis != tvDense)
 	intronGap = atoi(trackDbSettingClosestToHomeOrDefault(tg->tdb, "intronGap", "0"));
@@ -506,9 +581,9 @@
     {
     /* If highlighting differences between aligned sequence and genome when
      * zoomed way out, this must be done in a separate pass after exons are
      * drawn so that exons sharing the pixel don't overdraw differences. */
-    if (indelShowQueryInsert || indelShowPolyA)
+    if ((indelShowQueryInsert || indelShowPolyA) && psl)
 	baseColorOverdrawQInsert(tg, lf, hvg, xOff, y, scale, heightPer, mrnaSeq, psl, winStart,
 				 drawOpt, indelShowQueryInsert, indelShowPolyA);
     baseColorOverdrawDiff(tg, lf, hvg, xOff, y, scale, heightPer, mrnaSeq, psl, winStart, drawOpt);
     baseColorDrawCleanup(lf, &mrnaSeq, &psl);