src/hg/hgTracks/bamTrack.c 1.3

1.3 2009/07/27 19:31:48 angie
Added option (trackDb setting pairEndsByName) to join separate BAM records for paired ends into linkedFeaturesSeries paired-end items.
Index: src/hg/hgTracks/bamTrack.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTracks/bamTrack.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/hgTracks/bamTrack.c	24 Jun 2009 23:57:05 -0000	1.2
+++ src/hg/hgTracks/bamTrack.c	27 Jul 2009 19:31:48 -0000	1.3
@@ -21,10 +21,9 @@
 
 struct bamTrackData
     {
     struct track *tg;
-// TODO: save state here when we add pairing into linkedFeaturesSeries
-//    struct hash *looseEnds;
+    struct hash *pairHash;
     };
 
 struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
 /* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures  */
@@ -37,9 +36,10 @@
 for (i = 0;  i < core->n_cigar;  i++)
     {
     // decoding lifted from bam.c bam_format1(), long may it remain stable:
     int n = cigar[i]>>BAM_CIGAR_SHIFT;
-    char op = "MIDNSHP"[cigar[i]&BAM_CIGAR_MASK];
+    int opcode = cigar[i] & BAM_CIGAR_MASK;
+    char op = "MIDNSHP"[opcode];
     switch (op)
 	{
 	case 'M': // match or mismatch (gapless aligned block)
 	    AllocVar(sf);
@@ -62,23 +62,19 @@
 	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
 	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
 	    break;
 	default:
-	    errAbort("Unrecognized CIGAR op character '%c' (%03o)", op, op);
-	    ;
+	    errAbort("Unrecognized CIGAR op index %d -- has bam.c bam_format1() changed?", opcode);
 	}
     }
 if (retLength != NULL)
     *retLength = tLength;
 return sfList;
 }
 
-int bamToLf(const bam1_t *bam, void *data)
-/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam 
- * into a linkedFeatures item, and add it to tg->items. */
+struct linkedFeatures *bamToLf(const bam1_t *bam, void *data)
+/* Translate a BAM record into a linkedFeatures item. */
 {
-struct bamTrackData *btd = (struct bamTrackData *)data;
-struct track *tg = btd->tg;
 const bam1_core_t *core = &bam->core;
 uint8_t *s = bam1_seq(bam);
 struct linkedFeatures *lf;
 AllocVar(lf);
@@ -95,13 +91,115 @@
     qSeq[i] = bam_nt16_rev_table[bam1_seqi(s, i)];
 if (lf->orientation == -1)
     reverseComplement(qSeq, core->l_qseq);
 lf->extra = qSeq;
+return lf;
+}
+
+int addBam(const bam1_t *bam, void *data)
+/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam 
+ * into a linkedFeatures item, and add it to tg->items. */
+{
+const bam1_core_t *core = &bam->core;
+if (core->flag & BAM_FUNMAP)
+    return 0;
+struct linkedFeatures *lf = bamToLf(bam, data);
+struct bamTrackData *btd = (struct bamTrackData *)data;
+struct track *tg = btd->tg;
 slAddHead(&(tg->items), lf);
 return 0;
 }
 
-void bamLoadItems(struct track *tg)
+static struct linkedFeatures *lfStub(int startEnd, int orientation)
+/* Make a linkedFeatures for a zero-length item (so that an arrow will be drawn
+ * toward the given coord. */
+{
+struct linkedFeatures *lf;
+AllocVar(lf);
+safef(lf->name, sizeof(lf->name), "stub");
+lf->orientation = orientation;
+struct simpleFeature *sf;
+AllocVar(sf);
+sf->start = sf->end = lf->start = lf->end = lf->tallStart = lf->tallEnd = startEnd;
+lf->components = sf;
+lf->extra = cloneString("");
+return lf;
+}
+
+static struct linkedFeaturesSeries *lfsFromLf(struct linkedFeatures *lf)
+/* Make a linkedFeaturesSeries from one or two linkedFeatures elements. */
+{
+struct linkedFeaturesSeries *lfs;
+AllocVar(lfs);
+lfs->name = cloneString(lf->name);
+lfs->grayIx = lf->grayIx;
+if (!lf->next || lf->next->orientation == -lf->orientation)
+    lfs->orientation = lf->orientation;
+if (lf->next != NULL)
+    slSort(&lf, linkedFeaturesCmpStart);
+lfs->start = lf->start;
+lfs->end = lf->next ? lf->next->end : lf->end;
+lfs->features = lf;
+return lfs;
+}
+
+int addBamPaired(const bam1_t *bam, void *data)
+/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam 
+ * into a linkedFeaturesSeries item, and either store it until we find its mate
+ * or add it to tg->items. */
+{
+const bam1_core_t *core = &bam->core;
+if (core->flag & BAM_FUNMAP)
+    return 0;
+struct linkedFeatures *lf = bamToLf(bam, data);
+struct bamTrackData *btd = (struct bamTrackData *)data;
+struct track *tg = btd->tg;
+if (! (core->flag & BAM_FPAIRED))
+    {
+    slAddHead(&(tg->items), lfsFromLf(lf));
+    }
+else
+    {
+    struct linkedFeatures *lfMate = (struct linkedFeatures *)hashFindVal(btd->pairHash, lf->name);
+    if (lfMate == NULL)
+	{
+	if (core->flag & BAM_FPROPER_PAIR && core->mpos < 0)
+	    {
+	    // If we know that this is properly paired, but don't know where its mate
+	    // is, make a bogus item off the edge of the window so that if we don't
+	    // encounter its mate later, we can at least draw an arrow off the
+	    // edge of the window.
+	    int offscreen = (lf->orientation > 0) ? winEnd + 10 : winStart - 10;
+	    if (offscreen < 0) offscreen = 0;
+	    struct linkedFeatures *stub = lfStub(offscreen, -lf->orientation);
+	    lf->next = stub;
+	    }
+	else if (! (core->flag & BAM_FPROPER_PAIR))
+	    {
+	    // TODO: find a way to make this a lighter shade.
+	    // doesn't work: lf->grayIx += 2;
+	    }
+	hashAdd(btd->pairHash, lf->name, lf);
+	}
+    else
+	{
+	lfMate->next = lf;
+	slAddHead(&(tg->items), lfsFromLf(lfMate));
+	hashRemove(btd->pairHash, lf->name);
+	}
+    }
+return 0;
+}
+
+#define MAX_ITEMS_FOR_MAPBOX 1500
+void dontMapItem(struct track *tg, struct hvGfx *hvg, void *item,
+		    char *itemName, char *mapItemName, int start, int end,
+		    int x, int y, int width, int height)
+/* When there are many many items, drawing hgc links can really slow us down. */
+{
+}
+
+void bamLoadItemsCore(struct track *tg, boolean isPaired)
 /* Load BAM data into tg->items item list, unless zoomed out so far
  * that the data would just end up in dense mode and be super-slow. */
 {
 if (winEnd-winStart > BAM_MAX_ZOOM)
@@ -116,27 +214,57 @@
 bam_index_t *idx = bam_index_load(bamFileName);
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 int chromId, s, e;
 
-// TODO: watch out for presence/absence of initial "chr" -- need trackDb setting
+// TODO: watch out for presence/absence of initial "chr" in bam -- need trackDb setting
 char *posToParse = position;
 if (startsWith("chr", position))
     posToParse += 3;
 
 bam_parse_region(fh->header, posToParse, &chromId, &s, &e);
-struct bamTrackData btd = {tg};
-int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd, bamToLf);
+struct hash *pairHash = isPaired ? hashNew(18) : NULL;
+struct bamTrackData btd = {tg, pairHash};
+int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd,
+		    isPaired ? addBamPaired : addBam);
 if (ret != 0)
     errAbort("bam_fetch(%s, %s=%d:%d-%d failed (%d)", bamFileName, chromName,
 	     chromId, winStart+1, winEnd, ret);
 samclose(fh);
+if (isPaired)
+    {
+    struct hashEl *hel;
+    struct hashCookie cookie = hashFirst(pairHash);
+    int count = 0;
+    while ((hel = hashNext(&cookie)) != NULL)
+	{
+	struct linkedFeatures *lf = hel->val;
+	slAddHead(&(tg->items), lfsFromLf(lf));
+	count++;
+	}
+    }
 if (tg->visibility != tvDense)
     {
     slReverse(&(tg->items));
-    slSort(&(tg->items), linkedFeaturesCmpStart);
+    slSort(&(tg->items), isPaired ? linkedFeaturesSeriesCmp : linkedFeaturesCmpStart);
+    if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
+	tg->mapItem = dontMapItem;
     }
 }
 
+void bamLoadItems(struct track *tg)
+/* Load single-ended-only BAM data into tg->items item list, unless zoomed out so far
+ * that the data would just end up in dense mode and be super-slow. */
+{
+bamLoadItemsCore(tg, FALSE);
+}
+
+void bamPairedLoadItems(struct track *tg)
+/* Load possibly paired BAM data into tg->items item list, unless zoomed out so far
+ * that the data would just end up in dense mode and be super-slow. */
+{
+bamLoadItemsCore(tg, TRUE);
+}
+
 void bamDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
 		  int xOff, int yOff, int width, MgFont *font, Color color,
 		  enum trackVisibility vis)
 /* Draw BAM alignments unless zoomed out too far. */
@@ -145,14 +273,40 @@
     return;
 linkedFeaturesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
 }
 
+void bamPairedDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
+			int xOff, int yOff, int width, MgFont *font, Color color,
+			enum trackVisibility vis)
+/* Draw paired-end BAM alignments unless zoomed out too far. */
+{
+if (winEnd-winStart > BAM_MAX_ZOOM)
+    return;
+linkedFeaturesSeriesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
+}
+
 void bamMethods(struct track *track)
 /* Methods for BAM alignment files. */
 {
 track->canPack = TRUE;
-linkedFeaturesMethods(track);
-track->loadItems = bamLoadItems;
-track->drawItems = bamDrawItems;
+char varName[1024];
+safef(varName, sizeof(varName), "%s_pairEndsByName", track->mapName);
+boolean isPaired = cartUsualBoolean(cart, varName,
+				    (trackDbSetting(track->tdb, "pairEndsByName") != NULL));
+if (isPaired)
+    {
+    linkedFeaturesSeriesMethods(track);
+    track->loadItems = bamPairedLoadItems;
+    track->drawItems = bamPairedDrawItems;
+    }
+else
+    {
+    linkedFeaturesMethods(track);
+    track->loadItems = bamLoadItems;
+    track->drawItems = bamDrawItems;
+    }
+track->labelNextItemButtonable = track->nextItemButtonable = FALSE;
+track->labelNextPrevItem = NULL;
+track->nextPrevItem = NULL;
 }
 
 #endif /* USE_BAM */