src/hg/hgTracks/bamTrack.c 1.3
1.3 2009/07/27 19:31:48 angie
Added option (trackDb setting pairEndsByName) to join separate BAM records for paired ends into linkedFeaturesSeries paired-end items.
Index: src/hg/hgTracks/bamTrack.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTracks/bamTrack.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/hgTracks/bamTrack.c 24 Jun 2009 23:57:05 -0000 1.2
+++ src/hg/hgTracks/bamTrack.c 27 Jul 2009 19:31:48 -0000 1.3
@@ -21,10 +21,9 @@
struct bamTrackData
{
struct track *tg;
-// TODO: save state here when we add pairing into linkedFeaturesSeries
-// struct hash *looseEnds;
+ struct hash *pairHash;
};
struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
/* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures */
@@ -37,9 +36,10 @@
for (i = 0; i < core->n_cigar; i++)
{
// decoding lifted from bam.c bam_format1(), long may it remain stable:
int n = cigar[i]>>BAM_CIGAR_SHIFT;
- char op = "MIDNSHP"[cigar[i]&BAM_CIGAR_MASK];
+ int opcode = cigar[i] & BAM_CIGAR_MASK;
+ char op = "MIDNSHP"[opcode];
switch (op)
{
case 'M': // match or mismatch (gapless aligned block)
AllocVar(sf);
@@ -62,23 +62,19 @@
case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
break;
default:
- errAbort("Unrecognized CIGAR op character '%c' (%03o)", op, op);
- ;
+ errAbort("Unrecognized CIGAR op index %d -- has bam.c bam_format1() changed?", opcode);
}
}
if (retLength != NULL)
*retLength = tLength;
return sfList;
}
-int bamToLf(const bam1_t *bam, void *data)
-/* bam_fetch() calls this on each bam alignment retrieved. Translate each bam
- * into a linkedFeatures item, and add it to tg->items. */
+struct linkedFeatures *bamToLf(const bam1_t *bam, void *data)
+/* Translate a BAM record into a linkedFeatures item. */
{
-struct bamTrackData *btd = (struct bamTrackData *)data;
-struct track *tg = btd->tg;
const bam1_core_t *core = &bam->core;
uint8_t *s = bam1_seq(bam);
struct linkedFeatures *lf;
AllocVar(lf);
@@ -95,13 +91,115 @@
qSeq[i] = bam_nt16_rev_table[bam1_seqi(s, i)];
if (lf->orientation == -1)
reverseComplement(qSeq, core->l_qseq);
lf->extra = qSeq;
+return lf;
+}
+
+int addBam(const bam1_t *bam, void *data)
+/* bam_fetch() calls this on each bam alignment retrieved. Translate each bam
+ * into a linkedFeatures item, and add it to tg->items. */
+{
+const bam1_core_t *core = &bam->core;
+if (core->flag & BAM_FUNMAP)
+ return 0;
+struct linkedFeatures *lf = bamToLf(bam, data);
+struct bamTrackData *btd = (struct bamTrackData *)data;
+struct track *tg = btd->tg;
slAddHead(&(tg->items), lf);
return 0;
}
-void bamLoadItems(struct track *tg)
+static struct linkedFeatures *lfStub(int startEnd, int orientation)
+/* Make a linkedFeatures for a zero-length item (so that an arrow will be drawn
+ * toward the given coord. */
+{
+struct linkedFeatures *lf;
+AllocVar(lf);
+safef(lf->name, sizeof(lf->name), "stub");
+lf->orientation = orientation;
+struct simpleFeature *sf;
+AllocVar(sf);
+sf->start = sf->end = lf->start = lf->end = lf->tallStart = lf->tallEnd = startEnd;
+lf->components = sf;
+lf->extra = cloneString("");
+return lf;
+}
+
+static struct linkedFeaturesSeries *lfsFromLf(struct linkedFeatures *lf)
+/* Make a linkedFeaturesSeries from one or two linkedFeatures elements. */
+{
+struct linkedFeaturesSeries *lfs;
+AllocVar(lfs);
+lfs->name = cloneString(lf->name);
+lfs->grayIx = lf->grayIx;
+if (!lf->next || lf->next->orientation == -lf->orientation)
+ lfs->orientation = lf->orientation;
+if (lf->next != NULL)
+ slSort(&lf, linkedFeaturesCmpStart);
+lfs->start = lf->start;
+lfs->end = lf->next ? lf->next->end : lf->end;
+lfs->features = lf;
+return lfs;
+}
+
+int addBamPaired(const bam1_t *bam, void *data)
+/* bam_fetch() calls this on each bam alignment retrieved. Translate each bam
+ * into a linkedFeaturesSeries item, and either store it until we find its mate
+ * or add it to tg->items. */
+{
+const bam1_core_t *core = &bam->core;
+if (core->flag & BAM_FUNMAP)
+ return 0;
+struct linkedFeatures *lf = bamToLf(bam, data);
+struct bamTrackData *btd = (struct bamTrackData *)data;
+struct track *tg = btd->tg;
+if (! (core->flag & BAM_FPAIRED))
+ {
+ slAddHead(&(tg->items), lfsFromLf(lf));
+ }
+else
+ {
+ struct linkedFeatures *lfMate = (struct linkedFeatures *)hashFindVal(btd->pairHash, lf->name);
+ if (lfMate == NULL)
+ {
+ if (core->flag & BAM_FPROPER_PAIR && core->mpos < 0)
+ {
+ // If we know that this is properly paired, but don't know where its mate
+ // is, make a bogus item off the edge of the window so that if we don't
+ // encounter its mate later, we can at least draw an arrow off the
+ // edge of the window.
+ int offscreen = (lf->orientation > 0) ? winEnd + 10 : winStart - 10;
+ if (offscreen < 0) offscreen = 0;
+ struct linkedFeatures *stub = lfStub(offscreen, -lf->orientation);
+ lf->next = stub;
+ }
+ else if (! (core->flag & BAM_FPROPER_PAIR))
+ {
+ // TODO: find a way to make this a lighter shade.
+ // doesn't work: lf->grayIx += 2;
+ }
+ hashAdd(btd->pairHash, lf->name, lf);
+ }
+ else
+ {
+ lfMate->next = lf;
+ slAddHead(&(tg->items), lfsFromLf(lfMate));
+ hashRemove(btd->pairHash, lf->name);
+ }
+ }
+return 0;
+}
+
+#define MAX_ITEMS_FOR_MAPBOX 1500
+void dontMapItem(struct track *tg, struct hvGfx *hvg, void *item,
+ char *itemName, char *mapItemName, int start, int end,
+ int x, int y, int width, int height)
+/* When there are many many items, drawing hgc links can really slow us down. */
+{
+}
+
+void bamLoadItemsCore(struct track *tg, boolean isPaired)
/* Load BAM data into tg->items item list, unless zoomed out so far
* that the data would just end up in dense mode and be super-slow. */
{
if (winEnd-winStart > BAM_MAX_ZOOM)
@@ -116,27 +214,57 @@
bam_index_t *idx = bam_index_load(bamFileName);
samfile_t *fh = samopen(bamFileName, "rb", NULL);
int chromId, s, e;
-// TODO: watch out for presence/absence of initial "chr" -- need trackDb setting
+// TODO: watch out for presence/absence of initial "chr" in bam -- need trackDb setting
char *posToParse = position;
if (startsWith("chr", position))
posToParse += 3;
bam_parse_region(fh->header, posToParse, &chromId, &s, &e);
-struct bamTrackData btd = {tg};
-int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd, bamToLf);
+struct hash *pairHash = isPaired ? hashNew(18) : NULL;
+struct bamTrackData btd = {tg, pairHash};
+int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd,
+ isPaired ? addBamPaired : addBam);
if (ret != 0)
errAbort("bam_fetch(%s, %s=%d:%d-%d failed (%d)", bamFileName, chromName,
chromId, winStart+1, winEnd, ret);
samclose(fh);
+if (isPaired)
+ {
+ struct hashEl *hel;
+ struct hashCookie cookie = hashFirst(pairHash);
+ int count = 0;
+ while ((hel = hashNext(&cookie)) != NULL)
+ {
+ struct linkedFeatures *lf = hel->val;
+ slAddHead(&(tg->items), lfsFromLf(lf));
+ count++;
+ }
+ }
if (tg->visibility != tvDense)
{
slReverse(&(tg->items));
- slSort(&(tg->items), linkedFeaturesCmpStart);
+ slSort(&(tg->items), isPaired ? linkedFeaturesSeriesCmp : linkedFeaturesCmpStart);
+ if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
+ tg->mapItem = dontMapItem;
}
}
+void bamLoadItems(struct track *tg)
+/* Load single-ended-only BAM data into tg->items item list, unless zoomed out so far
+ * that the data would just end up in dense mode and be super-slow. */
+{
+bamLoadItemsCore(tg, FALSE);
+}
+
+void bamPairedLoadItems(struct track *tg)
+/* Load possibly paired BAM data into tg->items item list, unless zoomed out so far
+ * that the data would just end up in dense mode and be super-slow. */
+{
+bamLoadItemsCore(tg, TRUE);
+}
+
void bamDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
int xOff, int yOff, int width, MgFont *font, Color color,
enum trackVisibility vis)
/* Draw BAM alignments unless zoomed out too far. */
@@ -145,14 +273,40 @@
return;
linkedFeaturesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
}
+void bamPairedDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
+ int xOff, int yOff, int width, MgFont *font, Color color,
+ enum trackVisibility vis)
+/* Draw paired-end BAM alignments unless zoomed out too far. */
+{
+if (winEnd-winStart > BAM_MAX_ZOOM)
+ return;
+linkedFeaturesSeriesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
+}
+
void bamMethods(struct track *track)
/* Methods for BAM alignment files. */
{
track->canPack = TRUE;
-linkedFeaturesMethods(track);
-track->loadItems = bamLoadItems;
-track->drawItems = bamDrawItems;
+char varName[1024];
+safef(varName, sizeof(varName), "%s_pairEndsByName", track->mapName);
+boolean isPaired = cartUsualBoolean(cart, varName,
+ (trackDbSetting(track->tdb, "pairEndsByName") != NULL));
+if (isPaired)
+ {
+ linkedFeaturesSeriesMethods(track);
+ track->loadItems = bamPairedLoadItems;
+ track->drawItems = bamPairedDrawItems;
+ }
+else
+ {
+ linkedFeaturesMethods(track);
+ track->loadItems = bamLoadItems;
+ track->drawItems = bamDrawItems;
+ }
+track->labelNextItemButtonable = track->nextItemButtonable = FALSE;
+track->labelNextPrevItem = NULL;
+track->nextPrevItem = NULL;
}
#endif /* USE_BAM */