src/hg/hgTracks/bamTrack.c 1.1

1.1 2009/06/24 20:33:03 angie
Adding rudimentary support for BAM (compressed alignments) files as a native track type.
Index: src/hg/hgTracks/bamTrack.c
===================================================================
RCS file: src/hg/hgTracks/bamTrack.c
diff -N src/hg/hgTracks/bamTrack.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/hgTracks/bamTrack.c	24 Jun 2009 20:33:03 -0000	1.1
@@ -0,0 +1,156 @@
+/* bamTrack -- handlers for alignments in BAM format (produced by MAQ,
+ * BWA and some other short-read alignment tools). */
+
+#ifdef USE_BAM
+
+#include "common.h"
+#include "hCommon.h"
+#include "hash.h"
+#include "linefile.h"
+#include "jksql.h"
+#include "hdb.h"
+#include "hgTracks.h"
+// bam.h is incomplete without _IOLIB set to 1, 2 or 3.  2 is used by Makefile.generic:
+#define _IOLIB 2
+#include "bam.h"
+#include "sam.h"
+
+static char const rcsid[] = "$Id$";
+
+#define BAM_MAX_ZOOM 200000
+
+struct bamTrackData
+    {
+    struct track *tg;
+// TODO: save state here when we add pairing into linkedFeaturesSeries
+//    struct hash *looseEnds;
+    };
+
+struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
+/* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures  */
+{
+const bam1_core_t *core = &bam->core;
+struct simpleFeature *sf, *sfList = NULL;
+int tLength=0, tPos = core->pos, qPos = 0;
+unsigned int *cigar = bam1_cigar(bam);
+int i;
+for (i = 0;  i < core->n_cigar;  i++)
+    {
+    // decoding lifted from bam.c bam_format1(), long may it remain stable:
+    int n = cigar[i]>>BAM_CIGAR_SHIFT;
+    char op = "MIDNSHP"[cigar[i]&BAM_CIGAR_MASK];
+    switch (op)
+	{
+	case 'M': // match or mismatch (gapless aligned block)
+	    AllocVar(sf);
+	    sf->start = tPos;
+	    sf->qStart = qPos;
+	    tPos = sf->end = tPos + n;
+	    qPos = sf->qEnd = qPos + n;
+	    slAddHead(&sfList, sf);
+	    tLength += n;
+	    break;
+	case 'I': // inserted in query
+	case 'S': // skipped query bases at beginning or end ("soft clipping")
+	    qPos += n;
+	    break;
+	case 'D': // deleted from query
+	case 'N': // long deletion from query (intron as opposed to small del)
+	    tPos += n;
+	    tLength += n;
+	    break;
+	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
+	    break;
+	default:
+	    errAbort("Unrecognized CIGAR op character '%c' (%03o)", op, op);
+	    ;
+	}
+    }
+if (retLength != NULL)
+    *retLength = tLength;
+return sfList;
+}
+
+int bamToLf(const bam1_t *bam, void *data)
+/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam 
+ * into a linkedFeatures item, and add it to tg->items. */
+{
+struct bamTrackData *btd = (struct bamTrackData *)data;
+struct track *tg = btd->tg;
+const bam1_core_t *core = &bam->core;
+uint8_t *s = bam1_seq(bam);
+struct linkedFeatures *lf;
+AllocVar(lf);
+lf->score = core->qual;
+safef(lf->name, sizeof(lf->name), bam1_qname(bam));
+lf->orientation = (core->flag & BAM_FREVERSE) ? -1 : 1;
+int length;
+lf->components = sfFromNumericCigar(bam, &length);
+lf->start = lf->tallStart = core->pos;
+lf->end = lf->tallEnd = core->pos + length;
+char *qSeq = needMem(core->l_qseq + 1);
+int i;
+for (i = 0; i < core->l_qseq; i++)
+    qSeq[i] = bam_nt16_rev_table[bam1_seqi(s, i)];
+lf->extra = qSeq;
+slAddHead(&(tg->items), lf);
+return 0;
+}
+
+void bamLoadItems(struct track *tg)
+/* Load BAM data into tg->items item list, unless zoomed out so far
+ * that the data would just end up in dense mode and be super-slow. */
+{
+if (winEnd-winStart > BAM_MAX_ZOOM)
+    return;
+struct sqlConnection *conn = hAllocConn(database);
+char *bamFileName = bbiNameFromTable(conn, tg->mapName);
+hFreeConn(&conn);
+
+// TODO: if bamFile is URL, convert URL to path a la UDC, but under udcFuse mountpoint.
+// new hg.conf setting for udcFuse mountpoint/support.
+
+bam_index_t *idx = bam_index_load(bamFileName);
+samfile_t *fh = samopen(bamFileName, "rb", NULL);
+int chromId, s, e;
+
+// TODO: watch out for presence/absence of initial "chr" -- need trackDb setting
+char *posToParse = position;
+if (startsWith("chr", position))
+    posToParse += 3;
+
+bam_parse_region(fh->header, posToParse, &chromId, &s, &e);
+struct bamTrackData btd = {tg};
+int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd, bamToLf);
+if (ret != 0)
+    errAbort("bam_fetch(%s, %s=%d:%d-%d failed (%d)", bamFileName, chromName,
+	     chromId, winStart+1, winEnd, ret);
+samclose(fh);
+if (tg->visibility != tvDense)
+    {
+    slReverse(&(tg->items));
+    slSort(&(tg->items), linkedFeaturesCmpStart);
+    }
+}
+
+void bamDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
+		  int xOff, int yOff, int width, MgFont *font, Color color,
+		  enum trackVisibility vis)
+/* Draw BAM alignments unless zoomed out too far. */
+{
+if (winEnd-winStart > BAM_MAX_ZOOM)
+    return;
+linkedFeaturesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
+}
+
+void bamMethods(struct track *track)
+/* Methods for BAM alignment files. */
+{
+track->canPack = TRUE;
+linkedFeaturesMethods(track);
+track->loadItems = bamLoadItems;
+track->drawItems = bamDrawItems;
+}
+
+#endif /* USE_BAM */