src/hg/hgTracks/bamTrack.c 1.2
1.2 2009/06/24 23:57:05 angie
Oops, reverse-complement BAM query sequence when on - strand.
Index: src/hg/hgTracks/bamTrack.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTracks/bamTrack.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/hgTracks/bamTrack.c 24 Jun 2009 20:33:03 -0000 1.1
+++ src/hg/hgTracks/bamTrack.c 24 Jun 2009 23:57:05 -0000 1.2
@@ -1,156 +1,158 @@
/* bamTrack -- handlers for alignments in BAM format (produced by MAQ,
* BWA and some other short-read alignment tools). */
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "hash.h"
#include "linefile.h"
#include "jksql.h"
#include "hdb.h"
#include "hgTracks.h"
// bam.h is incomplete without _IOLIB set to 1, 2 or 3. 2 is used by Makefile.generic:
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
static char const rcsid[] = "$Id$";
#define BAM_MAX_ZOOM 200000
struct bamTrackData
{
struct track *tg;
// TODO: save state here when we add pairing into linkedFeaturesSeries
// struct hash *looseEnds;
};
struct simpleFeature *sfFromNumericCigar(const bam1_t *bam, int *retLength)
/* Translate BAM's numeric CIGAR encoding into a list of simpleFeatures */
{
const bam1_core_t *core = &bam->core;
struct simpleFeature *sf, *sfList = NULL;
int tLength=0, tPos = core->pos, qPos = 0;
unsigned int *cigar = bam1_cigar(bam);
int i;
for (i = 0; i < core->n_cigar; i++)
{
// decoding lifted from bam.c bam_format1(), long may it remain stable:
int n = cigar[i]>>BAM_CIGAR_SHIFT;
char op = "MIDNSHP"[cigar[i]&BAM_CIGAR_MASK];
switch (op)
{
case 'M': // match or mismatch (gapless aligned block)
AllocVar(sf);
sf->start = tPos;
sf->qStart = qPos;
tPos = sf->end = tPos + n;
qPos = sf->qEnd = qPos + n;
slAddHead(&sfList, sf);
tLength += n;
break;
case 'I': // inserted in query
case 'S': // skipped query bases at beginning or end ("soft clipping")
qPos += n;
break;
case 'D': // deleted from query
case 'N': // long deletion from query (intron as opposed to small del)
tPos += n;
tLength += n;
break;
case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
break;
default:
errAbort("Unrecognized CIGAR op character '%c' (%03o)", op, op);
;
}
}
if (retLength != NULL)
*retLength = tLength;
return sfList;
}
int bamToLf(const bam1_t *bam, void *data)
/* bam_fetch() calls this on each bam alignment retrieved. Translate each bam
* into a linkedFeatures item, and add it to tg->items. */
{
struct bamTrackData *btd = (struct bamTrackData *)data;
struct track *tg = btd->tg;
const bam1_core_t *core = &bam->core;
uint8_t *s = bam1_seq(bam);
struct linkedFeatures *lf;
AllocVar(lf);
lf->score = core->qual;
safef(lf->name, sizeof(lf->name), bam1_qname(bam));
lf->orientation = (core->flag & BAM_FREVERSE) ? -1 : 1;
int length;
lf->components = sfFromNumericCigar(bam, &length);
lf->start = lf->tallStart = core->pos;
lf->end = lf->tallEnd = core->pos + length;
char *qSeq = needMem(core->l_qseq + 1);
int i;
for (i = 0; i < core->l_qseq; i++)
qSeq[i] = bam_nt16_rev_table[bam1_seqi(s, i)];
+if (lf->orientation == -1)
+ reverseComplement(qSeq, core->l_qseq);
lf->extra = qSeq;
slAddHead(&(tg->items), lf);
return 0;
}
void bamLoadItems(struct track *tg)
/* Load BAM data into tg->items item list, unless zoomed out so far
* that the data would just end up in dense mode and be super-slow. */
{
if (winEnd-winStart > BAM_MAX_ZOOM)
return;
struct sqlConnection *conn = hAllocConn(database);
char *bamFileName = bbiNameFromTable(conn, tg->mapName);
hFreeConn(&conn);
// TODO: if bamFile is URL, convert URL to path a la UDC, but under udcFuse mountpoint.
// new hg.conf setting for udcFuse mountpoint/support.
bam_index_t *idx = bam_index_load(bamFileName);
samfile_t *fh = samopen(bamFileName, "rb", NULL);
int chromId, s, e;
// TODO: watch out for presence/absence of initial "chr" -- need trackDb setting
char *posToParse = position;
if (startsWith("chr", position))
posToParse += 3;
bam_parse_region(fh->header, posToParse, &chromId, &s, &e);
struct bamTrackData btd = {tg};
int ret = bam_fetch(fh->x.bam, idx, chromId, winStart, winEnd, &btd, bamToLf);
if (ret != 0)
errAbort("bam_fetch(%s, %s=%d:%d-%d failed (%d)", bamFileName, chromName,
chromId, winStart+1, winEnd, ret);
samclose(fh);
if (tg->visibility != tvDense)
{
slReverse(&(tg->items));
slSort(&(tg->items), linkedFeaturesCmpStart);
}
}
void bamDrawItems(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
int xOff, int yOff, int width, MgFont *font, Color color,
enum trackVisibility vis)
/* Draw BAM alignments unless zoomed out too far. */
{
if (winEnd-winStart > BAM_MAX_ZOOM)
return;
linkedFeaturesDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis);
}
void bamMethods(struct track *track)
/* Methods for BAM alignment files. */
{
track->canPack = TRUE;
linkedFeaturesMethods(track);
track->loadItems = bamLoadItems;
track->drawItems = bamDrawItems;
}
#endif /* USE_BAM */