src/hg/tcga/pBamBam/pBamStats.c 1.1
1.1 2010/01/05 04:03:36 jsanborn
initial commit
Index: src/hg/tcga/pBamBam/pBamStats.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/pBamStats.c,v
retrieving revision 1.2
retrieving revision 1.1
diff -b -B -U 1000000 -r1.2 -r1.1
--- src/hg/tcga/pBamBam/pBamStats.c 5 Jan 2010 04:30:53 -0000 1.2
+++ src/hg/tcga/pBamBam/pBamStats.c 5 Jan 2010 04:03:36 -0000 1.1
@@ -1,201 +1,198 @@
/* bamStats - Gathering useful information from BAM files.
* code adapted from samtools...
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
#include "bam_helper.h"
static char const rcsid[] = "$Id$";
void usage()
/* Explain usage and exit. */
{
errAbort(
"bamStats - Useful info from BAM file (chroms represented, etc.)\n"
"usage:\n"
" bamStats chrom <in.bam>\n"
"options:\n"
"\n"
);
}
static struct optionSpec options[] = {
{NULL, 0},
};
#define HIGH_Q 30
#define MAX_ISIZE 10000
typedef struct {
double avg_isize, n_good_paired;
- long long prev_pos, n_pos, n_align;
+ long long prev_pos, n_pos;
long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good;
long long n_sgltn, n_read1, n_read2;
long long n_qcfail, n_dup;
long long n_interchr, n_interchrhigh;
long long n_intrachr, n_intrachrhigh;
} bam_flagstat_t;
void flagstat_loop(bam_flagstat_t *s, bam1_core_t *c)
{
++(s)->n_reads;
if ((c)->pos > (s)->prev_pos)
- {
- ++(s)->n_pos; // number of unique locations
- (s)->n_align += (c)->l_qseq; // number of aligned positions in unique seq
- }
+ ++(s)->n_pos;
+
(s)->prev_pos = (c)->pos;
if ((c)->flag & BAM_FPAIRED)
{
++(s)->n_pair_all;
if ((c)->flag & BAM_FPROPER_PAIR)
++(s)->n_pair_good;
if ((c)->flag & BAM_FREAD1)
++(s)->n_read1;
if ((c)->flag & BAM_FREAD2)
++(s)->n_read2;
if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP))
++(s)->n_sgltn;
if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP))
{
++(s)->n_pair_map;
if ((c)->mtid != (c)->tid)
{
++(s)->n_interchr;
if ((c)->qual >= HIGH_Q)
++(s)->n_interchrhigh;
}
else if (abs((c)->mpos - (c)->pos) >= MAX_ISIZE)
{
++(s)->n_intrachr;
if ((c)->qual >= HIGH_Q)
++(s)->n_intrachrhigh;
}
else
{
double isize = (double) abs((c)->mpos - (c)->pos);
(s)->avg_isize = ((s)->avg_isize * (s)->n_good_paired + isize);
(s)->n_good_paired += 1.0;
(s)->avg_isize /= (s)->n_good_paired;
}
}
}
if (!((c)->flag & BAM_FUNMAP))
++(s)->n_mapped;
if ((c)->flag & BAM_FQCFAIL)
++(s)->n_qcfail;
if ((c)->flag & BAM_FDUP)
++(s)->n_dup;
}
bam_flagstat_t *bam_flagstat_core(bamFile fp)
{
bam_flagstat_t *s;
bam1_t *b;
bam1_core_t *c;
int ret;
s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
b = bam_init1();
c = &b->core;
int prev_tid = -1;
while ((ret = bam_read1(fp, b)) >= 0)
{
if (prev_tid == -1)
prev_tid = (c)->tid;
else if ((c)->tid != prev_tid)
break;
flagstat_loop(s, c);
}
bam_destroy1(b);
return s;
}
void bamStats(char *position, char *inbam)
/* bamFilter - Filtering a BAM file for specified region(s) and criteria. */
{
bamFile fp;
bam_header_t *header;
bam_flagstat_t *s;
fp = strcmp(inbam, "-")? bam_open(inbam, "r") : bam_dopen(fileno(stdin), "r");
if (!fp)
errAbort("Problem opening bamfile %s\n", inbam);
header = bam_header_read(fp);
int ref, beg, end;
bam_index_t *idx = bam_index_load(inbam); // load BAM index for L file
bam_parse_region(header, position, &ref, &beg, &end);
if (ref < 0)
{
fprintf(stderr, "Invalid region for BAM file\n");
return;
}
bam_file_seek(fp, idx, ref, beg, end);
s = bam_flagstat_core(fp);
printf(">%s\n", position);
printf("n_reads\t%lld\n", s->n_reads);
printf("n_pos\t%lld\n", s->n_pos);
-printf("n_align\t%lld\n", s->n_align);
printf("fail_QA\t%lld\n", s->n_qcfail);
printf("dupes\t%lld\n", s->n_dup);
printf("mapped\t%lld\n", s->n_mapped);
printf("paired\t%lld\n", s->n_pair_all);
printf("read1\t%lld\n", s->n_read1);
printf("read2\t%lld\n", s->n_read2);
printf("prop_paired\t%lld\n", s->n_pair_good);
printf("both_mapped\t%lld\n", s->n_pair_map);
printf("concordant\t%lld\n", (long long) s->n_good_paired);
printf("avg_isize\t%f\n", s->avg_isize);
printf("singletons\t%lld\n", s->n_sgltn);
printf("inter_pair\t%lld\n", s->n_interchr);
printf("inter_pair_gt_30Q\t%lld\n", s->n_interchrhigh);
printf("intra_pair\t%lld\n", s->n_intrachr);
printf("intra_pair_gt_30Q\t%lld\n", s->n_intrachrhigh);
free(s);
bam_close(fp);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc < 3)
usage();
char *position = argv[1];
char *inbam = argv[2];
bamStats(position, inbam);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif