src/hg/lib/bamFile.c 1.17
1.17 2009/12/10 15:02:12 angie
Got rid of bogus bamIgnoreStrand() -- make useStrand explicit. Added auto-detect of missing 'chr' in sequence names, so stripPrefix setting is no longer necessary. Better message for samopen failures.
Index: src/hg/lib/bamFile.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/bamFile.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -b -B -U 4 -r1.16 -r1.17
--- src/hg/lib/bamFile.c 30 Nov 2009 23:46:52 -0000 1.16
+++ src/hg/lib/bamFile.c 10 Dec 2009 15:02:12 -0000 1.17
@@ -10,17 +10,8 @@
#include "bamFile.h"
static char const rcsid[] = "$Id$";
-static boolean ignoreStrand = FALSE;
-
-void bamIgnoreStrand()
-/* Change the behavior of this lib to disregard item strand.
- * If called, this should be called before any other bam functions. */
-{
-ignoreStrand = TRUE;
-}
-
char *bamFileNameFromTable(char *db, char *table, char *bamSeqName)
/* Return file name from table. If table has a seqName column, then grab the
* row associated with bamSeqName (which is not nec. in chromInfo, e.g.
* bam file might have '1' not 'chr1'). */
@@ -36,8 +27,18 @@
table, bamSeqName);
else
safef(query, sizeof(query), "select fileName from %s", table);
char *fileName = sqlQuickString(conn, query);
+if (fileName == NULL && checkSeqName)
+ {
+ if (startsWith("chr", bamSeqName))
+ safef(query, sizeof(query), "select fileName from %s where seqName = '%s'",
+ table, bamSeqName+strlen("chr"));
+ else
+ safef(query, sizeof(query), "select fileName from %s where seqName = 'chr%s'",
+ table, bamSeqName);
+ fileName = sqlQuickString(conn, query);
+ }
if (fileName == NULL)
{
if (checkSeqName)
errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table);
@@ -184,18 +185,34 @@
void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData)
/* Open the .bam file, fetch items in the seq:start-end position range,
* and call callbackFunc on each bam item retrieved from the file plus callbackData.
- * Note: if sequences in .bam file don't begin with "chr" but cart position does, pass in
- * cart position + strlen("chr") to match the .bam file sequence names. */
+ * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. */
{
char *bamFileName = samtoolsFileName(fileOrUrl);
samfile_t *fh = samopen(bamFileName, "rb", NULL);
if (fh == NULL)
- errAbort("samopen(%s, \"rb\") returned NULL", bamFileName);
+ {
+ boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
+ struct dyString *urlWarning = dyStringNew(0);
+ if (usingUrl)
+ {
+ char *udcFuseRoot = cfgOption("udcFuse.mountPoint");
+ boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName));
+ if (usingUdc)
+ dyStringAppend(urlWarning, " (using udcFuse)");
+ dyStringAppend(urlWarning,
+ ". If you are able to access the URL with your web browser, "
+ "please try reloading this page.");
+ }
+ warn("failed to open %s%s", fileOrUrl, urlWarning->string);
+ return;
+ }
int chromId, start, end;
int ret = bam_parse_region(fh->header, position, &chromId, &start, &end);
+if (ret != 0 && startsWith("chr", position))
+ ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end);
if (ret != 0)
// If the bam file does not cover the current chromosome, OK
return;
@@ -215,38 +232,38 @@
samclose(fh);
}
boolean bamIsRc(const bam1_t *bam)
-/* Return TRUE if alignment is on - strand. If bamIgnoreStrand has been called,
- * then this always returns FALSE. */
+/* Return TRUE if alignment is on - strand. */
{
const bam1_core_t *core = &bam->core;
-return (core->flag & BAM_FREVERSE) && !ignoreStrand;
+return (core->flag & BAM_FREVERSE);
}
-char *bamGetQuerySequence(const bam1_t *bam)
+char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand)
/* Return the nucleotide sequence encoded in bam. The BAM format
* reverse-complements query sequence when the alignment is on the - strand,
- * so here we rev-comp it back to restore the original query sequence. */
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
{
const bam1_core_t *core = &bam->core;
char *qSeq = needMem(core->l_qseq + 1);
uint8_t *s = bam1_seq(bam);
int i;
for (i = 0; i < core->l_qseq; i++)
qSeq[i] = bam_nt16_rev_table[bam1_seqi(s, i)];
-if (bamIsRc(bam))
+if (useStrand && bamIsRc(bam))
reverseComplement(qSeq, core->l_qseq);
return qSeq;
}
-UBYTE *bamGetQueryQuals(const bam1_t *bam)
+UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
/* Return the base quality scores encoded in bam as an array of ubytes. */
{
const bam1_core_t *core = &bam->core;
int qLen = core->l_qseq;
UBYTE *arr = needMem(qLen);
-boolean isRc = bamIsRc(bam);
+boolean isRc = useStrand && bamIsRc(bam);
UBYTE *qualStr = bam1_qual(bam);
int i;
for (i = 0; i < qLen; i++)
{
@@ -382,15 +399,16 @@
}
return tLength;
}
-struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset)
+struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
+ boolean useStrand)
/* Convert from bam to ffAli format. (Adapted from psl.c's pslToFfAli.) */
{
struct ffAli *ffList = NULL, *ff;
const bam1_core_t *core = &bam->core;
-boolean isRc = bamIsRc(bam);
-DNA *needle = (DNA *)bamGetQuerySequence(bam);
+boolean isRc = useStrand && bamIsRc(bam);
+DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand);
if (isRc)
reverseComplement(target->dna, target->size);
DNA *haystack = target->dna;
unsigned int *cigarPacked = bam1_cigar(bam);