src/hg/lib/bamFile.c 1.7
1.7 2009/09/14 23:44:25 angie
Added bamFileNameFromTable to bamFile.h and changed bamFetch to take a filename, not db+table, so it can be used for CT files someday. bamFileNameFromTable supports an optional seqName column in the db table, for per-chrom BAM files (as we get from 1000Genomes).
Index: src/hg/lib/bamFile.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/bamFile.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 4 -r1.6 -r1.7
--- src/hg/lib/bamFile.c 24 Aug 2009 23:47:56 -0000 1.6
+++ src/hg/lib/bamFile.c 14 Sep 2009 23:44:25 -0000 1.7
@@ -16,32 +16,42 @@
{
ignoreStrand = TRUE;
}
-static char *bbiNameFromTable(struct sqlConnection *conn, char *table)
-/* Return file name from little table. */
-/* This should be libified somewhere sensible -- copied from hgTracks/bedTrack.c. */
+char *bamFileNameFromTable(char *db, char *table, char *bamSeqName)
+/* Return file name from table. If table has a seqName column, then grab the
+ * row associated with bamSeqName (which is not nec. in chromInfo, e.g.
+ * bam file might have '1' not 'chr1'). */
{
-char query[256];
-safef(query, sizeof(query), "select fileName from %s", table);
+struct sqlConnection *conn = hAllocConn(db);
+boolean checkSeqName = (sqlFieldIndex(conn, table, "seqName") >= 0);
+if (checkSeqName && bamSeqName == NULL)
+ errAbort("bamFileNameFromTable: table %s has seqName column, but NULL seqName passed in",
+ table);
+char query[512];
+if (checkSeqName)
+ safef(query, sizeof(query), "select fileName from %s where seqName = '%s'",
+ table, bamSeqName);
+else
+ safef(query, sizeof(query), "select fileName from %s", table);
char *fileName = sqlQuickString(conn, query);
if (fileName == NULL)
+ {
+ if (checkSeqName)
+ errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table);
+ else
errAbort("Missing fileName in %s table", table);
+ }
+hFreeConn(&conn);
return fileName;
}
-void bamFetch(char *db, char *table, char *position, bam_fetch_f callbackFunc, void *callbackData)
-/* Open the .bam file given in db.table, fetch items in the seq:start-end position range,
+void bamFetch(char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData)
+/* Open the .bam file, fetch items in the seq:start-end position range,
* and call callbackFunc on each bam item retrieved from the file plus callbackData.
* Note: if sequences in .bam file don't begin with "chr" but cart position does, pass in
* cart position + strlen("chr") to match the .bam file sequence names. */
{
-// TODO: if bamFile is URL, convert URL to path a la UDC, but under udcFuse mountpoint.
-// new hg.conf setting for udcFuse mountpoint/support.
-struct sqlConnection *conn = hAllocConn(db);
-char *bamFileName = bbiNameFromTable(conn, table);
-hFreeConn(&conn);
-
samfile_t *fh = samopen(bamFileName, "rb", NULL);
if (fh == NULL)
errAbort("samopen(%s, \"rb\") returned NULL", bamFileName);
@@ -51,8 +61,10 @@
errAbort("bam_parse_region(%s) failed (%d)", position, ret);
//?? Could this happen if there is no data on some _random? can avoid with tdb chromosomes...
bam_index_t *idx = bam_index_load(bamFileName);
+if (idx == NULL)
+ errAbort("bam_index_load(%s) failed.", bamFileName);
ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
if (ret != 0)
errAbort("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
samclose(fh);