src/hg/lib/bamFile.c 1.7

1.7 2009/09/14 23:44:25 angie
Added bamFileNameFromTable to bamFile.h and changed bamFetch to take a filename, not db+table, so it can be used for CT files someday. bamFileNameFromTable supports an optional seqName column in the db table, for per-chrom BAM files (as we get from 1000Genomes).
Index: src/hg/lib/bamFile.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/bamFile.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 4 -r1.6 -r1.7
--- src/hg/lib/bamFile.c	24 Aug 2009 23:47:56 -0000	1.6
+++ src/hg/lib/bamFile.c	14 Sep 2009 23:44:25 -0000	1.7
@@ -16,32 +16,42 @@
 {
 ignoreStrand = TRUE;
 }
 
-static char *bbiNameFromTable(struct sqlConnection *conn, char *table)
-/* Return file name from little table. */
-/* This should be libified somewhere sensible -- copied from hgTracks/bedTrack.c. */
+char *bamFileNameFromTable(char *db, char *table, char *bamSeqName)
+/* Return file name from table.  If table has a seqName column, then grab the 
+ * row associated with bamSeqName (which is not nec. in chromInfo, e.g. 
+ * bam file might have '1' not 'chr1'). */
 {
-char query[256];
-safef(query, sizeof(query), "select fileName from %s", table);
+struct sqlConnection *conn = hAllocConn(db);
+boolean checkSeqName = (sqlFieldIndex(conn, table, "seqName") >= 0);
+if (checkSeqName && bamSeqName == NULL)
+    errAbort("bamFileNameFromTable: table %s has seqName column, but NULL seqName passed in",
+	     table);
+char query[512];
+if (checkSeqName)
+    safef(query, sizeof(query), "select fileName from %s where seqName = '%s'",
+	  table, bamSeqName);
+else
+    safef(query, sizeof(query), "select fileName from %s", table);
 char *fileName = sqlQuickString(conn, query);
 if (fileName == NULL)
+    {
+    if (checkSeqName)
+	errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table);
+    else
     errAbort("Missing fileName in %s table", table);
+    }
+hFreeConn(&conn);
 return fileName;
 }
 
-void bamFetch(char *db, char *table, char *position, bam_fetch_f callbackFunc, void *callbackData)
-/* Open the .bam file given in db.table, fetch items in the seq:start-end position range,
+void bamFetch(char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData)
+/* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData. 
  * Note: if sequences in .bam file don't begin with "chr" but cart position does, pass in 
  * cart position + strlen("chr") to match the .bam file sequence names. */
 {
-// TODO: if bamFile is URL, convert URL to path a la UDC, but under udcFuse mountpoint.
-// new hg.conf setting for udcFuse mountpoint/support.
-struct sqlConnection *conn = hAllocConn(db);
-char *bamFileName = bbiNameFromTable(conn, table);
-hFreeConn(&conn);
-
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 if (fh == NULL)
     errAbort("samopen(%s, \"rb\") returned NULL", bamFileName);
 
@@ -51,8 +61,10 @@
     errAbort("bam_parse_region(%s) failed (%d)", position, ret);
 //?? Could this happen if there is no data on some _random?  can avoid with tdb chromosomes...
 
 bam_index_t *idx = bam_index_load(bamFileName);
+if (idx == NULL)
+    errAbort("bam_index_load(%s) failed.", bamFileName);
 ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
 if (ret != 0)
     errAbort("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
 samclose(fh);