2e75814ac8468bdfb14d01cabe7455b63154a64b
kent
  Thu Feb 10 16:59:09 2011 -0800
Adding example lines to BAM schema page in table browser.  Pushing forward on getting identifiers to work there, but still a ways to go.  (It no longer bombs at the paste identifiers page, but it does shortly thereafter.)
diff --git src/hg/lib/bamFile.c src/hg/lib/bamFile.c
index c45276b..a3aceff 100644
--- src/hg/lib/bamFile.c
+++ src/hg/lib/bamFile.c
@@ -184,88 +184,110 @@
 #ifndef KNETFILE_HOOKS
     setCurrentDir(runDir);
 #endif//ndef KNETFILE_HOOKS
     samclose(fh);
     if (idx == NULL)
 	{
 	warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
 	return FALSE;
 	}
     free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
     return TRUE;
     }
 return FALSE;
 }
 
-void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
-	samfile_t **pSamFile)
-/* Open the .bam file, fetch items in the seq:start-end position range,
- * and call callbackFunc on each bam item retrieved from the file plus callbackData.
- * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
- * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
- * the benefit of the callback function, with the open samFile.  */
+samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
+/* Return an open bam file, dealing with FUSE caching if need be. 
+ * Return parameter if NON-null will return the file name after FUSing */
 {
 char *bamFileName = samtoolsFileName(fileOrUrl);
+if (retBamFileName != NULL)
+    *retBamFileName = bamFileName;
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 if (fh == NULL)
     {
     boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
     struct dyString *urlWarning = dyStringNew(0);
     if (usingUrl)
 	{
 	char *udcFuseRoot = cfgOption("udcFuse.mountPoint");
 	boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName));
 	if (usingUdc)
 	    dyStringAppend(urlWarning, " (using udcFuse)");
 	dyStringAppend(urlWarning,
 		       ". If you are able to access the URL with your web browser, "
 		       "please try reloading this page.");
 	}
     warn("failed to open %s%s", fileOrUrl, urlWarning->string);
-    return;
+    return NULL;
+    }
+return fh;
+}
+
+void bamClose(samfile_t **pSamFile)
+/* Close down a samefile_t */
+{
+if (pSamFile != NULL)
+    {
+    samclose(*pSamFile);
+    *pSamFile = NULL;
+    }
     }
+
+
+void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+	samfile_t **pSamFile)
+/* Open the .bam file, fetch items in the seq:start-end position range,
+ * and call callbackFunc on each bam item retrieved from the file plus callbackData.
+ * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
+ * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
+ * the benefit of the callback function, with the open samFile.  */
+{
+char *bamFileName = NULL;
+samfile_t *fh = bamOpen(fileOrUrl, &bamFileName);
 if (pSamFile != NULL)
     *pSamFile = fh;
 int chromId, start, end;
 int ret = bam_parse_region(fh->header, position, &chromId, &start, &end);
 if (ret != 0 && startsWith("chr", position))
     ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end);
 if (ret != 0)
     // If the bam file does not cover the current chromosome, OK
     return;
 
 #ifndef KNETFILE_HOOKS
 // Since samtools' url-handling code saves the .bai file to the current directory,
 // chdir to a trash directory before calling bam_index_load, then chdir back.
 char *runDir = getCurrentDir();
 char *samDir = getSamDir();
 setCurrentDir(samDir);
 #endif//ndef KNETFILE_HOOKS
 bam_index_t *idx = bam_index_load(bamFileName);
 #ifndef KNETFILE_HOOKS
 setCurrentDir(runDir);
 #endif//ndef KNETFILE_HOOKS
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
     ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
     if (ret != 0)
 	warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
     free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
     }
-samclose(fh);
+bamClose(&fh);
 }
 
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
 const bam1_core_t *core = &bam->core;
 return (core->flag & BAM_FREVERSE);
 }
 
 void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
 /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
  * the beginning of the query sequence and quality; likewise for retHigh at end.
  * For convenience, retClippedQLen is the original query length minus soft clipping
  * (and the length of the query sequence that will be returned). */
 {
@@ -664,42 +686,44 @@
 }
 
 static boolean isAllSameChar(char *s, int size, char c)
 /* Return TRUE if first size chars of s are 0 */
 {
 int i;
 for (i=0; i<size; ++i)
     if (s[i] != c)
 	return FALSE;
 return TRUE;
 }
 
 
 int bamAddOneSamAlignment(const bam1_t *bam, void *data)
 /* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam
- * into a linkedFeaturesSeries item, and either store it until we find its mate
- * or add it to tg->items. */
+ * into a samAlignment. */
 {
 struct bamToSamHelper *helper = (struct bamToSamHelper *)data;
 struct lm *lm = helper->lm;
 struct samAlignment *sam;
 lmAllocVar(lm, sam);
 const bam1_core_t *core = &bam->core;
 struct dyString *dy = helper->dy;
 sam->qName = lmCloneString(lm, bam1_qname(bam));
 sam->flag = core->flag;
+if (helper->chrom != NULL)
 sam->rName = helper->chrom;
+else
+    sam->rName = lmCloneString(lm, helper->samFile->header->target_name[core->tid]);
 sam->pos = core->pos + 1;
 sam->mapQ = core->qual;
 dyStringClear(dy);
 bamUnpackCigar(bam, dy);
 sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize);
 if (core->mtid >= 0)
     {
     if (core->tid == core->mtid)
 	sam->rNext = "=";
     else
 	sam->rNext = lmCloneString(lm, helper->samFile->header->target_name[core->mtid]);
     }
 else
     sam->rNext = "*";
 sam->pNext = core->mpos + 1;
@@ -727,69 +751,119 @@
  * bam record.  Results is allocated out of lm, since it tends to be large... */
 {
 struct bamToSamHelper helper;
 helper.lm = lm;
 helper.chrom = chrom;
 helper.dy = dyStringNew(0);
 helper.samList = NULL;
 char posForBam[256];
 safef(posForBam, sizeof(posForBam), "%s:%d-%d", chrom, start, end);
 bamFetch(fileOrUrl, posForBam, bamAddOneSamAlignment, &helper, &helper.samFile);
 dyStringFree(&helper.dy);
 slReverse(&helper.samList);
 return helper.samList;
 }
 
+struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, int count, struct lm *lm)
+/* Read next count alignments in SAM format, allocated in lm.  May return less than
+ * count at end of file. */
+{
+/* Set up helper. */
+struct bamToSamHelper helper;
+helper.lm = lm;
+helper.chrom = NULL;
+helper.dy = dyStringNew(0);
+helper.samFile = fh;
+helper.samList = NULL;
+
+/* Loop through calling our own fetch function */
+int i;
+bam1_t *b = bam_init1();
+for (i=0; i<count; ++i)
+    {
+    if (samread(fh, b) < 0)
+       break;
+    bamAddOneSamAlignment(b, &helper);
+    }
+bam_destroy1(b);
+
+/* Clean up and go home. */
+dyStringFree(&helper.dy);
+slReverse(&helper.samList);
+return helper.samList;
+}
+
 #else
 // If we're not compiling with samtools, make stub routines so compile won't fail:
 
 char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName)
 /* Return file name from table.  If table has a seqName column, then grab the
  * row associated with bamSeqName (which is not nec. in chromInfo, e.g.
  * bam file might have '1' not 'chr1'). */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamFileNameFromTable");
 return NULL;
 }
 
 boolean bamFileExists(char *bamFileName)
 /* Return TRUE if we can successfully open the bam file and its index file. */
 {
 warn(COMPILE_WITH_SAMTOOLS, "bamFileExists");
 return FALSE;
 }
 
+samfile_t *bamOpen(char *bamFileName)
+/* Return an open bam file, dealing with some FUSE caching if need be. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamOpen");
+return FALSE;
+}
+
+void bamClose(samfile_t **pSamFile)
+/* Close down a samefile_t */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamClose");
+}
+
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 	samfile_t **pSamFile)
 /* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
  * the benefit of the callback function, with the open samFile.  */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch");
 }
 
 struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end,
 	struct lm *lm)
 /* Fetch region as a list of samAlignments - which is more or less an unpacked
  * bam record.  Results is allocated out of lm, since it tends to be large... */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamFetchSamAlignment");
 return NULL;
 }
 
+struct samAlignment *bamReadNextSamAlignments(struct samfile_t *fh, int count, struct lm *lm)
+/* Read next count alignments in SAM format, allocated in lm.  May return less than
+ * count at end of file. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamReadNextSamAlignments");
+return NULL;
+}
+
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc");
 return FALSE;
 }
 
 void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
 /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
  * the beginning of the query sequence and quality; likewise for retHigh at end.
  * For convenience, retClippedQLen is the original query length minus soft clipping
  * (and the length of the query sequence that will be returned). */
 {
 errAbort(COMPILE_WITH_SAMTOOLS, "bamGetSoftClipping");
 }