d2c7e8be3ceae6027a5274faafdf6239d778cc63
aamp
Sat Jul 16 08:31:16 2011 -0700
Changes related to change source filenames around: bamUdc.ch -> bamFile.ch and bamFile.ch -> hgBam.ch
diff --git src/lib/bamFile.c src/lib/bamFile.c
new file mode 100644
index 0000000..4456991
--- /dev/null
+++ src/lib/bamFile.c
@@ -0,0 +1,693 @@
+/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
+
+#include "common.h"
+#include "portable.h"
+#include "bamFile.h"
+#ifdef USE_BAM
+#include "htmshell.h"
+#include "udc.h"
+
+static boolean isRegularFile(char *filename)
+/* File not only exists, but is also a file not a directory. */
+{
+struct stat mystat;
+if (stat(filename, &mystat) != 0)
+ return FALSE;
+return S_ISREG(mystat.st_mode);
+}
+
+static char *samtoolsFileNameUdcFuse(char *fileOrUrl, char *udcFuseRoot)
+/* If udcFuse is configured, and we have a URL, convert it into a filename in
+ * the udcFuse filesystem for use by samtools. Thus samtools will think it's
+ * working on a local file, but udcFuse will pass access requests to udc and
+ * we'll get the benefits of sparse-file local caching and https support.
+ * udcFuse needs us to open udc files before invoking udcFuse paths, so open
+ * both the .bam and .bai (index) URLs with udc here.
+ * If udcFuse is not configured, or fileOrUrl is not an URL, just pass through fileOrUrl. */
+{
+char *protocol = NULL, *afterProtocol = NULL, *colon = NULL, *auth = NULL;
+udcParseUrlFull(fileOrUrl, &protocol, &afterProtocol, &colon, &auth);
+if (udcFuseRoot != NULL && afterProtocol != NULL)
+ {
+ struct dyString *dy = dyStringNew(0);
+ if (auth == NULL)
+ auth = "";
+ dyStringPrintf(dy, "%s/%s/%s%s", udcFuseRoot, protocol, auth, afterProtocol);
+ char *bamFileName = dyStringCannibalize(&dy);
+ if (!isRegularFile(bamFileName))
+ {
+ verbose(2, "going to call udcFileMayOpen(%s).\n", fileOrUrl);
+ struct udcFile *udcf = udcFileMayOpen(fileOrUrl, NULL);
+ if (udcf != NULL)
+ {
+ udcFileClose(&udcf);
+ verbose(2, "closed udcf. testing existence of %s.\n", bamFileName);
+ if (!isRegularFile(bamFileName))
+ {
+ warn("Cannot find %s -- remount udcFuse?", bamFileName);
+ freeMem(bamFileName);
+ return cloneString(fileOrUrl);
+ }
+ }
+ else
+ {
+ warn("Failed to open BAM URL \"%s\" with udc", fileOrUrl);
+ freeMem(bamFileName);
+ return cloneString(fileOrUrl);
+ }
+ }
+ // Look for index file: xxx.bam.bai or xxx.bai. Look for both in udcFuse,
+ // and only open the URL with udc if neither udcFuse file exists.
+ int urlLen = strlen(fileOrUrl), fLen = strlen(bamFileName);
+ char *indexFileName = needMem(fLen+5);
+ safef(indexFileName, fLen+5, "%s.bai", bamFileName);
+ if (!isRegularFile(indexFileName))
+ {
+ verbose(2, "%s does not already exist\n", indexFileName);
+ char *altIndexFileName = NULL;
+ if (endsWith(fileOrUrl, ".bam"))
+ {
+ altIndexFileName = cloneString(indexFileName);
+ strcpy(altIndexFileName+fLen-1, "i");
+ }
+ if (!(altIndexFileName && isRegularFile(altIndexFileName)))
+ {
+ char *indexUrl = needMem(urlLen+5);
+ safef(indexUrl, urlLen+5, "%s.bai", fileOrUrl);
+ verbose(2, "going to call udcFileMayOpen(%s).\n", indexUrl);
+ struct udcFile *udcf = udcFileMayOpen(indexUrl, NULL);
+ if (udcf != NULL)
+ udcFileClose(&udcf);
+ else if (altIndexFileName != NULL)
+ {
+ char *altIndexUrl = cloneString(indexUrl);
+ strcpy(altIndexUrl+urlLen-1, "i");
+ verbose(2, "going to call udcFileMayOpen(%s).\n", altIndexUrl);
+ udcf = udcFileMayOpen(altIndexUrl, NULL);
+ if (udcf == NULL)
+ {
+ warn("Cannot find BAM index file (%s or %s)", indexUrl, altIndexUrl);
+ return cloneString(fileOrUrl);
+ }
+ udcFileClose(&udcf);
+ freeMem(altIndexUrl);
+ }
+ else
+ {
+ warn("Cannot find BAM index file for \"%s\"", fileOrUrl);
+ return cloneString(fileOrUrl);
+ }
+ freeMem(indexUrl);
+ }
+ freeMem(altIndexFileName);
+ }
+ freeMem(indexFileName);
+ return bamFileName;
+ }
+return cloneString(fileOrUrl);
+}
+
+#ifndef KNETFILE_HOOKS
+static char *getSamDir()
+/* Return the name of a trash dir for samtools to run in (it creates files in current dir)
+ * and make sure the directory exists. */
+{
+static char *samDir = NULL;
+char *dirName = "samtools";
+if (samDir == NULL)
+ {
+ mkdirTrashDirectory(dirName);
+ size_t len = strlen(trashDir()) + 1 + strlen(dirName) + 1;
+ samDir = needMem(len);
+ safef(samDir, len, "%s/%s", trashDir(), dirName);
+ }
+return samDir;
+}
+#endif//ndef KNETFILE_HOOKS
+
+boolean bamFileExistsUdc(char *fileOrUrl, char *udcFuseRoot)
+/* Return TRUE if we can successfully open the bam file and its index file. */
+{
+char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot);
+samfile_t *fh = samopen(bamFileName, "rb", NULL);
+boolean usingUrl = TRUE;
+usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
+if (fh != NULL)
+ {
+#ifndef KNETFILE_HOOKS
+ // When file is an URL, this caches the index file in addition to validating:
+ // Since samtools's url-handling code saves the .bai file to the current directory,
+ // chdir to a trash directory before calling bam_index_load, then chdir back.
+ char *runDir = getCurrentDir();
+ char *samDir = getSamDir();
+ if (usingUrl)
+ setCurrentDir(samDir);
+#endif//ndef KNETFILE_HOOKS
+ bam_index_t *idx = bam_index_load(bamFileName);
+#ifndef KNETFILE_HOOKS
+ if (usingUrl)
+ setCurrentDir(runDir);
+#endif//ndef KNETFILE_HOOKS
+ samclose(fh);
+ if (idx == NULL)
+ {
+ warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
+ return FALSE;
+ }
+ free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
+ return TRUE;
+ }
+return FALSE;
+}
+
+samfile_t *bamOpenUdc(char *fileOrUrl, char **retBamFileName, char *udcFuseRoot)
+/* Return an open bam file, dealing with FUSE caching if need be.
+ * Return parameter if NON-null will return the file name after FUSing */
+{
+char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot);
+if (retBamFileName != NULL)
+ *retBamFileName = bamFileName;
+samfile_t *fh = samopen(bamFileName, "rb", NULL);
+if (fh == NULL)
+ {
+ boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
+ struct dyString *urlWarning = dyStringNew(0);
+ if (usingUrl)
+ {
+ boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName));
+ if (usingUdc)
+ dyStringAppend(urlWarning, " (using udcFuse)");
+ dyStringAppend(urlWarning,
+ ". If you are able to access the URL with your web browser, "
+ "please try reloading this page.");
+ }
+ errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string);
+ }
+return fh;
+}
+
+void bamClose(samfile_t **pSamFile)
+/* Close down a samefile_t */
+{
+if (pSamFile != NULL)
+ {
+ samclose(*pSamFile);
+ *pSamFile = NULL;
+ }
+}
+
+void bamFetchUdc(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+ samfile_t **pSamFile, char *udcFuseRoot)
+/* Open the .bam file, fetch items in the seq:start-end position range,
+ * and call callbackFunc on each bam item retrieved from the file plus callbackData.
+ * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
+ * The pSamFile parameter is optional. If non-NULL it will be filled in, just for
+ * the benefit of the callback function, with the open samFile. */
+{
+char *bamFileName = NULL;
+samfile_t *fh = bamOpenUdc(fileOrUrl, &bamFileName, udcFuseRoot);
+boolean usingUrl = TRUE;
+usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
+if (pSamFile != NULL)
+ *pSamFile = fh;
+int chromId, start, end;
+int ret = bam_parse_region(fh->header, position, &chromId, &start, &end);
+if (ret != 0 && startsWith("chr", position))
+ ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end);
+if (ret != 0)
+ // If the bam file does not cover the current chromosome, OK
+ return;
+#ifndef KNETFILE_HOOKS
+// Since samtools' url-handling code saves the .bai file to the current directory,
+// chdir to a trash directory before calling bam_index_load, then chdir back.
+char *runDir = getCurrentDir();
+char *samDir = getSamDir();
+if (usingUrl)
+ setCurrentDir(samDir);
+#endif//ndef KNETFILE_HOOKS
+bam_index_t *idx = bam_index_load(bamFileName);
+#ifndef KNETFILE_HOOKS
+if (usingUrl)
+ setCurrentDir(runDir);
+#endif//ndef KNETFILE_HOOKS
+if (idx == NULL)
+ warn("bam_index_load(%s) failed.", bamFileName);
+else
+ {
+ ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
+ if (ret != 0)
+ warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
+ free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
+ }
+bamClose(&fh);
+}
+
+boolean bamIsRc(const bam1_t *bam)
+/* Return TRUE if alignment is on - strand. */
+{
+const bam1_core_t *core = &bam->core;
+return (core->flag & BAM_FREVERSE);
+}
+
+void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
+/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
+ * the beginning of the query sequence and quality; likewise for retHigh at end.
+ * For convenience, retClippedQLen is the original query length minus soft clipping
+ * (and the length of the query sequence that will be returned). */
+{
+unsigned int *cigarPacked = bam1_cigar(bam);
+const bam1_core_t *core = &bam->core;
+char op;
+int n = bamUnpackCigarElement(cigarPacked[0], &op);
+int low = (op == 'S') ? n : 0;
+n = bamUnpackCigarElement(cigarPacked[core->n_cigar-1], &op);
+int high = (op == 'S') ? n : 0;
+if (retLow != NULL)
+ *retLow = low;
+if (retHigh != NULL)
+ *retHigh = high;
+if (retClippedQLen != NULL)
+ *retClippedQLen = (core->l_qseq - low - high);
+}
+
+
+void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq)
+/* Fill in qSeq with the nucleotide sequence encoded in bam. The BAM format
+ * reverse-complements query sequence when the alignment is on the - strand,
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
+{
+const bam1_core_t *core = &bam->core;
+int qLen = core->l_qseq;
+uint8_t *packedQSeq = bam1_seq(bam);
+int i;
+for (i = 0; i < qLen; i++)
+ qSeq[i] = bam_nt16_rev_table[bam1_seqi(packedQSeq, i)];
+qSeq[i] = '\0';
+if (useStrand && bamIsRc(bam))
+ reverseComplement(qSeq, qLen);
+}
+
+char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand)
+/* Allocate and return the nucleotide sequence encoded in bam. The BAM format
+ * reverse-complements query sequence when the alignment is on the - strand,
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
+{
+const bam1_core_t *core = &bam->core;
+int qLen = core->l_qseq;
+char *qSeq = needMem(qLen+1);
+bamUnpackQuerySequence(bam, useStrand, qSeq);
+return qSeq;
+}
+
+UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
+/* Return the base quality scores encoded in bam as an array of ubytes. */
+{
+const bam1_core_t *core = &bam->core;
+int qLen = core->l_qseq;
+UBYTE *arr = needMem(qLen);
+boolean isRc = useStrand && bamIsRc(bam);
+UBYTE *qualStr = bam1_qual(bam);
+int i;
+for (i = 0; i < core->l_qseq; i++)
+ {
+ int offset = isRc ? (qLen - 1 - i) : i;
+ arr[i] = (qualStr[0] == 255) ? 255 : qualStr[offset];
+ }
+return arr;
+}
+
+void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar)
+/* Unpack CIGAR string into dynamic string */
+{
+unsigned int *cigarPacked = bam1_cigar(bam);
+const bam1_core_t *core = &bam->core;
+int i;
+for (i = 0; i < core->n_cigar; i++)
+ {
+ char op;
+ int n = bamUnpackCigarElement(cigarPacked[i], &op);
+ dyStringPrintf(dyCigar, "%d", n);
+ dyStringAppendC(dyCigar, op);
+ }
+}
+
+char *bamGetCigar(const bam1_t *bam)
+/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */
+{
+const bam1_core_t *core = &bam->core;
+struct dyString *dyCigar = dyStringNew(min(8, core->n_cigar*4));
+bamUnpackCigar(bam, dyCigar);
+return dyStringCannibalize(&dyCigar);
+}
+
+void bamShowCigarEnglish(const bam1_t *bam)
+/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */
+{
+unsigned int *cigarPacked = bam1_cigar(bam);
+const bam1_core_t *core = &bam->core;
+int i;
+for (i = 0; i < core->n_cigar; i++)
+ {
+ char op;
+ int n = bamUnpackCigarElement(cigarPacked[i], &op);
+ if (i > 0)
+ printf(", ");
+ switch (op)
+ {
+ case 'M': // match or mismatch (gapless aligned block)
+ printf("%d (mis)Match", n);
+ break;
+ case 'I': // inserted in query
+ printf("%d Insertion", n);
+ break;
+ case 'S': // skipped query bases at beginning or end ("soft clipping")
+ printf("%d Skipped", n);
+ break;
+ case 'D': // deleted from query
+ printf("%d Deletion", n);
+ break;
+ case 'N': // long deletion from query (intron as opposed to small del)
+ printf("%d deletioN", n);
+ break;
+ case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+ printf("%d Hard clipped query", n);
+ break;
+ case 'P': // P="silent deletion from padded reference sequence"
+ printf("%d Padded / silent deletion", n);
+ break;
+ default:
+ errAbort("bamShowCigarEnglish: unrecognized CIGAR op %c -- update me", op);
+ }
+ }
+}
+
+static void descFlag(unsigned flag, unsigned bitMask, char *desc, boolean makeRed,
+ boolean *retFirst)
+/* Describe a flag bit (or multi-bit mask) if it is set in flag. */
+{
+if ((flag & bitMask) == bitMask) // *all* bits in bitMask are set in flag
+ {
+ if (!*retFirst)
+ printf(" | ");
+ printf("(0x%02x) %s",
+ (makeRed ? " style='color: red'" : ""), bitMask, desc);
+ *retFirst = FALSE;
+ }
+}
+
+void bamShowFlagsEnglish(const bam1_t *bam)
+/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */
+{
+const bam1_core_t *core = &bam->core;
+unsigned flag = core->flag;
+boolean first = TRUE;
+descFlag(flag, BAM_FDUP, "Optical or PCR duplicate", TRUE, &first);
+descFlag(flag, BAM_FQCFAIL, "QC failure", TRUE, &first);
+descFlag(flag, BAM_FSECONDARY, "Not primary alignment", TRUE, &first);
+descFlag(flag, BAM_FREAD2, "Read 2 of pair", FALSE, &first);
+descFlag(flag, BAM_FREAD1, "Read 1 of pair", FALSE, &first);
+descFlag(flag, BAM_FMREVERSE, "Mate is on '-' strand", FALSE, &first);
+descFlag(flag, BAM_FREVERSE, "Read is on '-' strand", FALSE, &first);
+descFlag(flag, BAM_FMUNMAP, "Mate is unmapped", TRUE, &first);
+if (flag & BAM_FUNMAP)
+ errAbort("Read is unmapped (what is it doing here?!?)");
+descFlag(flag, (BAM_FPROPER_PAIR | BAM_FPAIRED), "Properly paired", FALSE, &first);
+if ((flag & BAM_FPAIRED) && !(flag & BAM_FPROPER_PAIR))
+ descFlag(flag, BAM_FPAIRED, "Not properly paired", TRUE, &first);
+}
+
+int bamGetTargetLength(const bam1_t *bam)
+/* Tally up the alignment's length on the reference sequence from
+ * bam's packed-int CIGAR representation. */
+{
+unsigned int *cigarPacked = bam1_cigar(bam);
+const bam1_core_t *core = &bam->core;
+int tLength=0;
+int i;
+for (i = 0; i < core->n_cigar; i++)
+ {
+ char op;
+ int n = bamUnpackCigarElement(cigarPacked[i], &op);
+ switch (op)
+ {
+ case 'M': // match or mismatch (gapless aligned block)
+ tLength += n;
+ break;
+ case 'I': // inserted in query
+ break;
+ case 'D': // deleted from query
+ case 'N': // long deletion from query (intron as opposed to small del)
+ tLength += n;
+ break;
+ case 'S': // skipped query bases at beginning or end ("soft clipping")
+ case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+ case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
+ break;
+ default:
+ errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", op);
+ }
+ }
+return tLength;
+}
+
+bam1_t *bamClone(const bam1_t *bam)
+/* Return a newly allocated copy of bam. */
+{
+// Using typecasts to get around compiler complaints about bam being const:
+bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam));
+newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0]));
+return newBam;
+}
+
+void bamShowTags(const bam1_t *bam)
+/* Print out tags in HTML: bold key, no type indicator for brevity. */
+{
+// adapted from part of bam.c bam_format1:
+uint8_t *s = bam1_aux(bam);
+while (s < bam->data + bam->data_len)
+ {
+ uint8_t type, key[2];
+ key[0] = s[0]; key[1] = s[1];
+ s += 2; type = *s; ++s;
+ printf(" %c%c:", key[0], key[1]);
+ if (type == 'A') { printf("%c", *s); ++s; }
+ else if (type == 'C') { printf("%u", *s); ++s; }
+ else if (type == 'c') { printf("%d", *s); ++s; }
+ else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; }
+ else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; }
+ else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; }
+ else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; }
+ else if (type == 'f') { printf("%g", *(float*)s); s += 4; }
+ else if (type == 'd') { printf("%lg", *(double*)s); s += 8; }
+ else if (type == 'Z' || type == 'H')
+ {
+ htmTextOut(stdout, (char *)s);
+ s += strlen((char *)s) + 1;
+ }
+ }
+putc('\n', stdout);
+}
+
+char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize)
+/* If bam's tags include the given 2-character tag, place the value into
+ * buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
+ * or NULL if tag is not present. */
+{
+if (tag == NULL)
+ errAbort("NULL tag passed to bamGetTagString");
+if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0'))
+ errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag));
+char *val = NULL;
+// adapted from part of bam.c bam_format1:
+uint8_t *s = bam1_aux(bam);
+while (s < bam->data + bam->data_len)
+ {
+ uint8_t type, key[2];
+ key[0] = s[0]; key[1] = s[1];
+ s += 2; type = *s; ++s;
+ if (key[0] == tag[0] && key[1] == tag[1])
+ {
+ if (type == 'A') { snprintf(buf, bufSize, "%c", *s);}
+ else if (type == 'C') { snprintf(buf, bufSize, "%u", *s); }
+ else if (type == 'c') { snprintf(buf, bufSize, "%d", *s); }
+ else if (type == 'S') { snprintf(buf, bufSize, "%u", *(uint16_t*)s); }
+ else if (type == 's') { snprintf(buf, bufSize, "%d", *(int16_t*)s); }
+ else if (type == 'I') { snprintf(buf, bufSize, "%u", *(uint32_t*)s); }
+ else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); }
+ else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); }
+ else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); }
+ else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize);
+ else buf[0] = '\0';
+ buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages
+ val = buf;
+ break;
+ }
+ else
+ {
+ if (type == 'A' || type == 'C' || type == 'c') { ++s; }
+ else if (type == 'S' || type == 's') { s += 2; }
+ else if (type == 'I' || type == 'i' || type == 'f') { s += 4; }
+ else if (type == 'd') { s += 8; }
+ else if (type == 'Z' || type == 'H')
+ {
+ while (*s++);
+ }
+ }
+ }
+return val;
+}
+
+void bamUnpackAux(const bam1_t *bam, struct dyString *dy)
+/* Unpack the tag:type:val part of bam into dy */
+{
+// adapted from part of bam.c bam_format1:
+uint8_t *s = bam1_aux(bam);
+boolean firstTime = TRUE;
+while (s < bam->data + bam->data_len)
+ {
+ if (firstTime)
+ firstTime = FALSE;
+ else
+ dyStringAppendC(dy, '\t');
+ dyStringAppendC(dy, *s++);
+ dyStringAppendC(dy, *s++);
+ dyStringAppendC(dy, ':');
+ dyStringAppendC(dy, s[0]);
+ dyStringAppendC(dy, ':');
+ uint8_t type = *s++;
+ if (type == 'A') { dyStringPrintf(dy, "%c", *s); ++s; }
+ else if (type == 'C') { dyStringPrintf(dy, "%u", *s); ++s; }
+ else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; }
+ else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; }
+ else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; }
+ else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; }
+ else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; }
+ else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; }
+ else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; }
+ else if (type == 'Z' || type == 'H')
+ {
+ dyStringAppend(dy, (char *)s);
+ s += strlen((char *)s) + 1;
+ }
+ }
+}
+
+#else
+// If we're not compiling with samtools, make stub routines so compile won't fail:
+
+boolean bamFileExistsUdcFuse(char *bamFileName, char *udcFuseRoot)
+/* Return TRUE if we can successfully open the bam file and its index file. */
+{
+warn(COMPILE_WITH_SAMTOOLS, "bamFileExistsUdcFuse");
+return FALSE;
+}
+
+samfile_t *bamOpenUdcFuse(char *fileOrUrl, char **retBamFileName)
+/* Return an open bam file, dealing with some FUSE caching if need be. */
+{
+warn(COMPILE_WITH_SAMTOOLS, "bamOpenUdc");
+return FALSE;
+}
+
+void bamClose(samfile_t **pSamFile)
+/* Close down a samefile_t */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamClose");
+}
+
+void bamFetchUdcFuse(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+ samfile_t **pSamFile, char *udcFuseRoot)
+/* Open the .bam file, fetch items in the seq:start-end position range,
+ * and call callbackFunc on each bam item retrieved from the file plus callbackData.
+ * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
+ * The pSamFile parameter is optional. If non-NULL it will be filled in, just for
+ * the benefit of the callback function, with the open samFile. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch");
+}
+
+boolean bamIsRc(const bam1_t *bam)
+/* Return TRUE if alignment is on - strand. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc");
+return FALSE;
+}
+
+void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
+/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
+ * the beginning of the query sequence and quality; likewise for retHigh at end.
+ * For convenience, retClippedQLen is the original query length minus soft clipping
+ * (and the length of the query sequence that will be returned). */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetSoftClipping");
+}
+
+char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand)
+/* Return the nucleotide sequence encoded in bam. The BAM format
+ * reverse-complements query sequence when the alignment is on the - strand,
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQuerySequence");
+return NULL;
+}
+
+UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
+/* Return the base quality scores encoded in bam as an array of ubytes. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQueryQuals");
+return NULL;
+}
+
+char *bamGetCigar(const bam1_t *bam)
+/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetCigar");
+return NULL;
+}
+
+void bamShowCigarEnglish(const bam1_t *bam)
+/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamShowCigarEnglish");
+}
+
+void bamShowFlagsEnglish(const bam1_t *bam)
+/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamShowFlagsEnglish");
+}
+
+int bamGetTargetLength(const bam1_t *bam)
+/* Tally up the alignment's length on the reference sequence from
+ * bam's packed-int CIGAR representation. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTargetLength");
+return 0;
+}
+
+bam1_t *bamClone(const bam1_t *bam)
+/* Return a newly allocated copy of bam. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamClone");
+return NULL;
+}
+
+void bamShowTags(const bam1_t *bam)
+/* Print out tags in HTML: bold key, no type indicator for brevity. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamShowTags");
+}
+
+char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize)
+/* If bam's tags include the given 2-character tag, place the value into
+ * buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
+ * or NULL if tag is not present. */
+{
+errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTagString");
+return NULL;
+}
+
+#endif//ndef USE_BAM