d2c7e8be3ceae6027a5274faafdf6239d778cc63 aamp Sat Jul 16 08:31:16 2011 -0700 Changes related to change source filenames around: bamUdc.ch -> bamFile.ch and bamFile.ch -> hgBam.ch diff --git src/lib/bamFile.c src/lib/bamFile.c new file mode 100644 index 0000000..4456991 --- /dev/null +++ src/lib/bamFile.c @@ -0,0 +1,693 @@ +/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ + +#include "common.h" +#include "portable.h" +#include "bamFile.h" +#ifdef USE_BAM +#include "htmshell.h" +#include "udc.h" + +static boolean isRegularFile(char *filename) +/* File not only exists, but is also a file not a directory. */ +{ +struct stat mystat; +if (stat(filename, &mystat) != 0) + return FALSE; +return S_ISREG(mystat.st_mode); +} + +static char *samtoolsFileNameUdcFuse(char *fileOrUrl, char *udcFuseRoot) +/* If udcFuse is configured, and we have a URL, convert it into a filename in + * the udcFuse filesystem for use by samtools. Thus samtools will think it's + * working on a local file, but udcFuse will pass access requests to udc and + * we'll get the benefits of sparse-file local caching and https support. + * udcFuse needs us to open udc files before invoking udcFuse paths, so open + * both the .bam and .bai (index) URLs with udc here. + * If udcFuse is not configured, or fileOrUrl is not an URL, just pass through fileOrUrl. */ +{ +char *protocol = NULL, *afterProtocol = NULL, *colon = NULL, *auth = NULL; +udcParseUrlFull(fileOrUrl, &protocol, &afterProtocol, &colon, &auth); +if (udcFuseRoot != NULL && afterProtocol != NULL) + { + struct dyString *dy = dyStringNew(0); + if (auth == NULL) + auth = ""; + dyStringPrintf(dy, "%s/%s/%s%s", udcFuseRoot, protocol, auth, afterProtocol); + char *bamFileName = dyStringCannibalize(&dy); + if (!isRegularFile(bamFileName)) + { + verbose(2, "going to call udcFileMayOpen(%s).\n", fileOrUrl); + struct udcFile *udcf = udcFileMayOpen(fileOrUrl, NULL); + if (udcf != NULL) + { + udcFileClose(&udcf); + verbose(2, "closed udcf. testing existence of %s.\n", bamFileName); + if (!isRegularFile(bamFileName)) + { + warn("Cannot find %s -- remount udcFuse?", bamFileName); + freeMem(bamFileName); + return cloneString(fileOrUrl); + } + } + else + { + warn("Failed to open BAM URL \"%s\" with udc", fileOrUrl); + freeMem(bamFileName); + return cloneString(fileOrUrl); + } + } + // Look for index file: xxx.bam.bai or xxx.bai. Look for both in udcFuse, + // and only open the URL with udc if neither udcFuse file exists. + int urlLen = strlen(fileOrUrl), fLen = strlen(bamFileName); + char *indexFileName = needMem(fLen+5); + safef(indexFileName, fLen+5, "%s.bai", bamFileName); + if (!isRegularFile(indexFileName)) + { + verbose(2, "%s does not already exist\n", indexFileName); + char *altIndexFileName = NULL; + if (endsWith(fileOrUrl, ".bam")) + { + altIndexFileName = cloneString(indexFileName); + strcpy(altIndexFileName+fLen-1, "i"); + } + if (!(altIndexFileName && isRegularFile(altIndexFileName))) + { + char *indexUrl = needMem(urlLen+5); + safef(indexUrl, urlLen+5, "%s.bai", fileOrUrl); + verbose(2, "going to call udcFileMayOpen(%s).\n", indexUrl); + struct udcFile *udcf = udcFileMayOpen(indexUrl, NULL); + if (udcf != NULL) + udcFileClose(&udcf); + else if (altIndexFileName != NULL) + { + char *altIndexUrl = cloneString(indexUrl); + strcpy(altIndexUrl+urlLen-1, "i"); + verbose(2, "going to call udcFileMayOpen(%s).\n", altIndexUrl); + udcf = udcFileMayOpen(altIndexUrl, NULL); + if (udcf == NULL) + { + warn("Cannot find BAM index file (%s or %s)", indexUrl, altIndexUrl); + return cloneString(fileOrUrl); + } + udcFileClose(&udcf); + freeMem(altIndexUrl); + } + else + { + warn("Cannot find BAM index file for \"%s\"", fileOrUrl); + return cloneString(fileOrUrl); + } + freeMem(indexUrl); + } + freeMem(altIndexFileName); + } + freeMem(indexFileName); + return bamFileName; + } +return cloneString(fileOrUrl); +} + +#ifndef KNETFILE_HOOKS +static char *getSamDir() +/* Return the name of a trash dir for samtools to run in (it creates files in current dir) + * and make sure the directory exists. */ +{ +static char *samDir = NULL; +char *dirName = "samtools"; +if (samDir == NULL) + { + mkdirTrashDirectory(dirName); + size_t len = strlen(trashDir()) + 1 + strlen(dirName) + 1; + samDir = needMem(len); + safef(samDir, len, "%s/%s", trashDir(), dirName); + } +return samDir; +} +#endif//ndef KNETFILE_HOOKS + +boolean bamFileExistsUdc(char *fileOrUrl, char *udcFuseRoot) +/* Return TRUE if we can successfully open the bam file and its index file. */ +{ +char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot); +samfile_t *fh = samopen(bamFileName, "rb", NULL); +boolean usingUrl = TRUE; +usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); +if (fh != NULL) + { +#ifndef KNETFILE_HOOKS + // When file is an URL, this caches the index file in addition to validating: + // Since samtools's url-handling code saves the .bai file to the current directory, + // chdir to a trash directory before calling bam_index_load, then chdir back. + char *runDir = getCurrentDir(); + char *samDir = getSamDir(); + if (usingUrl) + setCurrentDir(samDir); +#endif//ndef KNETFILE_HOOKS + bam_index_t *idx = bam_index_load(bamFileName); +#ifndef KNETFILE_HOOKS + if (usingUrl) + setCurrentDir(runDir); +#endif//ndef KNETFILE_HOOKS + samclose(fh); + if (idx == NULL) + { + warn("bamFileExists: failed to read index corresponding to %s", bamFileName); + return FALSE; + } + free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc. + return TRUE; + } +return FALSE; +} + +samfile_t *bamOpenUdc(char *fileOrUrl, char **retBamFileName, char *udcFuseRoot) +/* Return an open bam file, dealing with FUSE caching if need be. + * Return parameter if NON-null will return the file name after FUSing */ +{ +char *bamFileName = samtoolsFileNameUdcFuse(fileOrUrl, udcFuseRoot); +if (retBamFileName != NULL) + *retBamFileName = bamFileName; +samfile_t *fh = samopen(bamFileName, "rb", NULL); +if (fh == NULL) + { + boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); + struct dyString *urlWarning = dyStringNew(0); + if (usingUrl) + { + boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName)); + if (usingUdc) + dyStringAppend(urlWarning, " (using udcFuse)"); + dyStringAppend(urlWarning, + ". If you are able to access the URL with your web browser, " + "please try reloading this page."); + } + errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string); + } +return fh; +} + +void bamClose(samfile_t **pSamFile) +/* Close down a samefile_t */ +{ +if (pSamFile != NULL) + { + samclose(*pSamFile); + *pSamFile = NULL; + } +} + +void bamFetchUdc(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile, char *udcFuseRoot) +/* Open the .bam file, fetch items in the seq:start-end position range, + * and call callbackFunc on each bam item retrieved from the file plus callbackData. + * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. + * The pSamFile parameter is optional. If non-NULL it will be filled in, just for + * the benefit of the callback function, with the open samFile. */ +{ +char *bamFileName = NULL; +samfile_t *fh = bamOpenUdc(fileOrUrl, &bamFileName, udcFuseRoot); +boolean usingUrl = TRUE; +usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); +if (pSamFile != NULL) + *pSamFile = fh; +int chromId, start, end; +int ret = bam_parse_region(fh->header, position, &chromId, &start, &end); +if (ret != 0 && startsWith("chr", position)) + ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end); +if (ret != 0) + // If the bam file does not cover the current chromosome, OK + return; +#ifndef KNETFILE_HOOKS +// Since samtools' url-handling code saves the .bai file to the current directory, +// chdir to a trash directory before calling bam_index_load, then chdir back. +char *runDir = getCurrentDir(); +char *samDir = getSamDir(); +if (usingUrl) + setCurrentDir(samDir); +#endif//ndef KNETFILE_HOOKS +bam_index_t *idx = bam_index_load(bamFileName); +#ifndef KNETFILE_HOOKS +if (usingUrl) + setCurrentDir(runDir); +#endif//ndef KNETFILE_HOOKS +if (idx == NULL) + warn("bam_index_load(%s) failed.", bamFileName); +else + { + ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc); + if (ret != 0) + warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret); + free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc. + } +bamClose(&fh); +} + +boolean bamIsRc(const bam1_t *bam) +/* Return TRUE if alignment is on - strand. */ +{ +const bam1_core_t *core = &bam->core; +return (core->flag & BAM_FREVERSE); +} + +void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) +/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at + * the beginning of the query sequence and quality; likewise for retHigh at end. + * For convenience, retClippedQLen is the original query length minus soft clipping + * (and the length of the query sequence that will be returned). */ +{ +unsigned int *cigarPacked = bam1_cigar(bam); +const bam1_core_t *core = &bam->core; +char op; +int n = bamUnpackCigarElement(cigarPacked[0], &op); +int low = (op == 'S') ? n : 0; +n = bamUnpackCigarElement(cigarPacked[core->n_cigar-1], &op); +int high = (op == 'S') ? n : 0; +if (retLow != NULL) + *retLow = low; +if (retHigh != NULL) + *retHigh = high; +if (retClippedQLen != NULL) + *retClippedQLen = (core->l_qseq - low - high); +} + + +void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq) +/* Fill in qSeq with the nucleotide sequence encoded in bam. The BAM format + * reverse-complements query sequence when the alignment is on the - strand, + * so if useStrand is given we rev-comp it back to restore the original query + * sequence. */ +{ +const bam1_core_t *core = &bam->core; +int qLen = core->l_qseq; +uint8_t *packedQSeq = bam1_seq(bam); +int i; +for (i = 0; i < qLen; i++) + qSeq[i] = bam_nt16_rev_table[bam1_seqi(packedQSeq, i)]; +qSeq[i] = '\0'; +if (useStrand && bamIsRc(bam)) + reverseComplement(qSeq, qLen); +} + +char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand) +/* Allocate and return the nucleotide sequence encoded in bam. The BAM format + * reverse-complements query sequence when the alignment is on the - strand, + * so if useStrand is given we rev-comp it back to restore the original query + * sequence. */ +{ +const bam1_core_t *core = &bam->core; +int qLen = core->l_qseq; +char *qSeq = needMem(qLen+1); +bamUnpackQuerySequence(bam, useStrand, qSeq); +return qSeq; +} + +UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand) +/* Return the base quality scores encoded in bam as an array of ubytes. */ +{ +const bam1_core_t *core = &bam->core; +int qLen = core->l_qseq; +UBYTE *arr = needMem(qLen); +boolean isRc = useStrand && bamIsRc(bam); +UBYTE *qualStr = bam1_qual(bam); +int i; +for (i = 0; i < core->l_qseq; i++) + { + int offset = isRc ? (qLen - 1 - i) : i; + arr[i] = (qualStr[0] == 255) ? 255 : qualStr[offset]; + } +return arr; +} + +void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar) +/* Unpack CIGAR string into dynamic string */ +{ +unsigned int *cigarPacked = bam1_cigar(bam); +const bam1_core_t *core = &bam->core; +int i; +for (i = 0; i < core->n_cigar; i++) + { + char op; + int n = bamUnpackCigarElement(cigarPacked[i], &op); + dyStringPrintf(dyCigar, "%d", n); + dyStringAppendC(dyCigar, op); + } +} + +char *bamGetCigar(const bam1_t *bam) +/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */ +{ +const bam1_core_t *core = &bam->core; +struct dyString *dyCigar = dyStringNew(min(8, core->n_cigar*4)); +bamUnpackCigar(bam, dyCigar); +return dyStringCannibalize(&dyCigar); +} + +void bamShowCigarEnglish(const bam1_t *bam) +/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */ +{ +unsigned int *cigarPacked = bam1_cigar(bam); +const bam1_core_t *core = &bam->core; +int i; +for (i = 0; i < core->n_cigar; i++) + { + char op; + int n = bamUnpackCigarElement(cigarPacked[i], &op); + if (i > 0) + printf(", "); + switch (op) + { + case 'M': // match or mismatch (gapless aligned block) + printf("%d (mis)Match", n); + break; + case 'I': // inserted in query + printf("%d Insertion", n); + break; + case 'S': // skipped query bases at beginning or end ("soft clipping") + printf("%d Skipped", n); + break; + case 'D': // deleted from query + printf("%d Deletion", n); + break; + case 'N': // long deletion from query (intron as opposed to small del) + printf("%d deletioN", n); + break; + case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") + printf("%d Hard clipped query", n); + break; + case 'P': // P="silent deletion from padded reference sequence" + printf("%d Padded / silent deletion", n); + break; + default: + errAbort("bamShowCigarEnglish: unrecognized CIGAR op %c -- update me", op); + } + } +} + +static void descFlag(unsigned flag, unsigned bitMask, char *desc, boolean makeRed, + boolean *retFirst) +/* Describe a flag bit (or multi-bit mask) if it is set in flag. */ +{ +if ((flag & bitMask) == bitMask) // *all* bits in bitMask are set in flag + { + if (!*retFirst) + printf(" | "); + printf("(0x%02x) %s", + (makeRed ? " style='color: red'" : ""), bitMask, desc); + *retFirst = FALSE; + } +} + +void bamShowFlagsEnglish(const bam1_t *bam) +/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */ +{ +const bam1_core_t *core = &bam->core; +unsigned flag = core->flag; +boolean first = TRUE; +descFlag(flag, BAM_FDUP, "Optical or PCR duplicate", TRUE, &first); +descFlag(flag, BAM_FQCFAIL, "QC failure", TRUE, &first); +descFlag(flag, BAM_FSECONDARY, "Not primary alignment", TRUE, &first); +descFlag(flag, BAM_FREAD2, "Read 2 of pair", FALSE, &first); +descFlag(flag, BAM_FREAD1, "Read 1 of pair", FALSE, &first); +descFlag(flag, BAM_FMREVERSE, "Mate is on '-' strand", FALSE, &first); +descFlag(flag, BAM_FREVERSE, "Read is on '-' strand", FALSE, &first); +descFlag(flag, BAM_FMUNMAP, "Mate is unmapped", TRUE, &first); +if (flag & BAM_FUNMAP) + errAbort("Read is unmapped (what is it doing here?!?)"); +descFlag(flag, (BAM_FPROPER_PAIR | BAM_FPAIRED), "Properly paired", FALSE, &first); +if ((flag & BAM_FPAIRED) && !(flag & BAM_FPROPER_PAIR)) + descFlag(flag, BAM_FPAIRED, "Not properly paired", TRUE, &first); +} + +int bamGetTargetLength(const bam1_t *bam) +/* Tally up the alignment's length on the reference sequence from + * bam's packed-int CIGAR representation. */ +{ +unsigned int *cigarPacked = bam1_cigar(bam); +const bam1_core_t *core = &bam->core; +int tLength=0; +int i; +for (i = 0; i < core->n_cigar; i++) + { + char op; + int n = bamUnpackCigarElement(cigarPacked[i], &op); + switch (op) + { + case 'M': // match or mismatch (gapless aligned block) + tLength += n; + break; + case 'I': // inserted in query + break; + case 'D': // deleted from query + case 'N': // long deletion from query (intron as opposed to small del) + tLength += n; + break; + case 'S': // skipped query bases at beginning or end ("soft clipping") + case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") + case 'P': // P="silent deletion from padded reference sequence" -- ignore these. + break; + default: + errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", op); + } + } +return tLength; +} + +bam1_t *bamClone(const bam1_t *bam) +/* Return a newly allocated copy of bam. */ +{ +// Using typecasts to get around compiler complaints about bam being const: +bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam)); +newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0])); +return newBam; +} + +void bamShowTags(const bam1_t *bam) +/* Print out tags in HTML: bold key, no type indicator for brevity. */ +{ +// adapted from part of bam.c bam_format1: +uint8_t *s = bam1_aux(bam); +while (s < bam->data + bam->data_len) + { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + printf(" %c%c:", key[0], key[1]); + if (type == 'A') { printf("%c", *s); ++s; } + else if (type == 'C') { printf("%u", *s); ++s; } + else if (type == 'c') { printf("%d", *s); ++s; } + else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; } + else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; } + else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; } + else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; } + else if (type == 'f') { printf("%g", *(float*)s); s += 4; } + else if (type == 'd') { printf("%lg", *(double*)s); s += 8; } + else if (type == 'Z' || type == 'H') + { + htmTextOut(stdout, (char *)s); + s += strlen((char *)s) + 1; + } + } +putc('\n', stdout); +} + +char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize) +/* If bam's tags include the given 2-character tag, place the value into + * buf (zero-terminated, trunc'd if nec) and return a pointer to buf, + * or NULL if tag is not present. */ +{ +if (tag == NULL) + errAbort("NULL tag passed to bamGetTagString"); +if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0')) + errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag)); +char *val = NULL; +// adapted from part of bam.c bam_format1: +uint8_t *s = bam1_aux(bam); +while (s < bam->data + bam->data_len) + { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + if (key[0] == tag[0] && key[1] == tag[1]) + { + if (type == 'A') { snprintf(buf, bufSize, "%c", *s);} + else if (type == 'C') { snprintf(buf, bufSize, "%u", *s); } + else if (type == 'c') { snprintf(buf, bufSize, "%d", *s); } + else if (type == 'S') { snprintf(buf, bufSize, "%u", *(uint16_t*)s); } + else if (type == 's') { snprintf(buf, bufSize, "%d", *(int16_t*)s); } + else if (type == 'I') { snprintf(buf, bufSize, "%u", *(uint32_t*)s); } + else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); } + else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); } + else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); } + else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize); + else buf[0] = '\0'; + buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages + val = buf; + break; + } + else + { + if (type == 'A' || type == 'C' || type == 'c') { ++s; } + else if (type == 'S' || type == 's') { s += 2; } + else if (type == 'I' || type == 'i' || type == 'f') { s += 4; } + else if (type == 'd') { s += 8; } + else if (type == 'Z' || type == 'H') + { + while (*s++); + } + } + } +return val; +} + +void bamUnpackAux(const bam1_t *bam, struct dyString *dy) +/* Unpack the tag:type:val part of bam into dy */ +{ +// adapted from part of bam.c bam_format1: +uint8_t *s = bam1_aux(bam); +boolean firstTime = TRUE; +while (s < bam->data + bam->data_len) + { + if (firstTime) + firstTime = FALSE; + else + dyStringAppendC(dy, '\t'); + dyStringAppendC(dy, *s++); + dyStringAppendC(dy, *s++); + dyStringAppendC(dy, ':'); + dyStringAppendC(dy, s[0]); + dyStringAppendC(dy, ':'); + uint8_t type = *s++; + if (type == 'A') { dyStringPrintf(dy, "%c", *s); ++s; } + else if (type == 'C') { dyStringPrintf(dy, "%u", *s); ++s; } + else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; } + else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; } + else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; } + else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; } + else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; } + else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; } + else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; } + else if (type == 'Z' || type == 'H') + { + dyStringAppend(dy, (char *)s); + s += strlen((char *)s) + 1; + } + } +} + +#else +// If we're not compiling with samtools, make stub routines so compile won't fail: + +boolean bamFileExistsUdcFuse(char *bamFileName, char *udcFuseRoot) +/* Return TRUE if we can successfully open the bam file and its index file. */ +{ +warn(COMPILE_WITH_SAMTOOLS, "bamFileExistsUdcFuse"); +return FALSE; +} + +samfile_t *bamOpenUdcFuse(char *fileOrUrl, char **retBamFileName) +/* Return an open bam file, dealing with some FUSE caching if need be. */ +{ +warn(COMPILE_WITH_SAMTOOLS, "bamOpenUdc"); +return FALSE; +} + +void bamClose(samfile_t **pSamFile) +/* Close down a samefile_t */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamClose"); +} + +void bamFetchUdcFuse(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile, char *udcFuseRoot) +/* Open the .bam file, fetch items in the seq:start-end position range, + * and call callbackFunc on each bam item retrieved from the file plus callbackData. + * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. + * The pSamFile parameter is optional. If non-NULL it will be filled in, just for + * the benefit of the callback function, with the open samFile. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch"); +} + +boolean bamIsRc(const bam1_t *bam) +/* Return TRUE if alignment is on - strand. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc"); +return FALSE; +} + +void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) +/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at + * the beginning of the query sequence and quality; likewise for retHigh at end. + * For convenience, retClippedQLen is the original query length minus soft clipping + * (and the length of the query sequence that will be returned). */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetSoftClipping"); +} + +char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand) +/* Return the nucleotide sequence encoded in bam. The BAM format + * reverse-complements query sequence when the alignment is on the - strand, + * so if useStrand is given we rev-comp it back to restore the original query + * sequence. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQuerySequence"); +return NULL; +} + +UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand) +/* Return the base quality scores encoded in bam as an array of ubytes. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQueryQuals"); +return NULL; +} + +char *bamGetCigar(const bam1_t *bam) +/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetCigar"); +return NULL; +} + +void bamShowCigarEnglish(const bam1_t *bam) +/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamShowCigarEnglish"); +} + +void bamShowFlagsEnglish(const bam1_t *bam) +/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamShowFlagsEnglish"); +} + +int bamGetTargetLength(const bam1_t *bam) +/* Tally up the alignment's length on the reference sequence from + * bam's packed-int CIGAR representation. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTargetLength"); +return 0; +} + +bam1_t *bamClone(const bam1_t *bam) +/* Return a newly allocated copy of bam. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamClone"); +return NULL; +} + +void bamShowTags(const bam1_t *bam) +/* Print out tags in HTML: bold key, no type indicator for brevity. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamShowTags"); +} + +char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize) +/* If bam's tags include the given 2-character tag, place the value into + * buf (zero-terminated, trunc'd if nec) and return a pointer to buf, + * or NULL if tag is not present. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTagString"); +return NULL; +} + +#endif//ndef USE_BAM