6d058848117e130ca8c99f8d1c57f15e78616605 braney Wed May 20 14:40:34 2020 -0700 add an understanding of the 'B' type to BAM parsing diff --git src/lib/bamFile.c src/lib/bamFile.c index ce3674d..03b0241 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -534,55 +534,88 @@ errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", op); } } return tLength; } bam1_t *bamClone(const bam1_t *bam) /* Return a newly allocated copy of bam. */ { // Using typecasts to get around compiler complaints about bam being const: bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam)); newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0])); return newBam; } -void bamShowTags(const bam1_t *bam) -/* Print out tags in HTML: bold key, no type indicator for brevity. */ +static inline int typeSize(int x) +/* Return size of basic BAM types. */ { -// adapted from part of bam.c bam_format1: -uint8_t *s = bam1_aux(bam); -while (s < bam->data + bam->data_len) +if (x == 'C' || x == 'c' || x == 'A') return 1; +else if (x == 'S' || x == 's') return 2; +else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4; +else return 0; +} + +static void printType(uint8_t type, uint8_t **str) +/* Print out a string that has a value of the given type. */ { - uint8_t type, key[2]; - key[0] = s[0]; key[1] = s[1]; - s += 2; type = *s; ++s; - printf(" <B>%c%c</B>:", key[0], key[1]); +uint8_t *s = *str; if (type == 'A') { printf("%c", *s); ++s; } else if (type == 'C') { printf("%u", *s); ++s; } else if (type == 'c') { printf("%d", *(int8_t*)s); ++s; } else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; } else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; } else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; } else if (type == 'f') { printf("%g", *(float*)s); s += 4; } else if (type == 'd') { printf("%lg", *(double*)s); s += 8; } +else if (type == 'B') + { + uint8_t subType = *s++; + int count = *(int32_t*)s; + s += 4; + + int ii; + for (ii=0; ii < count; ii++) + { + printType(subType, &s); + if (ii < count - 1) + printf(","); + } + } else if (type == 'Z' || type == 'H') { htmTextOut(stdout, (char *)s); s += strlen((char *)s) + 1; } +else + errAbort("missing BAM type %c", type); +*str = s; +} + +void bamShowTags(const bam1_t *bam) +/* Print out tags in HTML: bold key, no type indicator for brevity. */ +{ +// adapted from part of bam.c bam_format1: +uint8_t *s = bam1_aux(bam); +while (s < bam->data + bam->data_len) + { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + printf(" <B>%c%c</B>:", key[0], key[1]); + printType(type, &s); } putc('\n', stdout); } char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize) /* If bam's tags include the given 2-character tag, place the value into * buf (zero-terminated, trunc'd if nec) and return a pointer to buf, * or NULL if tag is not present. */ { if (tag == NULL) errAbort("NULL tag passed to bamGetTagString"); if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0')) errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag)); char *val = NULL; // adapted from part of bam.c bam_format1: @@ -603,30 +636,35 @@ else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); } else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); } else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); } else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize); else buf[0] = '\0'; buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages val = buf; break; } else { if (type == 'A' || type == 'C' || type == 'c') { ++s; } else if (type == 'S' || type == 's') { s += 2; } else if (type == 'I' || type == 'i' || type == 'f') { s += 4; } else if (type == 'd') { s += 8; } + else if (type == 'B') + { + // 5 is for type byte and a following int32 + s += 5 + typeSize(*(s)) * (*(int32_t*)((s)+1)); + } else if (type == 'Z' || type == 'H') { while (*s++); } } } return val; } void bamUnpackAux(const bam1_t *bam, struct dyString *dy) /* Unpack the tag:type:val part of bam into dy */ { // adapted from part of bam.c bam_format1: uint8_t *s = bam1_aux(bam); boolean firstTime = TRUE;