6d058848117e130ca8c99f8d1c57f15e78616605
braney
Wed May 20 14:40:34 2020 -0700
add an understanding of the 'B' type to BAM parsing
diff --git src/lib/bamFile.c src/lib/bamFile.c
index ce3674d..03b0241 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -534,55 +534,88 @@
errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", op);
}
}
return tLength;
}
bam1_t *bamClone(const bam1_t *bam)
/* Return a newly allocated copy of bam. */
{
// Using typecasts to get around compiler complaints about bam being const:
bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam));
newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0]));
return newBam;
}
-void bamShowTags(const bam1_t *bam)
-/* Print out tags in HTML: bold key, no type indicator for brevity. */
+static inline int typeSize(int x)
+/* Return size in bytes of a fixed-width BAM type code, or 0 if the code is not recognized. */
{
-// adapted from part of bam.c bam_format1:
-uint8_t *s = bam1_aux(bam);
-while (s < bam->data + bam->data_len)
+if (x == 'C' || x == 'c' || x == 'A') return 1;
+else if (x == 'S' || x == 's') return 2;
+else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4;
+else return 0;
+}
+
+static void printType(uint8_t type, uint8_t **str)
+/* Print the value of the given BAM type stored at *str and advance *str past it; errAbort on an unknown type. */
{
- uint8_t type, key[2];
- key[0] = s[0]; key[1] = s[1];
- s += 2; type = *s; ++s;
- printf(" %c%c:", key[0], key[1]);
+uint8_t *s = *str;
if (type == 'A') { printf("%c", *s); ++s; }
else if (type == 'C') { printf("%u", *s); ++s; }
else if (type == 'c') { printf("%d", *(int8_t*)s); ++s; }
else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; }
else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; }
else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; }
else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; }
else if (type == 'f') { printf("%g", *(float*)s); s += 4; }
else if (type == 'd') { printf("%lg", *(double*)s); s += 8; }
+else if (type == 'B')
+ {
+ uint8_t subType = *s++;
+ int count = *(int32_t*)s;
+ s += 4;
+
+ int ii;
+ for (ii=0; ii < count; ii++)
+ {
+ printType(subType, &s);
+ if (ii < count - 1)
+ printf(",");
+ }
+ }
else if (type == 'Z' || type == 'H')
{
htmTextOut(stdout, (char *)s);
s += strlen((char *)s) + 1;
}
+else
+ errAbort("missing BAM type %c", type);
+*str = s;
+}
+
+void bamShowTags(const bam1_t *bam)
+/* Print out tags in HTML: bold key, no type indicator for brevity. */
+{
+// adapted from part of bam.c bam_format1:
+uint8_t *s = bam1_aux(bam);
+while (s < bam->data + bam->data_len)
+ {
+ uint8_t type, key[2];
+ key[0] = s[0]; key[1] = s[1];
+ s += 2; type = *s; ++s;
+ printf(" %c%c:", key[0], key[1]);
+ printType(type, &s);
}
putc('\n', stdout);
}
char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize)
/* If bam's tags include the given 2-character tag, place the value into
* buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
* or NULL if tag is not present. */
{
if (tag == NULL)
errAbort("NULL tag passed to bamGetTagString");
if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0'))
errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag));
char *val = NULL;
// adapted from part of bam.c bam_format1:
@@ -603,30 +636,35 @@
else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); }
else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); }
else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); }
else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize);
else buf[0] = '\0';
buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages
val = buf;
break;
}
else
{
if (type == 'A' || type == 'C' || type == 'c') { ++s; }
else if (type == 'S' || type == 's') { s += 2; }
else if (type == 'I' || type == 'i' || type == 'f') { s += 4; }
else if (type == 'd') { s += 8; }
+ else if (type == 'B')
+ {
+ // 5 = 1-byte element type + 4-byte int32 element count that precede the array data
+ s += 5 + typeSize(*(s)) * (*(int32_t*)((s)+1));
+ }
else if (type == 'Z' || type == 'H')
{
while (*s++);
}
}
}
return val;
}
void bamUnpackAux(const bam1_t *bam, struct dyString *dy)
/* Unpack the tag:type:val part of bam into dy */
{
// adapted from part of bam.c bam_format1:
uint8_t *s = bam1_aux(bam);
boolean firstTime = TRUE;