ff5530a2dba0cbf6730cb36524ad2ce2fbc45adc angie Mon Nov 30 09:49:58 2020 -0800 Add support for SARS-CoV-2 RefSeq or GenBank accession in bam. diff --git src/lib/bamFile.c src/lib/bamFile.c index 03b0241..26ac806 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -161,30 +161,44 @@ bamClose(&bamF); } void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header, bam_index_t *idx, char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData) /* With the open bam file, return items the same way with the callbacks as with bamFetch() */ /* except in this case use an already-open bam file and index (use bam_index_load and free() for */ /* the index). It seems a little strange to pass the filename in with the open bam, but */ /* it's just used to report errors. */ { bam1_t *b; AllocVar(b); hts_itr_t *iter = sam_itr_querys(idx, header, position); if (iter == NULL && startsWith("chr", position)) iter = sam_itr_querys(idx, header, position + strlen("chr")); +// Special case to support SARS-CoV-2: If header uses a RefSeq or GenBank accession instead of +// our "NC_045512v2", go with that. +if (iter == NULL && header->n_targets == 1 && startsWith("NC_045512v2", position) && + (startsWith("NC_045512", header->target_name[0]) || + startsWith("MN908947", header->target_name[0]))) + { + char *colon = strrchr(position, ':'); + char customPos[512]; + if (colon) + safef(customPos, sizeof customPos, "%s%s", header->target_name[0], colon); + else + safecpy(customPos, sizeof customPos, header->target_name[0]); + iter = sam_itr_querys(idx, header, customPos); + } if (iter == NULL) return; int result; while ((result = sam_itr_next(samfile, iter, b)) >= 0) callbackFunc(b, callbackData, header); // if we're reading a CRAM file and the MD5 string has been set // we know there was an error finding the reference and we need // to request that it be loaded. if (samfile->format.format == cram) { char *md5String = cram_get_Md5(samfile); if (!isEmpty(md5String)) {