1ae7da004baa664dbbfaa2a7b8bf4b44001f18aa ceisenhart Tue Jun 3 19:50:40 2014 -0700 Removed memory leak, fixed some stylistic errors diff --git src/utils/bamSplitByChrom/bamSplitByChrom.c src/utils/bamSplitByChrom/bamSplitByChrom.c index ebb84ef..bd6bd61 100644 --- src/utils/bamSplitByChrom/bamSplitByChrom.c +++ src/utils/bamSplitByChrom/bamSplitByChrom.c @@ -1,100 +1,111 @@ /* bamSplitByChrom - Splits a bam file into multiple bam files based on chromosome. * Unmapped reads are written to the file unmapped.bam */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "bamFile.h" +boolean clUnmapped = FALSE; + void usage() /* Explain usage and exit. */ { errAbort( "bamSplitByChrom - Splits a bam file into multiple bam files based on chromosome \n" "usage:\n" " bamSplitByChrom input.bam\n" "options:\n" - " \n" + " -unmapped Creates a file for the unmapped reads. \n" ); } /* Command line validation table. */ static struct optionSpec options[] = { + {"unmapped",OPTION_BOOLEAN}, {NULL, 0}, }; void openOutput(struct hash *hash, bam_header_t *head) /* Loops through the input bam's header, opening an output file * for each chromosome in the input file */ { int i; for ( i = 0; i < head->n_targets; ++i ) { char *fileName =catTwoStrings(head->target_name[i], ".bam"); samfile_t *outBam = bamMustOpenLocal(fileName, "wb", head); hashAdd(hash, head->target_name[i], outBam); } } -void closeOutput(struct hash *hash, bam_header_t *head)i +void closeOutput(struct hash *hash, bam_header_t *head) /* Loops through the output files and closes them. */ { int i; for ( i = 0; i < head->n_targets; ++i ) { samclose(hashFindVal(hash, head->target_name[i])); } } -void writeOutput(samfile_t *input, struct hash *hash) +void writeOutput(samfile_t *input, struct hash *hash, boolean unmapped) /* Reads through the input bam and writes each alignment to the correct output file. - * Unmapped reads are written to unmapped.bam " + * Unmapped reads are written to unmapped.bam */ { bam_header_t *head = input ->header; bam1_t one; ZeroVar(&one); -samfile_t *unmapped = bamMustOpenLocal("unmapped.bam", "wb", head); +samfile_t *unmap = bamMustOpenLocal("unmapped.bam", "wb", head); for (;;) { if (samread (input, &one) < 0) { break; } if (one.core.tid > 0) { samwrite(hashFindVal(hash, head->target_name[one.core.tid]), &one); } else { - samwrite(unmapped, &one); + if (!unmapped) + { + samwrite(unmap, &one); + } } } -samclose(unmapped); +if (!unmapped) + { + remove("unmapped.bam"); + } +samclose(unmap); } -void bamSplitByChrom(char *inBam) -/* Splits the input bam into multiple output bam's based on chromosome. " +void bamSplitByChrom(char *inBam, boolean unmapped) +/* Splits the bam file into multiple bam files based on chromosome */ { struct hash *hash = hashNew(0); samfile_t *input = bamMustOpenLocal(inBam, "rb", NULL); bam_header_t *head = input ->header; +/* open the input bam */ openOutput(hash, head); -/* Open up file, loop through header, and make up a hash with chromosome names for keys, - * and samfile_t for values. */ -writeOutput(input, hash); -/* Loop through each record of BAM file, looking up chromosome, getting file from hash, - * and adding record to appropriate file */ +/* open the output bam */ +writeOutput(input, hash, unmapped); +/* write the alignments to the correct output file */ closeOutput(hash, head); +/* close the output files */ samclose(input); -/* Loop through each output file and close it */ + } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); +clUnmapped = optionExists("unmapped"); if (argc != 2) usage(); -bamSplitByChrom(argv[1]); +bamSplitByChrom(argv[1], clUnmapped); return 0; }