3df62d28d44182ce543799fb440ece7daf7acda6 ceisenhart Fri May 30 22:20:40 2014 -0700 changed structure to increase efficiency, added 'unmapped.bam' to output diff --git src/utils/bamSplitByChrom/bamSplitByChrom.c src/utils/bamSplitByChrom/bamSplitByChrom.c index d44374b..71cbd35 100644 --- src/utils/bamSplitByChrom/bamSplitByChrom.c +++ src/utils/bamSplitByChrom/bamSplitByChrom.c @@ -1,115 +1,111 @@ /* bamSplitByChrom - Splits a bam file into multiple bam files based on chromosome . */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "bamFile.h" + void usage() /* Explain usage and exit. */ { errAbort( "bamSplitByChrom - Splits a bam file into multiple bam files based on chromosome \n" "usage:\n" " bamSplitByChrom input.bam\n" "options:\n" " \n" ); } /* Command line validation table. */ static struct optionSpec options[] = { {NULL, 0}, }; samfile_t *samMustOpen(char *fileName, char *mode, void *extraHeader) /* Open up samfile or die trying. */ { samfile_t *sf = samopen(fileName, mode, extraHeader); if (sf == NULL) errnoAbort("Couldn't open %s.\n", fileName); return sf; } char *concat(char *s1, char *s2) /* A simple concatenate function. */ { char *result = needMem(strlen(s1)+strlen(s2) +1); strcpy(result,s1); strcat(result,s2); return result; } -bool isUnderscore(char c) -/* checks if the character is an underscore */ +void openOutput(struct hash *hash, bam_header_t *head) { -if (c == '_') +int i; +for ( i = 0; i < head->n_targets; ++i ) { - return(TRUE); + char *fileName =concat(head->target_name[i], ".bam"); + samfile_t *outBam = bamMustOpenLocal(fileName, "wb", head); + hashAdd(hash, head->target_name[i], outBam); } -return(FALSE); } -char * removeUnderscores(char * input) -/* Removes the Underscores fromo a string */ +void closeOutput(struct hash *hash, bam_header_t *head) { -int i, j=0; -char *result = needMem(strlen(input)+1); -for (i = 0; input[i] != '\0'; ++i) - { - if (!isUnderscore(input[i])) +int i; +for ( i = 0; i < head->n_targets; ++i ) { - result[j] = input[i]; - ++j; + samclose(hashFindVal(hash, head->target_name[i])); } } -return(result); -} -void bamSplitByChrom(char *inBam) -/* bamSplitByChrom - Splits a bam file into multiple bam files based on chromosome . */ +void writeOutput(samfile_t *input, struct hash *hash) { -/* Open file and get header for it. */ -samfile_t *input = samMustOpen(inBam, "rb", NULL); bam_header_t *head = input ->header; bam1_t one; -ZeroVar(&one); // This seems to be necessary! -int i =0; -for (i=0; i<head->n_targets; ++i) -/* Loop through each chromosome. */ - { - //head->target_name[i] = removeUnderscores(head->target_name[i]); - samfile_t *in = samMustOpen(inBam, "rb", NULL); - char *outBam = head->target_name[i]; - char *bam = ".bam"; - samfile_t *out = bamMustOpenLocal(concat(outBam,bam), "wb", head); - /* Open an output bam file. */ +ZeroVar(&one); +samfile_t *unmapped = bamMustOpenLocal("unmapped.bam", "wb", head); for (;;) - /* Loop through the input bam file. */ { - if (samread(in, &one) < 0) + if (samread (input, &one) < 0) { break; } - if (head->target_name[one.core.tid]==outBam) - //if (strcmp(head->target_name[one.core.tid],outBam)==0) - /* Seems to be dropping certain chromosomes (with underscores in names) */ + if (one.core.tid > 0) { - - printf("seems to be working"); - samwrite(out, &one); + samwrite(hashFindVal(hash, head->target_name[one.core.tid]), &one); + } + else + { + samwrite(unmapped, &one); } } - samclose(out); - samclose(in); +samclose(unmapped); } + +void bamSplitByChrom(char *inBam) +{ +struct hash *hash = hashNew(0); +samfile_t *input = bamMustOpenLocal(inBam, "rb", NULL); +bam_header_t *head = input ->header; +openOutput(hash, head); +/* Open up file, loop through header, and make up a hash with chromosome names for keys, + * and FILE * for values. */ +writeOutput(input, hash); +/* Loop through each record of BAM file, looking up chromosome, getting file from hash, + * and adding record to appropriate file */ +closeOutput(hash, head); +samclose(input); +/* Loop through each files nad close it */ } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 2) usage(); bamSplitByChrom(argv[1]); return 0; }