196fd2da24b1eef2d8fbfcd07924e534ff44b969 ceisenhart Wed May 14 16:50:18 2014 -0700 converts a bam file to a fastq filesome illumina header data is lost diff --git src/utils/bamToFastq/bamToFastq.c src/utils/bamToFastq/bamToFastq.c new file mode 100644 index 0000000..5e46d6c --- /dev/null +++ src/utils/bamToFastq/bamToFastq.c @@ -0,0 +1,128 @@ +/* bamToFastq - converts a BAM file to Fastq. */ +/* currently working, however some header information is lost */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "bamFile.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "bamToFastq - converts a BAM file to Fastq\n" + "usage:\n" + " bamToFastq input.bam output.fastq\n" + "options:\n" + " " + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {NULL, 0}, +}; + +struct fastqSeq +/* holds a single fastq sequence */ + { + struct fastqSeq *next; + int size; /* Size of the sequence. */ + char *header; /* Sequence header, begins with '@' */ + char *del; /* Fastq deliminator '+' */ + char *dna; /* DNA sequence */ + unsigned char *quality; /* DNA quality, in ASCII format, unsigned for bam functions */ + }; + +void fixQuality(struct fastqSeq *seq) +/* the bam quality reader returns a format that is not FASTQ */ +/* this function updates the bam quality to a fastq quality */ +{ +int size = strlen(seq->dna); +int i = 0; +for (i=0; i < size; ++i) + { + seq->quality[i] += 33; + } +seq->quality[size]='\0'; +} + +void fastqWriteNext(struct fastqSeq *input, FILE *f) +/* a function for writing a single fastq struct to file */ +{ + fprintf(f,"%s\n",input->header); + fprintf(f,"%s\n",input->dna); + fprintf(f,"%s\n",input->del); + fprintf(f,"%s\n",input->quality); +} + + +void freeFastqSeq(struct fastqSeq **pInput) +/* frees the memory allocated to a fastq struct */ +{ +struct fastqSeq *input = *pInput; +if (input != NULL) + { + freeMem(input->header); + freeMem(input->dna); + freeMem(input->del); + freeMem(input->quality); + freez(pInput); + } +} + +char *concat(char *s1, char *s2) +/* a simple concatenate function */ +{ +char *result = needMem(strlen(s1)+strlen(s2) +1); +strcpy(result,s1); +strcat(result,s2); +return result; +} + +samfile_t *samMustOpen(char *fileName, char *mode, void *extraHeader) +/* Open up samfile or die trying */ +{ +samfile_t *sf = samopen(fileName, mode, extraHeader); +if (sf == NULL) + errnoAbort("Couldn't open %s.\n", fileName); +return sf; +} + + +void bamToFastq(char *inBam, char *outFastq) +/* bamToFastq - converts a BAM file to Fastq. */ +{ +samfile_t *in = samMustOpen(inBam, "rb", NULL); +/* Open up the BAM input and a fastq sequence */ +FILE *f = mustOpen(outFastq, "w"); +bam1_t one; +ZeroVar(&one); // This seems to be necessary! +struct fastqSeq seq = {}; +for (;;) + { + if (samread(in, &one) < 0) + { + break; + } + seq.header = concat("@",bam1_qname(&one)); + seq.del = "+"; + seq.dna = bamGetQuerySequence(&one, TRUE); + seq.quality = bamGetQueryQuals(&one, TRUE); + /* enter in the required fastqSeq values */ + fixQuality(&seq); + fastqWriteNext(&seq, f); + /* print the fasqSeq to file */ + } +samclose(in); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 3) + usage(); +bamToFastq(argv[1],argv[2]); +return 0; +}