dac55f2232f3f0df03a658c54b44cccc351b01f3 mmaddren Wed Apr 16 15:21:43 2014 -0700 initial commit of methylateGenome tool diff --git src/utils/methylateGenome/methylateGenome.c src/utils/methylateGenome/methylateGenome.c new file mode 100644 index 0000000..5064243 --- /dev/null +++ src/utils/methylateGenome/methylateGenome.c @@ -0,0 +1,69 @@ +/* methylateGenome - Creates a methylated version of an input genome, in which any occurance of CG becomes TG. */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "dnaLoad.h" +#include "dnaseq.h" +#include "fa.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "methylateGenome - Creates a methylated version of a genome, in which any occurance of CG becomes TG\n" + "usage:\n" + " methylateGenome input output.fa\n" + "options:\n" + " -xxx=XXX\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {NULL, 0}, +}; + +void methylateGenome(char *fileName, char *outputName) +/* methylateGenome - Creates a methylated version of an input genome, in which any occurance of CG becomes TG. */ +{ +// Open input and output files +struct dnaLoad *dl = dnaLoadOpen(fileName); +FILE *f = mustOpen(outputName, "w"); + +// Loop over every line in the input file... +struct dnaSeq *seq = NULL; +while (1) + { + // Take every line in the file, break when we're done + seq = dnaLoadNext(dl); + if (seq == NULL) + break; + + // Replace all 'CG' (or 'cg') with 'TG' (or 'tg') + int i; + for (i = 0; i < seq->size - 1; ++i) + { + if (seq->dna[i] == 'C' && seq->dna[i + 1] == 'G') + seq->dna[i] = 'T'; + else if (seq->dna[i] == 'c' && seq->dna[i + 1] == 'g') + seq->dna[i] = 't'; + } + + // Write out the modified line + faWriteNext(f, seq->name, seq->dna, seq->size); + } + + if (fclose(f) != 0) + errnoAbort("fclose failed"); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 3) + usage(); +methylateGenome(argv[1], argv[2]); +return 0; +}