e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/utils/methylateGenome/methylateGenome.c src/utils/methylateGenome/methylateGenome.c
index 49876db..490ff60 100644
--- src/utils/methylateGenome/methylateGenome.c
+++ src/utils/methylateGenome/methylateGenome.c
@@ -1,188 +1,191 @@
 /* methylateGenome - Creates a methylated version of an input genome, in which any occurance of CG becomes TG. */
+
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dnautil.h"
 #include "dnaLoad.h"
 #include "dnaseq.h"
 #include "fa.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "methylateGenome - Creates a methylated version of a genome, in which any occurance of CG becomes TG\n"
   "usage:\n"
   "   methylateGenome input.[fa|2bit] outputPrefix\n"
   "options:\n"
   "   -xxx=XXX\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 void old(struct dnaSeq *seq)
 {
 int end = seq->size - 1;
 int i;
 for (i = 0; i < end; ++i)
     {
     char b1 = seq->dna[i];
     int v1 = ntVal[(unsigned)b1];
     int v2 = ntVal[(unsigned)seq->dna[i+1]];
     
     if (v1 == C_BASE_VAL && v2 == G_BASE_VAL)
         {
         if (b1 == 'C')
             seq->dna[i] = 'T';
         else
             seq->dna[i] = 't';
         }
     }
 }
 
 void forwardStrandNoMethylation(struct dnaSeq *seq)
 {
 int end = seq->size - 1;
 int i;
 for (i = 0; i < end; ++i)
     {
     if (seq->dna[i] == 'C')
         seq->dna[i] = 'T';
     else if (seq->dna[i] == 'c')
         seq->dna[i] = 't';
     }
 }
 
 void reverseStrandNoMethylation(struct dnaSeq *seq)
 {
 int end = seq->size - 1;
 int i;
 for (i = 0; i < end; ++i)
     {
     if (seq->dna[i] == 'G')
         seq->dna[i] = 'A';
     else if (seq->dna[i] == 'g')
         seq->dna[i] = 'a';
     }
 }
 
 void forwardStrandWithMethylation(struct dnaSeq *seq)
 {
 int end = seq->size - 1;
 int i;
 for (i = 0; i < end; ++i)
     {
     char b1 = seq->dna[i];
     int v1 = ntVal[(unsigned)b1];
     int v2 = ntVal[(unsigned)seq->dna[i+1]];
     
     if (v1 == C_BASE_VAL && v2 != G_BASE_VAL)
         {
             if (b1 == 'C')
                 seq->dna[i] = 'T';
             else if (b1 == 'c')
                 seq->dna[i] = 't';
         }
     }
 }
 
 void reverseStrandWithMethylation(struct dnaSeq *seq)
 {
 int end = seq->size - 1;
 int i;
 for (i = 0; i < end; ++i)
     {
     char b1 = seq->dna[i];
     int v1 = ntVal[(unsigned)b1];
     int v2 = ntVal[(unsigned)seq->dna[i+1]];
     
     if (v1 == G_BASE_VAL && v2 != C_BASE_VAL)
         {
             if (b1 == 'G')
                 seq->dna[i] = 'A';
             else if (b1 == 'g')
                 seq->dna[i] = 'a';
         }
     }
 }
 
 void performConversion(char *fileName, char *outputName, void (*replacementPolicy)(struct dnaSeq *))
 {
 // Open input and output files
 struct dnaLoad *dl = dnaLoadOpen(fileName);
 FILE *f = mustOpen(outputName, "w");
 
 // Loop over every line in the input file...
 struct dnaSeq *seq = NULL;
 for (;;)
     {
     // Take every line in the file, break when we're done
     seq = dnaLoadNext(dl);
     if (seq == NULL)
         break;
       
     replacementPolicy(seq);
       
     // Replace all 'CG' (or 'cg') with 'TG' (or 'tg')
     /*int end = seq->size - 1;
     int i;
     for (i = 0; i < end; ++i)
         {
         char b1 = seq->dna[i];
         int v1 = ntVal[(unsigned)b1];
         int v2 = ntVal[(unsigned)seq->dna[i+1]];
         
         if (v1 == C_BASE_VAL && v2 == G_BASE_VAL)
             {
             if (b1 == 'C')
                 seq->dna[i] = 'T';
             else
                 seq->dna[i] = 't';
             }
         }*/
     
     // Write out the modified line
     faWriteNext(f, seq->name, seq->dna, seq->size);
     }
     
 if (fclose(f) != 0)
     errnoAbort("fclose failed");
 }
 
 void methylateGenome(char *fileName, char *outputPrefix)
 /* methylateGenome - Creates a methylated version of an input genome, in which any occurance of CG becomes TG. */
 {
 int nameSize = 64;
 
 char forwardNoMethylName[nameSize];
 safef(forwardNoMethylName, nameSize, "%s_forwardNoMethyl.fa", outputPrefix);
 performConversion(fileName, forwardNoMethylName, &forwardStrandNoMethylation);
 
 char reverseNoMethylName[nameSize];
 safef(reverseNoMethylName, nameSize, "%s_reverseNoMethyl.fa", outputPrefix);
 performConversion(fileName, reverseNoMethylName, &reverseStrandNoMethylation);
 
 char forwardMethylName[nameSize];
 safef(forwardMethylName, nameSize, "%s_forwardMethyl.fa", outputPrefix);
 performConversion(fileName, forwardMethylName, &forwardStrandWithMethylation);
 
 char reverseMethylName[nameSize];
 safef(reverseMethylName, nameSize, "%s_reverseMethyl.fa", outputPrefix);
 performConversion(fileName, reverseMethylName, &reverseStrandWithMethylation);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 dnaUtilOpen();
 methylateGenome(argv[1], argv[2]);
 return 0;
 }