e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/utils/analyzeCpG/analyzeCpG.c src/utils/analyzeCpG/analyzeCpG.c
index 1702c3d..9c60e5a 100644
--- src/utils/analyzeCpG/analyzeCpG.c
+++ src/utils/analyzeCpG/analyzeCpG.c
@@ -1,194 +1,197 @@
 /* analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them. */
+
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "basicBed.h"
 #include "sqlNum.h"
 
 struct bedGraphRecord
 {
     struct bedGraphRecord *next;
     
     char *chrom;
     unsigned chromStart;
     unsigned chromEnd;
     char *name;
     double score;
     char strand;
 };
 
 struct bedGraphRecord *bedGraphLoadNext(struct lineFile *lf)
 {
 char *row[6];
 int rowSize = lineFileChopNext(lf, row, ArraySize(row));
     
 if (rowSize == 0)
     return NULL;
     
 struct bedGraphRecord *bg;
 AllocVar(bg);
     
 bg->chrom = cloneString(row[0]);
 bg->chromStart = sqlUnsigned(row[1]);
 bg->chromEnd = sqlUnsigned(row[2]);
 bg->name = cloneString(row[3]);
 bg->score = sqlFloat(row[4]);
 bg->strand = row[5][0];
 
 return bg;
 }
 
 int clClusterSize = 100;
 int clMinCluster = 100;
 boolean clHist = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them\n"
   "usage:\n"
   "   analyzeCpG input.bed output.bed\n"
   "options:\n"
   "   -xxx=XXX\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"clusterSize", OPTION_INT},
    {"minCluster", OPTION_INT},
    {"hist", OPTION_BOOLEAN}
 };
 
 void writeCluster(struct bedGraphRecord *clusterList, FILE *out)
 {
 int size = slCount(clusterList);
 
 if (size < clMinCluster)
     return;
 
 int blockStarts[size];
 int blockSizes[size];
 double score = 0;
 struct bedGraphRecord *last = clusterList;
 
 slReverse(&clusterList);
         
 struct bed *outBed;
 AllocVar(outBed);
 
 outBed->chrom = cloneString(clusterList->chrom);
 outBed->chromStart = clusterList->chromStart;
 outBed->chromEnd = last->chromEnd;
 outBed->name = "asdf";
 outBed->strand[0] = clusterList->strand;
 
 outBed->blockCount = size;
 
 int i;
 for (i = 0; i < size; i++)
     blockSizes[i] = 1;
 outBed->blockSizes = blockSizes;
 
 i = 0;
 struct bedGraphRecord *cur;
 for (cur = clusterList; cur != NULL; cur = cur->next)
     {
     blockStarts[i] = cur->chromStart - outBed->chromStart;
     score += cur->score;
     i++;
     }
 outBed->chromStarts = blockStarts;
 outBed->score = (int)(score * 10 / size);
         
 //printf("printing %s %d %d", outBed->chrom, outBed->chromStart, outBed->chromEnd);
         
 bedTabOutN(outBed, 12, out);   
 }
 
 void analyzeCpG(char *inputName, char *outputName)
 /* analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them. */
 {
 
 // open input bed file
 // struct bed *input = bedLoadAll(inputName);
 struct lineFile *input = lineFileOpen(inputName, TRUE);
 FILE *out = fopen(outputName, "w");
 
 struct bedGraphRecord *plusPrev = NULL;
 struct bedGraphRecord *minusPrev = NULL;
 
 // set up lists
 struct bedGraphRecord *plusClusters = NULL;
 struct bedGraphRecord *minusClusters = NULL;
 
 int hist[8096];
 int i;
 for (i = 0; i < 8096; i++)
     hist[i] = 0;
 
 // loop over bed file
 for (;;)
     {
     struct bedGraphRecord *record = bedGraphLoadNext(input);
     
     if (record == NULL)
         break;
     
     if (record->strand == '+')
         {
         //if (plusPrev != NULL)
          //   printf("+: cur %s %d, prev %s %d = %d", record->chrom, record->chromStart, plusPrev->chrom, plusPrev->chromStart, record->chromStart - plusPrev->chromStart);
         if (plusPrev != NULL && (strcmp(record->chrom, plusPrev->chrom) != 0 || record->chromStart - plusPrev->chromStart > clClusterSize))
             {
             hist[slCount(plusClusters)]++;
             writeCluster(plusClusters, out);
             slFreeList(&plusClusters);
             }
         //printf("\n");
         slAddHead(&plusClusters, record);
         plusPrev = record;
         }
     else
         {
         if (minusPrev != NULL && (strcmp(record->chrom, minusPrev->chrom) != 0 || record->chromStart - minusPrev->chromStart > clClusterSize))
             {
             hist[slCount(minusClusters)]++;
             writeCluster(minusClusters, out);
             slFreeList(&minusClusters);
             }
         slAddHead(&minusClusters, record);
         minusPrev = record;
         }
     //printf("plusPrev: %p, minusPrev: %p, plusClusterCount: %d\n", plusPrev, minusPrev, slCount(plusClusters));
     }
 
 lineFileClose(&input);
 
 fclose(out);
 
 if (clHist)
     {
     for (i = 0; i < 8096; i++)
         {
         if (hist[i] > 0)
             printf("%5d%10d\n", i, hist[i]);
         }
     }
 
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 clClusterSize = optionInt("clusterSize", clClusterSize);
 clMinCluster = optionInt("minCluster", clMinCluster);
 clHist = optionExists("hist");
 if (argc != 3)
     usage();
 analyzeCpG(argv[1], argv[2]);
 return 0;
 }