bac00d4c992799aa4213f3e76010c0cb402b7e84 mmaddren Fri Jun 6 15:52:20 2014 -0700 renamed analyzeCpG to edwClusterMethylBed, added some comments, and fixed a few bugs diff --git src/utils/analyzeCpG/analyzeCpG.c src/utils/analyzeCpG/analyzeCpG.c deleted file mode 100644 index 1702c3d..0000000 --- src/utils/analyzeCpG/analyzeCpG.c +++ /dev/null @@ -1,194 +0,0 @@ -/* analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them. */ -#include "common.h" -#include "linefile.h" -#include "hash.h" -#include "options.h" -#include "basicBed.h" -#include "sqlNum.h" - -struct bedGraphRecord -{ - struct bedGraphRecord *next; - - char *chrom; - unsigned chromStart; - unsigned chromEnd; - char *name; - double score; - char strand; -}; - -struct bedGraphRecord *bedGraphLoadNext(struct lineFile *lf) -{ -char *row[6]; -int rowSize = lineFileChopNext(lf, row, ArraySize(row)); - -if (rowSize == 0) - return NULL; - -struct bedGraphRecord *bg; -AllocVar(bg); - -bg->chrom = cloneString(row[0]); -bg->chromStart = sqlUnsigned(row[1]); -bg->chromEnd = sqlUnsigned(row[2]); -bg->name = cloneString(row[3]); -bg->score = sqlFloat(row[4]); -bg->strand = row[5][0]; - -return bg; -} - -int clClusterSize = 100; -int clMinCluster = 100; -boolean clHist = FALSE; - -void usage() -/* Explain usage and exit. */ -{ -errAbort( - "analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them\n" - "usage:\n" - " analyzeCpG input.bed output.bed\n" - "options:\n" - " -xxx=XXX\n" - ); -} - -/* Command line validation table. */ -static struct optionSpec options[] = { - {"clusterSize", OPTION_INT}, - {"minCluster", OPTION_INT}, - {"hist", OPTION_BOOLEAN} -}; - -void writeCluster(struct bedGraphRecord *clusterList, FILE *out) -{ -int size = slCount(clusterList); - -if (size < clMinCluster) - return; - -int blockStarts[size]; -int blockSizes[size]; -double score = 0; -struct bedGraphRecord *last = clusterList; - -slReverse(&clusterList); - -struct bed *outBed; -AllocVar(outBed); - -outBed->chrom = cloneString(clusterList->chrom); -outBed->chromStart = clusterList->chromStart; -outBed->chromEnd = last->chromEnd; -outBed->name = "asdf"; -outBed->strand[0] = clusterList->strand; - -outBed->blockCount = size; - -int i; -for (i = 0; i < size; i++) - blockSizes[i] = 1; -outBed->blockSizes = blockSizes; - -i = 0; -struct bedGraphRecord *cur; -for (cur = clusterList; cur != NULL; cur = cur->next) - { - blockStarts[i] = cur->chromStart - outBed->chromStart; - score += cur->score; - i++; - } -outBed->chromStarts = blockStarts; -outBed->score = (int)(score * 10 / size); - -//printf("printing %s %d %d", outBed->chrom, outBed->chromStart, outBed->chromEnd); - -bedTabOutN(outBed, 12, out); -} - -void analyzeCpG(char *inputName, char *outputName) -/* analyzeCpG - cluster CpG regions from an input bed file and perform some analysis on them. */ -{ - -// open input bed file -// struct bed *input = bedLoadAll(inputName); -struct lineFile *input = lineFileOpen(inputName, TRUE); -FILE *out = fopen(outputName, "w"); - -struct bedGraphRecord *plusPrev = NULL; -struct bedGraphRecord *minusPrev = NULL; - -// set up lists -struct bedGraphRecord *plusClusters = NULL; -struct bedGraphRecord *minusClusters = NULL; - -int hist[8096]; -int i; -for (i = 0; i < 8096; i++) - hist[i] = 0; - -// loop over bed file -for (;;) - { - struct bedGraphRecord *record = bedGraphLoadNext(input); - - if (record == NULL) - break; - - if (record->strand == '+') - { - //if (plusPrev != NULL) - // printf("+: cur %s %d, prev %s %d = %d", record->chrom, record->chromStart, plusPrev->chrom, plusPrev->chromStart, record->chromStart - plusPrev->chromStart); - if (plusPrev != NULL && (strcmp(record->chrom, plusPrev->chrom) != 0 || record->chromStart - plusPrev->chromStart > clClusterSize)) - { - hist[slCount(plusClusters)]++; - writeCluster(plusClusters, out); - slFreeList(&plusClusters); - } - //printf("\n"); - slAddHead(&plusClusters, record); - plusPrev = record; - } - else - { - if (minusPrev != NULL && (strcmp(record->chrom, minusPrev->chrom) != 0 || record->chromStart - minusPrev->chromStart > clClusterSize)) - { - hist[slCount(minusClusters)]++; - writeCluster(minusClusters, out); - slFreeList(&minusClusters); - } - slAddHead(&minusClusters, record); - minusPrev = record; - } - //printf("plusPrev: %p, minusPrev: %p, plusClusterCount: %d\n", plusPrev, minusPrev, slCount(plusClusters)); - } - -lineFileClose(&input); - -fclose(out); - -if (clHist) - { - for (i = 0; i < 8096; i++) - { - if (hist[i] > 0) - printf("%5d%10d\n", i, hist[i]); - } - } - -} - -int main(int argc, char *argv[]) -/* Process command line. */ -{ -optionInit(&argc, argv, options); -clClusterSize = optionInt("clusterSize", clClusterSize); -clMinCluster = optionInt("minCluster", clMinCluster); -clHist = optionExists("hist"); -if (argc != 3) - usage(); -analyzeCpG(argv[1], argv[2]); -return 0; -}