8f45bb3988b7548987178934a02fc3e6e3ecea97 kent Mon Jun 14 18:43:58 2010 -0700 Moving clustering to library. diff --git src/inc/peakCluster.h src/inc/peakCluster.h new file mode 100644 index 0000000..e65823a --- /dev/null +++ src/inc/peakCluster.h @@ -0,0 +1,98 @@ +/* peakCluster - cluster peak calls from different sources. */ + +#ifndef PEAKCLUSTER_H +#define PEAKCLUSTER_H + +struct peakDim +/* A peak dimension */ + { + int colIx; /* Column index in table. */ + char *label; /* Label */ + }; + +struct peakSource +/* A source of peak information */ + { + struct peakSource *next; + char *dataSource; /* File (or table) */ + int chromColIx; /* Chromosome column index. */ + int startColIx; /* Start coordinate column index. */ + int endColIx; /* End coordinate column index. */ + int scoreColIx; /* Index for score column. */ + double normFactor; /* Multiply this to get browser score. */ + char **labels; /* Label for each dimension */ + int minColCount; /* Minimum number of columns. */ + }; + +struct peakItem +/* An item in a peak track */ + { + struct peakItem *next; + char *chrom; /* Chromosome. Not allocated here. */ + int chromStart,chromEnd; /* Half open coordinates. */ + double score; /* Ideally something like -log(p). */ + struct peakSource *source; /* Source track/file for item. */ + char *asciiLine; /* Ascii representation of line. */ + }; + +struct peakCluster +/* A cluster of items. */ + { + struct peakCluster *next; + char *chrom; /* Chromosome. Not allocated here. */ + int chromStart, chromEnd; /* Half open coordinates. */ + double score; /* Sum of component scores. */ + double maxSubScore; /* Max of component scores. */ + struct slRef *itemRefList; /* List of references to component items. */ + }; + +struct peakClusterMaker +/* Help make a cluster of peaks on multiple-chromosome data sets. */ + { + struct peakClusterMaker *next; + struct hash *chromHash; /* Key is chromosome, value is an rbTree of items. 
*/ + struct rbTreeNode *stack[128]; /* Stack for rbTree evaluations. */ + }; + +struct peakClusterMaker *peakClusterMakerNew(); +/* Return a new peakClusterMaker. */ + +void peakClusterMakerFree(struct peakClusterMaker **pMaker); +/* Free up a peakClusterMaker. */ + +struct hashEl *peakClusterMakerChromList(struct peakClusterMaker *maker); +/* Return list of chromosomes. In hashEl format where the hashEl val is + * a rangeTree filled with items. Do a slFreeList when done. */ + +struct peakSource *peakSourceLoadAll(char *fileName, int dimCount); +/* Read file, parse it line by line and return list of peakSources. */ + +void peakClusterMakerAddFromSource(struct peakClusterMaker *maker, struct peakSource *source); +/* Read through data source and add its items to the rangeTrees in maker. */ + +struct peakCluster *peakClusterItems(struct lm *lm, struct peakItem *itemList, + double forceJoinScore, double weakLevel); +/* Convert a list of items to a list of clusters of items. This may break up clusters that + * have weakly linked parts. + [ ] + AAAAAAAAAAAAAAAAAA + BBBBBB DDDDDD + CCCC EEEE + gets transformed into + [ ] [ ] + AAAAAAAAAAAAAAAAAA + BBBBBB DDDDDD + CCCC EEEE + The strategy is to build a rangeTree of coverage, which might look something like so: + 123333211123333211 + then define cluster ends that exceed the minimum limit, which is either 10% of the highest + or forceJoinScore if 10% of the highest is more than forceJoinScore. This will go to + something like so: + [---] [----] + Finally the items that are overlapping a cluster are assigned to it. Note that this + may mean that an item may be in multiple clusters. + [ABC] [ ADE] + */ + +#endif /* PEAKCLUSTER_H */ +