8f45bb3988b7548987178934a02fc3e6e3ecea97
kent
  Mon Jun 14 18:43:58 2010 -0700
Moving clustering to library.
diff --git src/inc/peakCluster.h src/inc/peakCluster.h
new file mode 100644
index 0000000..e65823a
--- /dev/null
+++ src/inc/peakCluster.h
@@ -0,0 +1,98 @@
+/* peakCluster - cluster peak calls from different sources. */
+
+#ifndef PEAKCLUSTER_H
+#define PEAKCLUSTER_H
+
+struct peakDim
+/* A peak dimension: pairs a table column index with a label. */
+    {
+    int colIx;		/* Column index in table. */
+    char *label;	/* Label for this dimension. */
+    };
+
+struct peakSource 
+/* A source of peak information */
+    {
+    struct peakSource *next;	/* Next in list. */
+    char *dataSource;		/* File (or table) */
+    int chromColIx;		/* Chromosome column index. */
+    int startColIx;		/* Start coordinate column index. */
+    int endColIx;		/* End coordinate column index. */
+    int scoreColIx;		/* Index for score column. */
+    double normFactor;		/* Multiply this to get browser score. */
+    char **labels;		/* Label for each dimension */
+    int minColCount;		/* Minimum number of columns. */
+    };
+
+struct peakItem
+/* An item in a peak track */
+    {
+    struct peakItem *next;	/* Next in list. */
+    char *chrom;		/* Chromosome. Not allocated here. */
+    int chromStart,chromEnd;	/* Half open coordinates. */
+    double score;		/* Ideally something like -log(p). */
+    struct peakSource *source;   /* Source track/file for item. */
+    char *asciiLine;		/* Ascii representation of line. */
+    };
+
+struct peakCluster
+/* A cluster of items. */
+    {
+    struct peakCluster *next;	/* Next in list. */
+    char *chrom;		/* Chromosome.  Not allocated here. */
+    int chromStart, chromEnd;	/* Half open coordinates. */
+    double score;		/* Sum of component scores. */
+    double maxSubScore;		/* Max of component scores. */
+    struct slRef *itemRefList;	/* List of references to component items. */
+    };
+
+struct peakClusterMaker
+/* Help make a cluster of peaks on multiple-chromosome data sets. */
+    {
+    struct peakClusterMaker *next;	/* Next in list. */
+    struct hash *chromHash;	   /* Key is chromosome, value is an rbTree of items. */
+    struct rbTreeNode *stack[128]; /* Stack for rbTree evaluations. */
+    };
+
+struct peakClusterMaker *peakClusterMakerNew(void);
+/* Return a new peakClusterMaker.  Takes no arguments. */
+
+void peakClusterMakerFree(struct peakClusterMaker **pMaker);
+/* Free up a peakClusterMaker.  Takes the address of the caller's pointer. */
+
+struct hashEl *peakClusterMakerChromList(struct peakClusterMaker *maker);
+/* Return list of chromosomes in hashEl format, where each hashEl val is
+ * a rangeTree filled with items.  Do a slFreeList on the result when done. */
+
+struct peakSource *peakSourceLoadAll(char *fileName, int dimCount);
+/* Read fileName, parse it line by line, and return list of peakSources.  NOTE(review): dimCount is undocumented - confirm meaning. */
+
+void peakClusterMakerAddFromSource(struct peakClusterMaker *maker, struct peakSource *source);
+/* Read through data source and add its items to the rangeTrees in maker. */
+
+struct peakCluster *peakClusterItems(struct lm *lm, struct peakItem *itemList, 
+	double forceJoinScore, double weakLevel);
+/* Convert a list of items to a list of clusters of items.  This may break up clusters that
+ * have weakly linked parts. 
+      [                ]
+      AAAAAAAAAAAAAAAAAA 
+       BBBBBB   DDDDDD
+        CCCC     EEEE
+   gets transformed into
+       [    ]   [    ]
+      AAAAAAAAAAAAAAAAAA 
+       BBBBBB   DDDDDD
+        CCCC     EEEE
+   The strategy is to build a rangeTree of coverage, which might look something like so:
+      123333211123333211 
+   then define cluster ends that exceed the minimum limit, which is either 10% of the highest
+   or forceJoinScore if 10% of the highest is more than forceJoinScore.  This will go to
+   something like so:
+        [---]   [----]   
+   Finally the items that are overlapping a cluster are assigned to it.  Note that this
+   may mean that an item may be in multiple clusters.
+        [ABC]   [ ADE]
+   NOTE(review): the "10%" above is presumably set by the weakLevel parameter - confirm. */
+
+#endif /* PEAKCLUSTER_H */
+