src/hg/instinct/segmentBed/segmentBed.c 1.1
1.1 2009/02/11 22:18:24 jsanborn
initial commit
Index: src/hg/instinct/segmentBed/segmentBed.c
===================================================================
RCS file: src/hg/instinct/segmentBed/segmentBed.c
diff -N src/hg/instinct/segmentBed/segmentBed.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/instinct/segmentBed/segmentBed.c 11 Feb 2009 22:18:24 -0000 1.1
@@ -0,0 +1,241 @@
+/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "jksql.h"
+#include "bed.h"
+#include "genePred.h"
+#include "hPrint.h"
+#include "hdb.h"
+
+struct chromWidth {
+ struct chromWidth *next;
+ char *chrom;
+ int start;
+ int stop;
+};
+
+char *database = NULL;
+char *table = NULL;
+
+boolean localDb = FALSE; /* Connect to local host, instead of default host, using localDb.XXX variables defined in .hg.conf.\n"*/
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+ "downsampleBed - Will downsample probes in BED format (from table in hg18) to fit in given pixel density\n"
+ "usage:\n"
+ " downsampleBed database table numPixels\n"
+ " -local - connect to local host, instead of default host, using localDb.XXX variables defined in .hg.conf.\n"
+ );
+}
+
+static struct optionSpec options[] = {
+ {"local", OPTION_BOOLEAN},
+ {NULL, 0}
+};
+
+
+struct bed *makeEmptyBed(struct sqlConnection *conn, char *chrom, int start, int stop)
+{
+if (!conn)
+ return NULL;
+
+char query[512];
+safef(query, sizeof(query),
+ "select * from %s limit 1", table);
+
+char **row;
+struct sqlResult *sr = sqlGetResult(conn, query);
+if ((row = sqlNextRow(sr)) == NULL)
+ return NULL;
+
+struct bed *tuple = bedLoadN(row+1, 15);
+
+sqlFreeResult(&sr);
+
+int i;
+for (i = 0; i < tuple->expCount; i++)
+ tuple->expScores[i] = 0.0;
+
+char name[128];
+safef(name, sizeof(name), "%s:%d-%d", chrom, start, stop);
+
+tuple->name = cloneString(name);
+tuple->chrom = cloneString(chrom);
+tuple->chromStart = start;
+tuple->chromEnd = stop;
+tuple->thickStart = start;
+tuple->thickStart = stop;
+
+return tuple;
+}
+
+
+void segmentify(struct sqlConnection *conn, char *table, struct bed *tupleList,
+ struct chromWidth *cwList, int numSegments)
+/* find overlapping bed-formatted probes with genes in table */
+{
+if (tupleList == NULL)
+ return;
+
+char outFile[256];
+safef(outFile, sizeof(outFile), "%s_segmented.bed", table);
+
+FILE *f = NULL;
+f = mustOpen(outFile, "w");
+
+double totalWidth = 0.0;
+struct chromWidth *cw;
+
+for (cw = cwList; cw; cw = cw->next)
+ totalWidth += (double) cw->stop - (double) cw->start;
+
+
+int i, j, fStart, fStop, totalCount = 0;
+struct bed *tuple;
+for (cw = cwList; cw; cw = cw->next)
+ {
+ //int numIterations = ceil((double) (cw->stop - cw->start) / (double) basesPerSegment);
+
+ double width = (double) cw->stop - (double) cw->start;
+ int basesPerSegment = floor(width / (double) numSegments);
+ uglyf("basesPerSegment = %d\n", basesPerSegment);
+
+
+
+ fStart = cw->start;
+ fStop = fStart + basesPerSegment;
+
+ uglyf("%s:%d-%d : basesPerSegment = %d\n", cw->chrom, cw->start, cw->stop, basesPerSegment);
+
+ for (i = 0; i < numSegments; i++)
+ {
+ if (fStop > cw->stop)
+ fStop = cw->stop;
+
+ struct bed *averTuple = makeEmptyBed(conn, cw->chrom, fStart, fStop);
+
+ if (!averTuple)
+ errAbort("Couldn't make empty tuple!");
+
+ struct bed *filterList =
+ bedFilterListInRange(tupleList, NULL, cw->chrom, fStart, fStop);
+
+ if (filterList)
+ {
+ int count = 0;
+ for (tuple = filterList; tuple; tuple = tuple->next)
+ {
+ if (tuple->chromStart < averTuple->chromStart)
+ averTuple->chromStart = tuple->chromEnd;
+ if (tuple->chromEnd > averTuple->chromEnd)
+ averTuple->chromEnd = tuple->chromEnd;
+
+ if (tuple->thickStart < averTuple->thickStart)
+ averTuple->thickStart = tuple->thickStart;
+ if (tuple->thickEnd > averTuple->thickEnd)
+ averTuple->thickEnd = tuple->thickEnd;
+
+ if (tuple->expCount != averTuple->expCount)
+ errAbort("Bed entries have different experiment counts, %d != %d",
+ tuple->expCount, averTuple->expCount);
+
+ for (j = 0; j < averTuple->expCount; j++)
+ averTuple->expScores[j] += tuple->expScores[j];
+ count++;
+ totalCount++;
+ }
+
+ for (j = 0; j < averTuple->expCount; j++)
+ averTuple->expScores[j] = averTuple->expScores[j] / (double) count;
+ }
+
+ bedOutputN(averTuple, 15, f, '\t', '\n');
+ freez(&averTuple);
+
+// uglyf("%s totalCount = %d\n", cw->chrom, totalCount);
+
+ fStart += basesPerSegment;
+ fStop += basesPerSegment;
+ }
+ }
+}
+
+
+struct chromWidth *getChromWidth(struct sqlConnection *conn)
+{
+if (!conn)
+ return NULL;
+
+char query[512];
+safef(query, sizeof(query),
+ "select chrom,size from chromInfo where chrom in ('chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY')");
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+
+char **row = NULL;
+
+struct chromWidth *cw, *cwList = NULL;
+
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ AllocVar(cw);
+ cw->chrom = cloneString(row[0]);
+ cw->start = 0;
+ cw->stop = atoi(row[1]);
+
+ slAddHead(&cwList, cw);
+ }
+slReverse(&cwList);
+
+sqlFreeResult(&sr);
+return cwList;
+}
+
+void segmentBed(int numSegments)
+{
+char query[512];
+safef(query, sizeof(query),
+ "select * from %s", table);
+
+struct sqlConnection *conn = hAllocConn(database);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+
+char **row = NULL;
+struct bed *tuple, *tupleList = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ tuple = bedLoadN(row+1, 15);
+ slAddHead(&tupleList, tuple);
+ }
+sqlFreeResult(&sr);
+
+struct chromWidth *cwList = getChromWidth(conn);
+
+slReverse(&tupleList);
+segmentify(conn, table, tupleList, cwList, numSegments);
+
+hFreeConn(&conn);
+}
+
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc != 4)
+ usage();
+
+localDb = optionExists("local");
+
+database = argv[1];
+table = argv[2];
+
+segmentBed(atoi(argv[3]));
+
+return 0;
+}