src/hg/oneShot/findToFixBedGraphLimits/findToFixBedGraphLimits.c 1.1

1.1 2009/11/22 18:37:43 kent
Utility to figure out missing bedGraph limits seems to work.
Index: src/hg/oneShot/findToFixBedGraphLimits/findToFixBedGraphLimits.c
===================================================================
RCS file: src/hg/oneShot/findToFixBedGraphLimits/findToFixBedGraphLimits.c
diff -N src/hg/oneShot/findToFixBedGraphLimits/findToFixBedGraphLimits.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/oneShot/findToFixBedGraphLimits/findToFixBedGraphLimits.c	22 Nov 2009 18:37:43 -0000	1.1
@@ -0,0 +1,114 @@
+/* findToFixBedGraphLimits - Scan through ra file of bedGraphs and calculate limits. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "ra.h"
+#include "jksql.h"
+
+static char const rcsid[] = "$Id$";  /* RCS/CVS $Id$ keyword placeholder; not referenced in the code visible here. */
+
+void usage(void)
+/* Explain usage and exit. */
+{
+errAbort(
+  "findToFixBedGraphLimits - Scan through ra file of bedGraphs and calculate limits.\n"
+  "usage:\n"
+  "   findToFixBedGraphLimits input.ra output.ra\n"
+  "Each record in input.ra needs db, track and 'type bedGraph N' tags.  Records are\n"
+  "copied to output.ra with minLimit and maxLimit tags added.\n"
+  );
+}
+
+static struct optionSpec options[] = {   /* Option table passed to optionInit() in main(). */
+   {NULL, 0},   /* Only entry, so no program-specific options; presumably the end-of-table marker. */
+};
+
+static char *mustFindVal(struct slPair *list, char *tag, struct lineFile *lf)
+/* Return value stored under tag in list.  If tag is absent abort, citing lf's line and file. */
+{
+char *found = slPairFindVal(list, tag);
+if (found != NULL) return found;
+errAbort("missing required %s tag near line %d of %s", tag, lf->lineIx, lf->fileName);
+return NULL;
+}
+
+void findToFixBedGraphLimits(char *input, char *output)
+/* findToFixBedGraphLimits - Scan through ra file of bedGraphs and calculate limits.  Each record
+ * of input is written to output followed by minLimit/maxLimit lines computed from its table. */
+{
+struct lineFile *lf = lineFileOpen(input, TRUE);
+FILE *f = mustOpen(output, "w");
+struct slPair *el, *list;
+while ((list = raNextRecordAsSlPairList(lf)) != NULL)
+    {
+    /* Find required fields for calcs. */
+    char *db = mustFindVal(list, "db", lf);
+    char *track = mustFindVal(list, "track", lf);
+    char *type = cloneString(mustFindVal(list, "type", lf));  /* Private copy for chopLine; freed below. */
+
+    /* Parse out type value, which should be "bedGraph 4" and put the 4 or whatever other number
+     * in dataFieldIndex.  It is used as index - 1 below, so a value of 0 is rejected here. */
+    char *typeWords[3];
+    int typeWordCount = chopLine(type, typeWords);
+    if (typeWordCount != 2 || !sameString(typeWords[0], "bedGraph"))
+           errAbort("Not well formed bedGraph type line %d of %s", lf->lineIx, lf->fileName);
+    int dataFieldIndex = sqlUnsigned(typeWords[1]);
+    if (dataFieldIndex < 1)
+        errAbort("bedGraph field number must be at least 1, line %d of %s", lf->lineIx, lf->fileName);
+
+    /* Figure out field corresponding to dataFieldIndex, not counting a leading bin column. */
+    struct sqlConnection *conn = sqlConnect(db);
+    struct slName *fieldList = sqlFieldNames(conn, track);
+    struct slName *pastBin = fieldList;
+    if (pastBin != NULL && sameString(pastBin->name, "bin"))
+         pastBin = pastBin->next;
+    struct slName *fieldName = slElementFromIx(pastBin, dataFieldIndex - 1);
+    if (fieldName == NULL)
+         errAbort("%s doesn't have enough fields", track);
+    char *field = fieldName->name;
+    assert(sqlFieldIndex(conn, track, field) >= 0);
+
+    /* Print reassuring status message */
+    verbose(1, "%s.%s has %d elements.  Data field is %s\n", db, track, sqlTableSize(conn, track), field);
+    /* Get min/max dataValues in fields.  Do it ourselves rather than using SQL min/max because sometimes
+     * the data field is a name column.... */
+    char query[512];
+    safef(query, sizeof(query), "select %s from %s", field, track);
+    struct sqlResult *sr = sqlGetResult(conn, query);
+    char **row = sqlNextRow(sr);
+    if (row == NULL)  /* errAbort rather than assert so the empty-table check survives NDEBUG builds. */
+        errAbort("%s.%s has no rows, can't calculate limits", db, track);
+    double minLimit = sqlDouble(row[0]), maxLimit = minLimit;
+    while ((row = sqlNextRow(sr)) != NULL)
+        {
+	double val = sqlDouble(row[0]);
+	if (val < minLimit) minLimit = val;
+	if (val > maxLimit) maxLimit = val;
+	}
+    sqlFreeResult(&sr);
+    verbose(1, "    %g %g\n",  minLimit, maxLimit);
+
+    /* Output original table plus new minLimit/maxLimit, then a blank line to end the record. */
+    for (el = list; el != NULL; el = el->next)
+	fprintf(f, "%s %s\n", el->name, (char *)el->val);
+    fprintf(f, "minLimit %g\nmaxLimit %g\n\n", minLimit, maxLimit);
+
+    sqlDisconnect(&conn);
+    slFreeList(&fieldList);
+    freeMem(type);  /* Release the cloneString() copy made at the top of this iteration. */
+    slPairFreeValsAndList(&list);
+    }
+lineFileClose(&lf);
+carefulClose(&f);
+}
+
+int main(int argc, char *argv[])
+/* Process command line: parse options, insist on two positional arguments, then do the work. */
+{
+optionInit(&argc, argv, options);
+if (argc != 3) usage();
+char *inputRa = argv[1], *outputRa = argv[2];
+findToFixBedGraphLimits(inputRa, outputRa);
+return 0;
+}