src/hg/instinct/bioInt2/populateDb.c 1.10

1.10 2009/04/28 21:21:24 jsanborn
added sex correction
Index: src/hg/instinct/bioInt2/populateDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/populateDb.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/instinct/bioInt2/populateDb.c	27 Apr 2009 22:05:48 -0000	1.9
+++ src/hg/instinct/bioInt2/populateDb.c	28 Apr 2009 21:21:24 -0000	1.10
@@ -642,8 +642,14 @@
 if (!names)
     return NULL;
 struct slName *sl, *slList = slNameListFromComma(names);
 
+if (!slList)
+    return NULL;
+
+if (slCount(slList) == 0)
+    return NULL;
+
 struct dyString *dy = newDyString(100);
 dyStringPrintf(dy, 
 	       "select * from %s where type = \"%s\" "
 	       "and feature_name in (", AF_TABLE, type);
@@ -803,8 +809,63 @@
 
 return avList;
 }
 
+void sexCorrect(struct bed *nb, float *correction)
+{
+int i;
+for (i = 0; i < nb->expCount; i++)
+    nb->expScores[i] -= correction[i];
+}
+
+boolean calcSexCorrection(struct sqlConnection *hgConn, 
+			  char *dataTable, char *chrom, float **retCorrection)
+{
+char query[256];
+safef(query, sizeof(query), 
+      "select expCount from %s where chrom = \"%s\" limit 1", 
+      dataTable, chrom);
+if (!sqlExists(hgConn, query))
+    return FALSE;
+int expCount = sqlQuickNum(hgConn, query);
+
+int i;
+float *tmp;
+AllocArray(tmp, expCount);
+for (i = 0; i < expCount; i++)
+    tmp[i] = 0.0;
+
+safef(query, sizeof(query), 
+      "select * from %s where chrom = \"%s\"", 
+      dataTable, chrom);
+
+float count = 0.0;
+struct sqlResult *sr = sqlGetResult(hgConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct bed *nb = bedLoadN(row+1, 15);
+
+    for (i = 0; i < nb->expCount; i++)
+	tmp[i] += nb->expScores[i];
+    count += 1.0;
+    bedFree(&nb);
+    }
+
+fprintf(stderr, "correction for %s:\n", chrom);
+for (i = 0; i < expCount; i++)
+    {
+    tmp[i] = tmp[i]/count;
+    fprintf(stderr, "%f,", tmp[i]);
+    }
+fprintf(stderr, "\n");
+
+*retCorrection = tmp;
+return TRUE;
+}
+
+
+
 void setupProbeData(struct sqlConnection *hgConn, struct sqlConnection *biConn, 
 		    struct datasets *da, struct maGrouping *allA)
 {
 char *dataTable = da->data_table;
@@ -827,8 +888,12 @@
 
 if (!inputProbeVals)
     return;
 
+float *correctionX, *correctionY;
+boolean doX = calcSexCorrection(hgConn, dataTable, "chrX", &correctionX);
+boolean doY = calcSexCorrection(hgConn, dataTable, "chrY", &correctionY);
+ 
 struct hash *settings = getSettings(dataTable);
 struct hashEl *el = hashLookup(settings, "aliasTable");
 if (!el)
     errAbort("No aliasTable.\n");
@@ -846,8 +911,20 @@
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct bed *nb = bedLoadN(row+1, 15);
+  
+//    if (sameString(nb->chrom, "chrX") || sameString(nb->chrom, "chrY"))
+//	{ // skip sex chroms
+//	bedFree(&nb);
+//	continue;
+//	}
+
+    if (sameString(nb->chrom, "chrX") && doX)
+	sexCorrect(nb, correctionX);
+    else if (sameString(nb->chrom, "chrY") && doY)
+	sexCorrect(nb, correctionY);
+
     struct hashEl *el = hashLookup(gaHash, nb->name);
     if (el)
 	{
 	struct geneAlias *ga = el->val;