src/hg/instinct/bioInt2/populateDb.c 1.10
1.10 2009/04/28 21:21:24 jsanborn
added sex correction
Index: src/hg/instinct/bioInt2/populateDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/populateDb.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/instinct/bioInt2/populateDb.c 27 Apr 2009 22:05:48 -0000 1.9
+++ src/hg/instinct/bioInt2/populateDb.c 28 Apr 2009 21:21:24 -0000 1.10
@@ -642,8 +642,14 @@
if (!names)
return NULL;
struct slName *sl, *slList = slNameListFromComma(names);
+if (!slList)
+ return NULL;
+
+if (slCount(slList) == 0)
+ return NULL;
+
struct dyString *dy = newDyString(100);
dyStringPrintf(dy,
"select * from %s where type = \"%s\" "
"and feature_name in (", AF_TABLE, type);
@@ -803,8 +809,63 @@
return avList;
}
+void sexCorrect(struct bed *nb, float *correction)
+{
+int i;
+for (i = 0; i < nb->expCount; i++)
+ nb->expScores[i] -= correction[i];
+}
+
+boolean calcSexCorrection(struct sqlConnection *hgConn,
+ char *dataTable, char *chrom, float **retCorrection)
+{
+char query[256];
+safef(query, sizeof(query),
+ "select expCount from %s where chrom = \"%s\" limit 1",
+ dataTable, chrom);
+if (!sqlExists(hgConn, query))
+ return FALSE;
+int expCount = sqlQuickNum(hgConn, query);
+
+int i;
+float *tmp;
+AllocArray(tmp, expCount);
+for (i = 0; i < expCount; i++)
+ tmp[i] = 0.0;
+
+safef(query, sizeof(query),
+ "select * from %s where chrom = \"%s\"",
+ dataTable, chrom);
+
+float count = 0.0;
+struct sqlResult *sr = sqlGetResult(hgConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ struct bed *nb = bedLoadN(row+1, 15);
+
+ for (i = 0; i < nb->expCount; i++)
+ tmp[i] += nb->expScores[i];
+ count += 1.0;
+ bedFree(&nb);
+ }
+
+fprintf(stderr, "correction for %s:\n", chrom);
+for (i = 0; i < expCount; i++)
+ {
+ tmp[i] = tmp[i]/count;
+ fprintf(stderr, "%f,", tmp[i]);
+ }
+fprintf(stderr, "\n");
+
+*retCorrection = tmp;
+return TRUE;
+}
+
+
+
void setupProbeData(struct sqlConnection *hgConn, struct sqlConnection *biConn,
struct datasets *da, struct maGrouping *allA)
{
char *dataTable = da->data_table;
@@ -827,8 +888,12 @@
if (!inputProbeVals)
return;
+float *correctionX, *correctionY;
+boolean doX = calcSexCorrection(hgConn, dataTable, "chrX", &correctionX);
+boolean doY = calcSexCorrection(hgConn, dataTable, "chrY", &correctionY);
+
struct hash *settings = getSettings(dataTable);
struct hashEl *el = hashLookup(settings, "aliasTable");
if (!el)
errAbort("No aliasTable.\n");
@@ -846,8 +911,20 @@
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
struct bed *nb = bedLoadN(row+1, 15);
+
+// if (sameString(nb->chrom, "chrX") || sameString(nb->chrom, "chrY"))
+// { // skip sex chroms
+// bedFree(&nb);
+// continue;
+// }
+
+ if (sameString(nb->chrom, "chrX") && doX)
+ sexCorrect(nb, correctionX);
+ else if (sameString(nb->chrom, "chrY") && doY)
+ sexCorrect(nb, correctionY);
+
struct hashEl *el = hashLookup(gaHash, nb->name);
if (el)
{
struct geneAlias *ga = el->val;