src/hg/instinct/bioInt2/bioIntUI.c 1.16

1.16 2009/04/16 19:53:55 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioIntUI.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioIntUI.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -b -B -U 4 -r1.15 -r1.16
--- src/hg/instinct/bioInt2/bioIntUI.c	7 Apr 2009 20:09:27 -0000	1.15
+++ src/hg/instinct/bioInt2/bioIntUI.c	16 Apr 2009 19:53:55 -0000	1.16
@@ -525,15 +525,25 @@
 hashElFreeList(&elList);
 }
 
 void sendUniqueMatch(struct sqlConnection *conn, struct json *js, 
-		     int cohort_id, char *feature_name, char *source, 
+		     int cohort_id, char *feature_names, char *sources, 
 		     struct datasets *daList)
 {
-if (sameString(source, "gene/geneset"))
-    sendAnalysisFeatureData(conn, js, cohort_id, feature_name);
-else if (sameString(source, "clinical"))
-    sendClinicalData(conn, js, cohort_id, feature_name, daList);
+struct slName *s, *sList = slNameListFromComma(sources);
+struct slName *f, *fList = slNameListFromComma(feature_names);
+
+if (slCount(sList) != slCount(fList))
+    errAbort("source list length not equal to feature list length\n");
+
+for (s = sList, f = fList; s && f; s = s->next, f = f->next)
+    {
+    struct json *container = jsonAddContainer(js, f->name);
+    if (sameString(s->name, "gene/geneset"))
+	sendAnalysisFeatureData(conn, container, cohort_id, f->name);
+    else if (sameString(s->name, "clinical"))
+	sendClinicalData(conn, container, cohort_id, f->name, daList);
+    }
 }
 
 void getFeatureData()
 {
@@ -800,18 +810,9 @@
 
 hFreeConn(&conn);
 }
 
-boolean runStat(struct analysisVals *avList, double *prob)
-{
-if (!avList)
-    return FALSE;
-
-*prob = slCount(avList);
-return TRUE;
-}
-
-void getMostDiff()
+void expandFeature()
 {
 int takeTop = cartUsualInt(cart, bioIntTakeTop, 5);
 
 int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
@@ -819,81 +820,95 @@
     cohort_id = 2;  // hard code for first analysis during testing!
 
 char *feature_name = cartOptionalString(cart, bioIntFeatureName);
 if (!feature_name)
-    errAbort("%s must be set for mode=getMostDiff\n", bioIntFeatureName);
+    errAbort("%s must be set for mode=getGenesInGeneset\n", bioIntFeatureName);
 
 struct sqlConnection *conn = hAllocConnProfile(localDb, db);
 
-struct analyses *an, *analyses = getAnalysesByCohortId(conn, cohort_id);
-char query[512];
-
-an = analyses;
-safef(query, sizeof(query), "select * from %s order by feature_id", an->result_table);
-
-uglyTime(NULL);
-
-//struct analysisVals *avList = analysisValsLoadByQuery(conn, query);
-struct slPair *sp, *spList = NULL;
-struct slDouble *sd;
-double prob;
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-struct analysisVals *av, *avList = NULL;
-int currentId = -1;
-
-while ((row = sqlNextRow(sr)) != NULL)
+struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
+if (!af)
     {
-    av = analysisValsLoad(row);
-    if (currentId == -1)
-	currentId = av->feature_id;
+    hFreeConn(&conn);
+    errAbort("Could not find analysisFeature in db");
+    }
 
-    if (av->feature_id == currentId)
-	slAddHead(&avList, av);
-    else
-	{
-	prob = -1.0;
-	if (runStat(avList, &prob))
+struct analyses *an, *anList = getAnalysesByCohortId(conn, cohort_id);
+if (!anList)
 	    {
-	    sd = slDoubleNew(prob);
-	    AllocVar(sp);
-	    sp->name = cloneString("test");
-	    sp->val = sd;
-	    slAddHead(&spList, sp);
+    hFreeConn(&conn);
+    errAbort("No analyses with cohort_id = %d.\n", cohort_id);
 	    }	
-	analysisValsFreeList(&avList);
-	avList = NULL;
-	currentId = av->feature_id;
-	slAddHead(&avList, av);
+
+char tmpQ[512];
+for (an = anList; an; an = an->next)
+    {
+    safef(tmpQ, sizeof(tmpQ),
+	  "select * from %s where feature_id = %d;",
+	  an->result_table, af->id);
+    if (sqlExists(conn, tmpQ))
+	break;
 	}
+
+if (!an)
+    {
+    hFreeConn(&conn);
+    errAbort("No analysis feature with id=%d in any analysis table in cohort with id = %d", 
+	     af->id, cohort_id);
     }
-sqlFreeResult(&sr);
 
-prob = -1.0;
-if (runStat(avList, &prob))
+// input_tables may hold a comma-separated list of raw datasets; expanding into
+//    those is not supported yet, so it must name a single existing table here.
+char *input_tables = an->input_tables;
+
+if (!sqlTableExists(conn, input_tables))
     {
-    sd = slDoubleNew(prob);
-    AllocVar(sp);
-    sp->name = cloneString("test");
-    sp->val = sd;
-    slAddHead(&spList, sp);
+    hFreeConn(&conn);
+    errAbort("Table does not exist, %s.\n", input_tables);
     }	
 
-uglyTime("finished");
-analysisValsFreeList(&avList);
-uglyTime("free'd");
+/***
+select DISTINCT analysisFeatures.id from pathways join pathwayGenes on pathways.id=pathwayGenes.id join geneLookup on geneLookup.id=pathwayGenes.gene_id join kgXref on kgXref.kgID=geneLookup.kgId join analysisFeatures on analysisFeatures.feature_name=kgXref.geneSymbol where pathways.id=123;
+***/
+uglyTime(NULL);
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, 
+	       "select DISTINCT %s.id from %s join %s on %s.id=%s.id "
+	       "join %s on %s.id=%s.gene_id join %s on %s.kgID=%s.kgId "
+	       "join %s on %s.feature_name=%s.geneSymbol where %s.name=\"%s\";",
+	       AF_TABLE, PA_TABLE, PG_TABLE, PA_TABLE, PG_TABLE,
+	       GL_TABLE, GL_TABLE, PG_TABLE, KX_TABLE, KX_TABLE, GL_TABLE,
+	       AF_TABLE, AF_TABLE, KX_TABLE, PA_TABLE, af->feature_name);
+char *query = dyStringCannibalize(&dy);
 
-int count = 0;
-struct json *js = newJson();
-for (sp = spList; sp; sp = sp->next)
+struct slInt *si, *siList = sqlQuickNumList(conn, query);
+
+dy = newDyString(100);
+dyStringPrintf(dy, 
+	       "select %s.feature_name from %s join %s on feature_id=%s.id where feature_id in (",
+	       AF_TABLE, input_tables, AF_TABLE, AF_TABLE);
+for (si = siList; si; si = si->next)
     {
-    sd = sp->val;
-    jsonAddDouble(js, sp->name, sd->val);
-    if (count > takeTop)
-	break;
-    count++;
+    dyStringPrintf(dy, "%d", si->val);
+    if (si->next)
+	dyStringPrintf(dy, ",");
     }
+dyStringPrintf(dy, 
+	       ") group by feature_id order by sum(abs(val)) DESC limit %d;", 
+	       takeTop);
+query = dyStringCannibalize(&dy);
+
+struct slName *sl, *fList = sqlQuickList(conn, query);
+struct slName *sList = NULL;
+// Only gene/geneset sources are handled for now, so add one such label per feature.
+for (sl = fList; sl; sl = sl->next)
+    slNameAddHead(&sList, "gene/geneset");
+
+char *feature_names = slNameListToString(fList, ',');
+char *sources = slNameListToString(sList, ',');
+
+struct json *js = newJson();
+sendUniqueMatch(conn, js, cohort_id, feature_names, sources, NULL);  
 
 if (js)
     hPrintf("%s", js->print(js));
 
@@ -923,10 +937,10 @@
 else if (sameString(mode, "getClinicalFeatures"))
     getClinicalFeatures();
 else if (sameString(mode, "getMostCorrelated"))
     getMostCorrelated();
-else if (sameString(mode, "getMostDiff"))
-    getMostDiff();
+else if (sameString(mode, "expandFeature"))
+    expandFeature();
 else
     errAbort("Incorrect mode = %s", mode);
 
 cartRemovePrefix(cart, bioIntPrefix);