src/hg/instinct/extractData/extractData.c 1.9

1.9 2010/05/10 09:04:51 jzhu
Index: src/hg/instinct/extractData/extractData.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/extractData/extractData.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/instinct/extractData/extractData.c	7 May 2010 18:41:49 -0000	1.8
+++ src/hg/instinct/extractData/extractData.c	10 May 2010 09:04:51 -0000	1.9
@@ -27,20 +27,26 @@
   "options:\n"
   "   -median       Output median value if multiple probes\n"
   "   -tcga         Handle TCGA ids, keeps only first 16 chars in sample_id\n"
   "   -samples=STR  Sample comma-separated list, otherwise all sample data are returned.\n"
+  "   -genes=STR    Gene comma-separated list, otherwise all genes are returned.\n"
+  "\n"
+  " ************the current code only works for localDb hg18 data, not data on cancerPub yet, needs more work\n"
   "examples:\n"
   "* Single sample / Single gene / All probes:\n"
-  "  ./extractData -tcga -samples=TCGA-06-0145-01A harvardCGH egfr\n"
+  "  ./extractData -tcga -samples=TCGA-06-0145-01A harvardCGH -genes=egfr\n"
   "\n"
   "* Multiple samples / Single gene /Median-value of probes:\n"
-  "  ./extractData -tcga -median -samples=TCGA-06-0145-01A,TCGA-06-0145-10A harvardCGH egfr\n"
+  "  ./extractData -tcga -median -samples=TCGA-06-0145-01A,TCGA-06-0145-10A harvardCGH -genes=egfr\n"
   "\n"
   "* All samples / multiple genes / All probes of all genes:\n"
-  "  ./extractData harvardCGH egfr,erbb2,esr1\n"
+  "  ./extractData harvardCGH -genes=egfr,erbb2,esr1\n"
+  "\n"
+  "* All samples / All genes / All probes of all genes:\n"
+  "  ./extractData harvardCGH\n"
   "\n"
   "* All samples / multiple genes / Median-value of probes:\n"
-  "  ./extractData -median harvardCGH egfr,erbb2,esr1\n"
+  "  ./extractData -median harvardCGH -genes=egfr,erbb2,esr1\n"
   );
 }
 
 #define TCGA_PATIENT_PREFIX 12
@@ -52,8 +58,9 @@
 static struct optionSpec options[] = {
     {"median", OPTION_BOOLEAN},
     {"tcga", OPTION_BOOLEAN},
     {"samples", OPTION_STRING},
+    {"genes", OPTION_STRING},
     {NULL, 0},
 };
 
 struct maGrouping *getMaGrouping(struct sqlConnection *hgConn, char *tableName)
@@ -254,11 +261,15 @@
     {
     dyStringPrintf(dy, "%s\tmedian\t", ga->gene);
     for (sv = svList; sv; sv = sv->next)
 	{
+	if (sv->vals== NULL)
+	    dyStringPrintf(dy, "NA");
+	else
+	    {
 	double val = slDoubleMedian(sv->vals);
 	dyStringPrintf(dy, "%f", val);
-	
+	    }
 	if (sv->next)
 	    dyStringPrintf(dy, "\t");
 	}
     dyStringPrintf(dy, "\n");
@@ -306,13 +317,8 @@
 
 
 void extractData(char *tableName, char *geneList, char *samples)
 {
-if (!geneList)
-    return;
-
-struct slName *genes = slNameListFromComma(geneList);
-
 struct hashEl *el;
 struct sqlConnection *hgConn = hAllocConnProfile("localDb", hgDb);
 
 /* Set up datasets entry */
@@ -325,8 +331,32 @@
 if (!el)
     errAbort("No aliasTable.\n");
 char *aliasTable = cloneString(el->val);
 
+/* Gene list: honor -genes when given, otherwise query aliasTable for all aliases. */
+struct slName *genes=NULL;
+if (geneList)
+    genes = slNameListFromComma(geneList);
+else
+    {
+    struct dyString *dy = newDyString(100);
+    dyStringPrintf(dy, "select alias from %s where name!=alias", aliasTable);
+    struct sqlResult *sr = sqlGetResult(hgConn, dy->string);
+    char **row;
+    struct dyString *genelistDy = newDyString(0);
+    while ((row = sqlNextRow(sr)) != NULL)
+	/* Append the row text directly; cloning it first leaked one string per row. */
+	dyStringPrintf(genelistDy, "%s,", row[0]);
+    sqlFreeResult(&sr);
+    freeDyString(&dy);
+
+    /* Echo the list to stderr. Gene names are data, so they must never be used as
+     * the format string: a '%' in an alias would be undefined behavior. */
+    fprintf(stderr, "%s\n", genelistDy->string);
+    genes = slNameListFromComma(genelistDy->string);
+    freeDyString(&genelistDy);
+    }
+
 struct hash *gaHash = getAliases(hgConn, aliasTable, genes);
 
 struct sampleVals *sv, *svList = prepareSampleVals(samples, allA);
 if (!svList)
@@ -355,16 +385,19 @@
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
-if (argc != 3)
+if (argc != 2)
     usage();
 
 if (optionExists("median"))
     median = TRUE;
 if (optionExists("tcga"))
     isTCGA = TRUE;
 
 char *samples = optionVal("samples", NULL);
-extractData(argv[1], argv[2], samples);
+char *genes = optionVal("genes", NULL);
+
+extractData(argv[1], genes, samples);
+
 return 0;
 }