src/hg/instinct/extractData/extractData.c 1.9
1.9 2010/05/10 09:04:51 jzhu
Index: src/hg/instinct/extractData/extractData.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/extractData/extractData.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/instinct/extractData/extractData.c 7 May 2010 18:41:49 -0000 1.8
+++ src/hg/instinct/extractData/extractData.c 10 May 2010 09:04:51 -0000 1.9
@@ -27,20 +27,26 @@
"options:\n"
" -median Output median value if multiple probes\n"
" -tcga Handle TCGA ids, keeps only first 16 chars in sample_id\n"
" -samples=STR Sample comma-separated list, otherwise all sample data are returned.\n"
+ " -genes=STR Gene comma-separated list, otherwise all genes are returned.\n"
+ "\n"
+ " ************the current code only works for localDb hg18 data, not data on cancerPub yet, needs more work\n"
"examples:\n"
"* Single sample / Single gene / All probes:\n"
- " ./extractData -tcga -samples=TCGA-06-0145-01A harvardCGH egfr\n"
+ " ./extractData -tcga -samples=TCGA-06-0145-01A harvardCGH -genes=egfr\n"
"\n"
"* Multiple samples / Single gene /Median-value of probes:\n"
- " ./extractData -tcga -median -samples=TCGA-06-0145-01A,TCGA-06-0145-10A harvardCGH egfr\n"
+ " ./extractData -tcga -median -samples=TCGA-06-0145-01A,TCGA-06-0145-10A harvardCGH -genes=egfr\n"
"\n"
"* All samples / multiple genes / All probes of all genes:\n"
- " ./extractData harvardCGH egfr,erbb2,esr1\n"
+ " ./extractData harvardCGH -genes=egfr,erbb2,esr1\n"
+ "\n"
+ "* All samples / All genes / All probes of all genes:\n"
+ " ./extractData harvardCGH\n"
"\n"
"* All samples / multiple genes / Median-value of probes:\n"
- " ./extractData -median harvardCGH egfr,erbb2,esr1\n"
+ " ./extractData -median harvardCGH -genes=egfr,erbb2,esr1\n"
);
}
#define TCGA_PATIENT_PREFIX 12
@@ -52,8 +58,9 @@
static struct optionSpec options[] = {
{"median", OPTION_BOOLEAN},
{"tcga", OPTION_BOOLEAN},
{"samples", OPTION_STRING},
+ {"genes", OPTION_STRING}, /* comma-separated gene list; per the usage text, all genes are returned when omitted */
{NULL, 0},
};
struct maGrouping *getMaGrouping(struct sqlConnection *hgConn, char *tableName)
@@ -254,11 +261,15 @@
{
dyStringPrintf(dy, "%s\tmedian\t", ga->gene);
for (sv = svList; sv; sv = sv->next)
{
+ if (sv->vals== NULL)
+ dyStringPrintf(dy, "NA");
+ else
+ {
double val = slDoubleMedian(sv->vals);
dyStringPrintf(dy, "%f", val);
-
+ }
if (sv->next)
dyStringPrintf(dy, "\t");
}
dyStringPrintf(dy, "\n");
@@ -306,13 +317,8 @@
void extractData(char *tableName, char *geneList, char *samples)
{
-if (!geneList)
- return;
-
-struct slName *genes = slNameListFromComma(geneList);
-
struct hashEl *el;
struct sqlConnection *hgConn = hAllocConnProfile("localDb", hgDb);
/* Set up datasets entry */
@@ -325,8 +331,32 @@
if (!el)
errAbort("No aliasTable.\n");
char *aliasTable = cloneString(el->val);
+/* Decide which genes to extract: the caller's comma-separated list when one
+ * was given, otherwise every alias in aliasTable whose name differs from it. */
+struct slName *genes = NULL;
+if (geneList)
+    genes = slNameListFromComma(geneList);
+else
+    {
+    struct dyString *query = newDyString(100);
+    dyStringPrintf(query, "select alias from %s where name!=alias", aliasTable);
+    struct sqlResult *sr = sqlGetResult(hgConn, query->string);
+    char **row;
+    struct dyString *genelistDy = newDyString(0);
+    /* Append row[0] directly; cloning it first only leaked one string per row. */
+    while ((row = sqlNextRow(sr)) != NULL)
+        dyStringPrintf(genelistDy, "%s,", row[0]);
+    sqlFreeResult(&sr);
+    dyStringFree(&query);
+
+    /* Echo the list to stderr as data ("%s"), never as the format string:
+     * an alias containing '%' would otherwise be interpreted by fprintf. */
+    fprintf(stderr, "%s\n", genelistDy->string);
+    /* NOTE(review): the cannibalized buffer is still handed over unfreed, as
+     * before; confirm slNameListFromComma() copies its input before freeing it. */
+    genes = slNameListFromComma(dyStringCannibalize(&genelistDy));
+    }
+
struct hash *gaHash = getAliases(hgConn, aliasTable, genes);
struct sampleVals *sv, *svList = prepareSampleVals(samples, allA);
if (!svList)
@@ -355,16 +385,19 @@
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
-if (argc != 3)
+if (argc != 2) /* only the table name stays positional; the gene list moved to -genes */
usage();
if (optionExists("median"))
median = TRUE;
if (optionExists("tcga"))
isTCGA = TRUE;
char *samples = optionVal("samples", NULL);
-extractData(argv[1], argv[2], samples);
+char *genes = optionVal("genes", NULL); /* presumably NULL (the default passed here) when -genes is absent -- confirm */
+
+extractData(argv[1], genes, samples); /* argv[1] is the table name; a NULL gene list makes extractData() query all aliases */
+
return 0;
}