a9f88c971e47c2f09f25184b61b531a3d3bdc22c ceisenhart Sun Jun 22 16:51:27 2014 -0700 Modified significantly. Takes in a processed table of expression data, and outputs a file with comparison data diff --git src/hg/expData/expData.c src/hg/expData/expData.c index c2a4166..3d0e285 100644 --- src/hg/expData/expData.c +++ src/hg/expData/expData.c @@ -13,92 +13,192 @@ { errAbort( "expData - Takes in a relational database and outputs expression information\n" "usage:\n" " expData inputDataBase output\n" "options:\n" " -xxx=XXX\n" ); } /* Command line validation table. */ static struct optionSpec options[] = { {NULL, 0}, }; +typedef struct +{ +char *name; +int count; +} expCell; + +typedef struct +{ +char *name; +float value; +}nameValue; + + char* floatToString(float input) { char* result = needMem(sizeof(result)); sprintf(result,"%f", input); return result; freez(result); } +int compareExpCells(expCell exp1, expCell exp2) +/* a compare function for the HAC tree */ +{ +int result = exp1.count - exp2.count; +return sqrt(result*result); +} + +expCell mergeExpCells(expCell exp1, expCell exp2) +/* a merge function for the HAC tree */ +{ +expCell result; +result.count = (exp1.count + exp2.count)/2; +result.name = catTwoStrings(exp1.name,exp2.name); +return result; +} + +int compareTwoRows(char *line1, char* line2) +/* seems to be working + * Takes in two rows from the table, as strings. + * Parses the rows and finds the summation of distance between + * the expression values of each row. */ +{ +char *output1[10000]; +char *output2[10000]; +char *temp1 = cloneString(line1); +char *temp2 = cloneString(line2); +int size1 = 0; +size1 = chopTabs(temp1,output1); +int size2 = 0; +size2 = chopTabs(temp2,output2); +//double result = 1000000; +//if (size1 != size2) +// { +// return result; +// } +int i; +double sum = 0; +for (i = 0; i<size1; ++i) + { + double diff = atoi(output1[i]) - atoi(output2[i]); + sum += sqrt((diff * diff)); + } +return sum; +} + + + +void fileIO (char *input, char *output) +{ +FILE *f = mustOpen(output,"w"); +//struct lm *localMem = lmInit(0); +struct lineFile *lf = lineFileOpen(input, TRUE); +char *line; +long count = 1; +int i, j; +struct hash *hashRows = hashNew(0); +if (!lineFileNext(lf, &line, NULL)) + { + errAbort("%s This should be the column names", lf->fileName); + } +while (lineFileNext(lf,&line,NULL)) + { + char *temp = cloneString(line); + hashAdd(hashRows,floatToString(count),temp); + ++ count; + } +for (i = 1; i < count; ++i) + { + for (j = i + 1 ; j < count ; ++j) + { + expCell exp; + exp.name = catTwoStrings(floatToString(i), floatToString(j)); + char *temp1 = hashFindVal(hashRows, floatToString(i)); + char *temp2 = hashFindVal(hashRows, floatToString(j)); + exp.count = compareTwoRows(temp1,temp2); + fprintf(f,"%d",compareTwoRows(temp1, temp2)); + fprintf(f,"%s\n"," "); + } + } +//struct hacTree *clusters = hacTreeFromItems((struct slList *)sldList, localMem, compareExpCell, mergeExpCell, NULL, NULL); +} + void printOutput(FILE *f, struct hash *hash, int count) +/* Prints the data to the output file. + * Each row corresponds to a tissue sample; the first element. + * ALl subsequent elements are in sequence name : expression value pairs. */ { -char *result = needMem(sizeof(result)); +nameValue *output; int i; for (i = 0 ; i < count; ++i) { - result = hashFindVal(hash,floatToString(i)); + output = hashFindVal(hash,floatToString(i)); struct hashEl *hel; fprintf(f,"%s", floatToString(i)); fprintf(f,"%s", " "); for (hel = hashLookup(hash,floatToString(i)); hel != NULL; hel = hashLookupNext(hel)) { - char *value = hel->val; - fprintf(f, "%s", value); - fprintf(f, "%s", " "); + nameValue *temp = hel->val; + char *name = temp->name; + long score = temp->value; + fprintf(f, "%s", name); + fprintf(f, "%s", ":"); + fprintf(f, "%ld", score); + fprintf(f, "%s", " + "); } - fprintf(f,"%s\n", ";"); + fprintf(f, "\n%s", " " ); } } - - void expData(char *output) /* Grabs expression data and formats it nicely */ { FILE *f = mustOpen(output, "w"); struct sqlConnection *conn = sqlConnect("hgFixed"); char query[512]; -sqlSafef(query, sizeof(query), "select * from gnfHumanAtlas2Median limit 1"); +sqlSafef(query, sizeof(query), "select * from gnfHumanAtlas2Median limit 2"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *line = needMem(sizeof(line)); struct expData *list = NULL; struct hash *hash = hashNew(0); int count = 0; while ((row = sqlNextRow(sr)) != NULL) { struct expData *exp = expDataLoad(row); int i; count = exp->expCount; for (i = 0; i<exp->expCount; ++i) { - char *prepName = catTwoStrings(exp->name," : "); - char *index = floatToString(exp->expScores[i]); - line = catTwoStrings(prepName,index); - hashAdd(hash,floatToString(i),line); + nameValue *pair = needMem(sizeof(pair)); + pair->name = exp->name; + pair->value = exp->expScores[i]; + hashAdd(hash,floatToString(i),pair); } slAddHead(&list, exp); } printOutput(f,hash,count); slReverse(&list); expDataFreeList(&list); carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); -if (argc != 2) +if (argc != 3) usage(); -expData(argv[1]); +fileIO(argv[1],argv[2]); return 0; }