2671b4753566a83d798087c6432159336ba90a40 kent Sat Dec 18 11:04:43 2021 -0800 Microoptimization in writing out zeros sped it up 10%. Refactoring a code snippet into a function for possible reuse. diff --git src/utils/matrixClusterColumns/matrixClusterColumns.c src/utils/matrixClusterColumns/matrixClusterColumns.c index b77a4bc..cc64fe6 100644 --- src/utils/matrixClusterColumns/matrixClusterColumns.c +++ src/utils/matrixClusterColumns/matrixClusterColumns.c @@ -386,41 +386,59 @@ } } } /* Do output to file and grand totalling */ FILE *f = job->matrixFile; fprintf(f, "%s", v->rowLabel->string); double *grandTotal = job->clusterGrandTotal; for (i=0; i<clusterCount; ++i) { fprintf(f, "\t"); double total = clusterTotal[i]; grandTotal[i] += total; double val; if (doMedian) + { val = doubleMedian(clusterElements[i], job->clusterSamples[i]); + fprintf(f, "%g", val); + } else + { + if (total > 0) + { val = total/clusterElements[i]; fprintf(f, "%g", val); } + else + fputc('0', f); + } + } fprintf(f, "\n"); } - - +static void addRowToIndex(FILE *fIndex, struct vMatrix *v) +/* Write out info to index file about where this row begins */ +{ +if (fIndex) + { + fprintf(fIndex, "%s", v->rowLabel->string); + struct lineFile *lf = v->lf; + fprintf(fIndex, "\t%lld\t%lld\n", (long long)lineFileTell(lf), (long long)lineFileTellSize(lf)); + } +} void matrixClusterColumns(char *matrixFile, char *metaFile, char *sampleField, int outputCount, char **clusterFields, char **outMatrixFiles, char **outStatsFiles, char *outputIndex, boolean doMedian) /* matrixClusterColumns - Group the columns of a matrix into clusters, and output a matrix * the with same number of rows and generally much fewer columns.. */ { FILE *fIndex = NULL; if (outputIndex) fIndex = mustOpen(outputIndex, "w"); /* Load up metadata and make sure we have all of the cluster fields we need * and fill out array of clusterIx corresponding to clusterFields in metaFile. */ struct fieldedTable *metaTable = fieldedTableFromTabFile(metaFile, metaFile, NULL, 0); struct hash *metaHash = fieldedTableIndex(metaTable, sampleField); @@ -438,41 +456,36 @@ for (i=0; i<outputCount; ++i) { job = clusteringNew(clusterFields[i], outMatrixFiles[i], outStatsFiles[i], metaTable, v, doMedian); slAddTail(&jobList, job); } /* Chug through big matrix a row at a time clustering */ dotForUserInit(100); for (;;) { double *a = vMatrixNextRow(v); if (a == NULL) break; - if (fIndex) - { - fprintf(fIndex, "%s", v->rowLabel->string); - struct lineFile *lf = v->lf; - fprintf(fIndex, "\t%lld\t%lld\n", (long long)lineFileTell(lf), (long long)lineFileTellSize(lf)); - } + addRowToIndex(fIndex, v); for (job = jobList; job != NULL; job = job->next) clusterRow(job, v, a); dotForUser(); } -fputc('\n', stderr); // Cover last dotForUser +dotForUserEnd(); /* Do stats and close files */ for (job = jobList; job != NULL; job = job->next) { outputClusterStats(job); carefulClose(&job->matrixFile); } vMatrixClose(&v); carefulClose(&fIndex); } int main(int argc, char *argv[]) /* Process command line. */ {