cc7c832d77253b88358ece4b128ceafd6bf8d5c4 kent Sat Dec 18 11:58:42 2021 -0800 Multithreading to speed things up a bit in column mode. diff --git src/utils/matrixNormalize/matrixNormalize.c src/utils/matrixNormalize/matrixNormalize.c index 1bc9c6a..1b1dcb3 100644 --- src/utils/matrixNormalize/matrixNormalize.c +++ src/utils/matrixNormalize/matrixNormalize.c @@ -1,43 +1,51 @@ /* matrixNormalize - Normalize a matrix somehow - make it's columns or rows all sum to one or have vector length one.. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "vMatrix.h" +#include "pthreadDoList.h" + +int clThreads = 10; void usage() /* Explain usage and exit. */ { errAbort( "matrixNormalize - Normalize a matrix somehow - make it's columns or rows all sum to one or have vector length one.\n" "usage:\n" " matrixNormalize direction how inMatrix outMatrix\n" "where \"direction\" is one of\n" " row - normalize rows to one\n" " column - normalize columns to one\n" "and \"how\" is one of\n" " sum - sum adds to one after normalization\n" " length - Euclidian length as a vector adds to one\n" "options:\n" " -target=val - use target val instead of one for normalizing\n" + " -threads=N - number of threads to use, Expect limited help if you increase this\n" + " Default is %d. Only goes multitheaded in column mode and even then\n" + " about 60%% of program is in the non-parallel bit. Still, it helps\n" + , clThreads ); } /* Command line validation table. */ static struct optionSpec options[] = { {"target", OPTION_DOUBLE}, + {"threads", OPTION_INT}, {NULL, 0}, }; boolean howIsLength(char *how) /* Return TRUE if how is "length", FALSE if it is "sum", and abort otherwise */ { if (sameWord(how, "length")) return TRUE; else if (sameWord(how, "sum")) return FALSE; errAbort("Unrecognized \"how\" %s", how); return FALSE; } void matrixNormalizeRows(char *inFile, boolean isLength, double target, char *outFile) @@ -100,81 +108,126 @@ fprintf(f, "%s", m->yLabels[y]); double *row = m->rows[y]; for (x=0; xxSize, ySize = m->ySize; -int x,y; -for (x=0; xySize; +int y; if (isLength) { for (y=0; yrows[y][x]; sum += val*val; } sum = sqrt(sum); } else { for (y=0; yrows[y][x]; } /* Multiply ourselves by normalizing scale factor */ if (sum != 0.0) { scaleVal /= sum; for (y=0; yrows[y][x] *= scaleVal; } } +struct columnJob +/* Multithreaded job that handles a column */ + { + struct columnJob *next; + int x; /* Index of column withing matrix */ + boolean isLength; /* Do sqrt(sum(squares)) type normalization */ + double target; /* Want column to end up summing to this */ + }; + +void columnWorker(void *item, void *context) +/* A worker to execute a single column job */ +{ +struct columnJob *job = item; +struct memMatrix *m = context; +normCol(m, job->x, job->target, job->isLength); +} + +void matrixNormalizeColumns(char *inFile, boolean isLength, double target, char *outFile) +/* Normalize matrix one row at a time. */ +{ +/* Open up input file */ +struct memMatrix *m = memMatrixFromTsv(inFile); + +/* Go through matrix by column (y-dimension) */ +int xSize = m->xSize; +int x; + +if (clThreads > 1) + { + int threadCount = min(clThreads, 100); + struct columnJob *jobList = NULL; + for (x=0; xx = x; + job->isLength = isLength; + job->target = target; + slAddHead(&jobList, job); + } + slReverse(&jobList); + pthreadDoList(threadCount, jobList, columnWorker, m); + } +else + { + for (x=0; x