2e56b90285da462507a48babb2f9aee32dcbb96c kent Sat Jan 9 11:08:01 2021 -0800 A little matrix normalizing command, one of many in the world. diff --git src/utils/matrixNormalize/matrixNormalize.c src/utils/matrixNormalize/matrixNormalize.c new file mode 100644 index 0000000..410dbdd --- /dev/null +++ src/utils/matrixNormalize/matrixNormalize.c @@ -0,0 +1,174 @@ +/* matrixNormalize - Normalize a matrix somehow - make it's columns or rows all sum to one or have vector length one.. */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "vMatrix.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "matrixNormalize - Normalize a matrix somehow - make it's columns or rows all sum to one or have vector length one.\n" + "usage:\n" + " matrixNormalize direction how inMatrix outMatrix\n" + "where \"direction\" is one of\n" + " row - normalize rows to one\n" + " column - normalize columns to one\n" + "and \"how\" is one of\n" + " sum - sum adds to one after normalization\n" + " length - Euclidian length as a vector adds to one\n" + "options:\n" + " -target=val - use target val instead of one for normalizing\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {"target", OPTION_DOUBLE}, + {NULL, 0}, +}; + +boolean howIsLength(char *how) +/* Return TRUE if how is "length", FALSE if it is "sum", and abort otherwise */ +{ +if (sameWord(how, "length")) + return TRUE; +else if (sameWord(how, "sum")) + return FALSE; +errAbort("Unrecognized \"how\" %s", how); +return FALSE; +} + +void matrixNormalizeRows(char *inFile, boolean isLength, double target, char *outFile) +/* Normalize matrix one row at a time. */ +{ +struct vRowMatrix *m = vRowMatrixOnTsv(inFile); +FILE *f = mustOpen(outFile, "w"); +int size = m->xSize; +double *row; +char *label; +while ((row = vRowMatrixNextRow(m, &label)) != NULL) + { + double scaleVal = target; + double sum = 0.0; + int i; + if (isLength) + { + for (i=0; i<size; ++i) + { + double val = row[i]; + sum += val*val; + } + sum = sqrt(sum); + } + else + { + for (i=0; i<size; ++i) + sum += row[i]; + } + if (sum != 0.0) + scaleVal /= sum; + for (i=0; i<size; ++i) + row[i] *= scaleVal; + fprintf(f, "%s", label); + for (i=0; i<size; ++i) + fprintf(f, "\t%g", row[i]); + fprintf(f, "\n"); + } +carefulClose(&f); +vRowMatrixFree(&m); +} + +void memMatrixToTsv(struct memMatrix *m, char *fileName) +/* Output memory matrix to tab-sep file with labels */ +{ +FILE *f = mustOpen(fileName, "w"); + +/* Print label row */ +fprintf(f, "%s", m->centerLabel); +int x, xSize = m->xSize; +for (x=0; x<xSize; ++x) + fprintf(f, "\t%s", m->xLabels[x]); +fprintf(f, "\n"); + +/* Print rest */ +/* Now output going through row by row */ +int y, ySize = m->ySize; +for (y=0; y<ySize; ++y) + { + fprintf(f, "%s", m->yLabels[y]); + double *row = m->rows[y]; + for (x=0; x<xSize; ++x) + fprintf(f, "\t%g", row[x]); + fprintf(f, "\n"); + } +carefulClose(&f); +} + +void matrixNormalizeColumns(char *inFile, boolean isLength, double target, char *outFile) +/* Normalize matrix one row at a time. */ +{ +/* Open up input file */ +struct memMatrix *m = memMatrixFromTsv(inFile); + +/* Go through matrix by column (y-dimension) */ +int xSize = m->xSize, ySize = m->ySize; +int x,y; +for (x=0; x<xSize; ++x) + { + /* Calculate how big we are currently */ + double scaleVal = target; + double sum = 0.0; + if (isLength) + { + for (y=0; y<ySize; ++y) + { + double val = m->rows[y][x]; + sum += val*val; + } + sum = sqrt(sum); + } + else + { + for (y=0; y<ySize; ++y) + sum += m->rows[y][x]; + } + + /* Multiply ourselves by normalizing scale factor */ + if (sum != 0.0) + { + scaleVal /= sum; + for (y=0; y<ySize; ++y) + m->rows[y][x] *= scaleVal; + } + } + +/* Output and go home */ +memMatrixToTsv(m, outFile); +} + + +void matrixNormalize(char *direction, char *how, char *inFile, char *outFile) +/* matrixNormalize - Normalize a matrix somehow - make it's columns or rows all sum to one or + * have vector length one.. */ +{ +boolean isLength = howIsLength(how); +double target = optionDouble("target", 1.0); +if (sameWord(direction, "row")) + matrixNormalizeRows(inFile, isLength, target, outFile); +else if (sameWord(direction, "column")) + matrixNormalizeColumns(inFile, isLength, target, outFile); +else + errAbort("Unrecognized direction %s", direction); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 5) + usage(); +matrixNormalize(argv[1], argv[2], argv[3], argv[4]); +return 0; +}