69bc117defcc4d25684b377b8733a5db6d067425
kent
  Mon May 24 16:11:39 2021 -0700
First cut of a utility to convert matrixMarket format matrices to tsv format.

diff --git src/utils/matrixMarketToTsv/matrixMarketToTsv.c src/utils/matrixMarketToTsv/matrixMarketToTsv.c
new file mode 100644
index 0000000..533d55e
--- /dev/null
+++ src/utils/matrixMarketToTsv/matrixMarketToTsv.c
@@ -0,0 +1,98 @@
+/* matrixMarketToTsv - Convert matrix file from Matrix Market sparse matrix format to tab-separated-values. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "matrixMarket.h"
+#include "obscure.h"
+
+void usage()
+/* Report the usage message through errAbort, which is expected not to return. */
+{
+errAbort(
+  "matrixMarketToTsv - Convert matrix file from Matrix Market sparse matrix format to tab-separated-values.\n"
+  "usage:\n"
+  "   matrixMarketToTsv in.mtx sampleLabels.lst geneLabels.lst out.tsv\n"
+  "where in.mtx is a matrix market format matrix.  SampleLabels.lst is a text file\n"
+  "with one label per line.  It will end up in the first row of the output.\n"
+  "GeneLabels.lst is a text file with one gene name per line.  It will end up\n"
+  "in the first column of the output.\n"
+  );
+}
+
+/* Command line validation table passed to optionInit; it lists no options, only a {NULL, 0} entry. */
+static struct optionSpec options[] = {
+   {NULL, 0},
+};
+
+void matrixMarketToTsv(char *inMatrix, char *inSamples, char *inGenes, char *outMatrix)
+/* matrixMarketToTsv - Convert matrix file from Matrix Market sparse matrix format to
+ * tab-separated-values.  inMatrix is the sparse matrix; inSamples and inGenes are read
+ * with readAllLines, one label per line.  outMatrix gets a first line of sample labels,
+ * then one line per gene: the gene label followed by one value per sample.  Aborts if
+ * the label counts disagree with the matrix header or if a matrix entry has coordinates
+ * outside the in-memory matrix. */
+{
+struct matrixMarket *mm = matrixMarketOpen(inMatrix);
+verbose(1, "%s has %d rows and %d columns\n", inMatrix, mm->rowCount, mm->colCount);
+struct slName *sampleList = readAllLines(inSamples);
+int sampleCount = slCount(sampleList);
+verbose(1, "Read %d samples from %s\n", sampleCount, inSamples);
+if (sampleCount != mm->rowCount)
+    errAbort("Mismatch between row count in matrix and sample count");
+struct slName *geneList = readAllLines(inGenes);
+int geneCount = slCount(geneList);
+verbose(1, "Read %d genes from %s\n", geneCount, inGenes);
+if (geneCount != mm->colCount)
+    errAbort("Mismatch between column count in matrix and gene count");
+
+/* Create matrix in memory the right size: one array of sampleCount values per gene.
+ * NOTE(review): needMem presumably zeroes, so cells absent from the input print as 0. */
+double **rows;
+AllocArray(rows, geneCount);
+int i, j;
+for (i=0; i<geneCount; ++i)
+    rows[i] = needMem(sampleCount * sizeof(double) );
+
+/* Fill in matrix from the sparse input.  This function cannot see whether
+ * matrixMarketNext range-checks coordinates, so check them before indexing. */
+verbose(1, "Reading matrix\n");
+while (matrixMarketNext(mm))
+    {
+    int x = mm->x, y = mm->y;
+    if (x < 0 || x >= geneCount || y < 0 || y >= sampleCount)
+        errAbort("Coordinates %d,%d out of range in %s", x, y, inMatrix);
+    rows[x][y] = mm->val;
+    }
+verbose(1, "Done reading matrix\n");
+matrixMarketClose(&mm);
+
+/* Open output and write first line: each sample label preceded by a tab */
+FILE *f = mustOpen(outMatrix, "w");
+struct slName *name;
+for (i=0, name=sampleList; i<sampleCount; ++i, name=name->next)
+    fprintf(f, "\t%s", name->name);
+fprintf(f, "\n");
+
+/* Write out rest of lines, one per gene, calling dotForUser after each */
+dotForUserInit(100);
+for (i=0, name=geneList; i<geneCount; ++i, name=name->next)
+    {
+    fprintf(f, "%s", name->name);
+    for (j=0; j<sampleCount; ++j)
+        fprintf(f, "\t%g", rows[i][j]);
+    fprintf(f, "\n");
+    dotForUser();
+    }
+carefulClose(&f);
+}
+
+int main(int argc, char *argv[])
+/* Run optionInit over the command line, then convert using the four file arguments. */
+{
+enum {expectedArgc = 5};    /* program name plus four file names */
+optionInit(&argc, argv, options);
+if (argc != expectedArgc) usage();
+matrixMarketToTsv(argv[1], argv[2], argv[3], argv[4]);
+return 0;
+}