9186636eb6806a49e2e2cc5fd6b11d690ab729c1
kent
  Mon Dec 28 18:36:02 2020 -0800
A little utility for changing labels in first row and or column of a tsv table.

diff --git src/utils/matrixRelabel/matrixRelabel.c src/utils/matrixRelabel/matrixRelabel.c
new file mode 100644
index 0000000..6097704
--- /dev/null
+++ src/utils/matrixRelabel/matrixRelabel.c
@@ -0,0 +1,121 @@
+/* matrixRelabel - Relabel rows and/or columns of a matrix. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "obscure.h"
+
+void usage()
+/* Report the command synopsis and option list through errAbort. */
+{
+static char *helpText =
+  "matrixRelabel - Relabel rows and/or columns of a matrix\n"
+  "usage:\n"
+  "   matrixRelabel in.tsv out.tsv\n"
+  "options:\n"
+  "   -newCol=colLabels - one line per label in a file\n"
+  "   -newRow=rowLabels - one line per label in a file\n"
+  "   -first=text - text to use as first word in file\n";
+errAbort("%s", helpText);
+}
+
+/* Command line validation table: the options handed to optionInit in main. */
+static struct optionSpec options[] = {
+   {"newCol", OPTION_STRING},   /* file with replacement column labels */
+   {"newRow", OPTION_STRING},   /* file with replacement row labels */
+   {"first", OPTION_STRING},    /* text to use as first word of output */
+   {NULL, 0},                   /* NULL name - presumably marks end of table */
+};
+
+void readLineArray(char *fileName, int *retCount, char ***retLines)
+/* Read fileName and return its lines as an array of strings in *retLines,
+ * with the array size in *retCount.  Aborts if no lines were read. */
+{
+struct slName *lineList = readAllLines(fileName);
+if (lineList == NULL)
+    errAbort("%s is empty", fileName);
+/* The array points straight at the list's strings, so the list is never freed. */
+int lineCount = slCount(lineList);
+char **lineArray;
+AllocArray(lineArray, lineCount);
+int ix = 0;
+struct slName *node;
+for (node = lineList; node != NULL; node = node->next)
+    lineArray[ix++] = node->name;
+*retCount = lineCount;
+*retLines = lineArray;
+}
+
+void matrixRelabel(char *input, char *output)
+/* Copy the tab-separated table in input to output, optionally replacing its
+ * first word (-first), the labels on its first line (-newCol) and the first
+ * word of each later line (-newRow).  Aborts if -newRow has too few lines. */
+{
+/* Set up stuff to relabel columns if need be */
+char **newColumns = NULL;
+int newColumnCount = 0;
+char *newColumnFile = optionVal("newCol", NULL);
+if (newColumnFile != NULL)
+    readLineArray(newColumnFile, &newColumnCount, &newColumns);
+
+/* Set up stuff to relabel rows if need be */
+char **newRows = NULL;
+int newRowCount = 0;
+char *newRowFile = optionVal("newRow", NULL);
+if (newRowFile != NULL)
+    readLineArray(newRowFile, &newRowCount, &newRows);
+
+struct lineFile *lf = lineFileOpen(input, TRUE);
+FILE *f = mustOpen(output, "w");
+
+/* First line: write first word, then either the new or the old column labels */
+char *line;
+lineFileNeedNext(lf, &line, NULL);
+char *word = nextTabWord(&line);
+/* nextTabWord gives NULL on an empty line; use "" so %s never sees NULL */
+char *first = optionVal("first", (word != NULL) ? word : "");
+fprintf(f, "%s", first);
+if (newColumns != NULL)
+    {
+    int i;
+    for (i=0; i<newColumnCount; ++i)
+        fprintf(f, "\t%s", newColumns[i]);
+    }
+else
+    {
+    while ((word = nextTabWord(&line)) != NULL)
+        fprintf(f, "\t%s", word);
+    }
+fputc('\n', f);
+
+/* Later lines are copied through, with the first word swapped if -newRow given */
+int rowIx = 0;
+while (lineFileNext(lf, &line, NULL))
+    {
+    if (newRows != NULL)
+        {
+        if (rowIx >= newRowCount)
+            errAbort("Not enough lines in %s for %s", newRowFile, input);
+        fputs(newRows[rowIx], f);
+        nextTabWord(&line); // skip over old first word
+        if (line != NULL)   // line is NULL when nothing followed the old first word
+            fputc('\t', f);
+        ++rowIx;
+        }
+    if (line != NULL)
+        fputs(line, f);
+    fputc('\n', f);
+    }
+lineFileClose(&lf);
+carefulClose(&f);   // close explicitly so buffered output is flushed to disk
+}
+
+int main(int argc, char *argv[])
+/* Process command line: parse options, then require exactly in.tsv and out.tsv. */
+{
+optionInit(&argc, argv, options);
+if (argc != 3)  /* expect program name plus input and output file names */
+    usage();
+matrixRelabel(argv[1], argv[2]);
+return 0;
+}