be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/utils/matrixPaste/matrixPaste.c src/utils/matrixPaste/matrixPaste.c new file mode 100644 index 0000000..20501e7 --- /dev/null +++ src/utils/matrixPaste/matrixPaste.c @@ -0,0 +1,151 @@ +/* matrixPaste - Paste together matrices - much like paste but sensible about labels. + * For unlabeled matrices just use paste. To paste vertically just use cat. */ + +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" + +boolean fit1 = FALSE; +boolean filePrefix = FALSE; + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "matrixPaste - Concatenate matrices - much like paste but sensible about labels. \n" + "For unlabeled matrices just use paste. Output goes to stdout\n" + "usage:\n" + " matrixPaste matrix1 matrix2 ... matrixN\n" + "options:\n" + " -fit1 - row labels are matrix1's and no problem if later matrices have more rows\n" + " -filePrefix - add the root part of the matrix file name as a prefix to column labels\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {"fit1", OPTION_BOOLEAN}, + {"filePrefix", OPTION_BOOLEAN}, + {NULL, 0}, +}; + +void matrixPaste(int count, char *files[]) +/* matrixPaste - Concatenate matrices - much like paste but sensible about labels. */ +{ +struct lineFile *lfs[count]; +char *midFileNames[count]; +FILE *f = stdout; + +/* Open all inputs */ +int i; +for (i=0; i<count; ++i) + lfs[i] = lineFileOpen(files[i], TRUE); +struct lineFile *firstLf = lfs[0]; + +/* Figure out root file names for labels if need be */ +for (i=0; i<count; ++i) + { + char name[FILENAME_LEN]; + splitPath(files[i], name, NULL, NULL); + trimLastChar(name); + midFileNames[i] = cloneString(name); + } + +/* Read through all the rows */ +boolean isLabelRow = TRUE; +for (;;) + { + /* Get the next line from the first file. If it's not there we are done but + * let's make sure all ther rest are too before breaking out. */ + char *first; + char *firstLabel = NULL; + if (!lineFileNext(firstLf, &first, NULL)) + { + /* We are done with first file. Do some error checking that other files are at end too */ + if (!fit1) + { + int i; + for (i=1; i<count; ++i) + { + struct lineFile *lf = lfs[i]; + char *line; + if (lineFileNext(lf, &line, NULL)) + errAbort("%s has more lines that %s\n", lf->fileName, firstLf->fileName); + } + } + break; // One way or another we are done + } + + /* Print out first file including labels */ + firstLabel = nextTabWord(&first); + if (isLabelRow) + { + char *mid = midFileNames[0]; + if (filePrefix) + fprintf(f, "%s ", mid); + fprintf(f, "%s", firstLabel); + char *colLabel; + while ((colLabel = nextTabWord(&first)) != NULL) + { + fputc('\t', f); + if (filePrefix) + fprintf(f, "%s ", mid); + fprintf(f, "%s", colLabel); + } + } + else + { + fprintf(f, "%s\t%s", firstLabel, first); + } + + /* Go through rest of files, not printing row labels (but still printing column labels on + * first row */ + int i; + for (i=1; i<count; ++i) + { + struct lineFile *lf = lfs[i]; + char *line; + if (!lineFileNext(lf, &line, NULL)) + errAbort("%s has fewer lines than %s\n", lf->fileName, firstLf->fileName); + if (isLabelRow) + { + nextTabWord(&line); // Middle labels from label rows vanish + char *mid = midFileNames[i]; + char *colLabel; + while ((colLabel = nextTabWord(&line)) != NULL) + { + fputc('\t', f); + if (filePrefix) + fprintf(f, "%s ", mid); + fprintf(f, "%s", colLabel); + } + } + else + { + char *label = nextTabWord(&line); /* Skip over label */ + if (!fit1) + { + if (!sameString(label, firstLabel)) + errAbort("%s has %s as label for line %d, %s has %s instead", + lf->fileName, label, lf->lineIx, firstLf->fileName, firstLabel); + } + fprintf(f, "\t%s", line); + } + } + fputc('\n', f); + isLabelRow = FALSE; + } +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc < 2) + usage(); +fit1 = optionExists("fit1"); +filePrefix = optionExists("filePrefix"); +matrixPaste(argc-1, argv+1); +return 0; +}