45b9bd3fd9a066cb2515629b23c3eef5a10eeb74 kent Sat Dec 26 20:22:29 2020 -0800 Making outputting position in expMatrix part of default output. diff --git src/utils/clusterMatrixToBarChartBed/clusterMatrixToBarChartBed.c src/utils/clusterMatrixToBarChartBed/clusterMatrixToBarChartBed.c index 4f42ff8..1b7fe7e 100644 --- src/utils/clusterMatrixToBarChartBed/clusterMatrixToBarChartBed.c +++ src/utils/clusterMatrixToBarChartBed/clusterMatrixToBarChartBed.c @@ -1,56 +1,55 @@ /* clusterMatrixToBarchartBed - Compute a barchart bed file from a gene matrix * and a gene bed file and a way to cluster samples. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "localmem.h" #include "obscure.h" #include "sqlNum.h" -boolean clDataOffset = FALSE; +boolean clSimple = FALSE; boolean clMedian = FALSE; char *clName2 = NULL; void usage() /* Explain usage and exit. */ { errAbort( "clusterMatrixToBarchartBed - Compute a barchart bed file from a gene matrix\n" "and a gene bed file and a way to cluster samples.\n" "usage:\n" " clusterMatrixToBarchartBed sampleClusters.tsv geneMatrix.tsv geneset.bed output.bed\n" "where:\n" " sampleClusters.tsv is a two column tab separated file with sampleId and clusterId\n" " geneMatrix.tsv has a row for each gene. The first row uses the same sampleId as above\n" " geneset.bed has the maps the genes in the matrix (from it's first column) to the genome\n" " geneset.bed needs 6 standard bed fields. Unless name2 is set it also needs a name2\n" " field as the last field\n" " output.bed is the resulting bar chart, with one column per cluster\n" "options:\n" - " -dataOffset - store the position of gene in geneMatrix.tsv file in output\n" + " -simple - don't store the position of gene in geneMatrix.tsv file in output\n" " -median - use median (instead of mean)\n" " -name2=twoColFile.tsv - get name2 from file where first col is same ase geneset.bed's name\n" ); } /* Command line validation table. */ static struct optionSpec options[] = { - {"dataOffset", OPTION_BOOLEAN}, - {"_dataOffset", OPTION_BOOLEAN}, + {"simple", OPTION_BOOLEAN}, {"median", OPTION_BOOLEAN}, {"name2", OPTION_STRING}, {NULL, 0}, }; struct hash *hashTsvBy(char *in, int keyColIx, int *retColCount) /* Return a hash of rows keyed by the given column */ { struct lineFile *lf = lineFileOpen(in, TRUE); struct hash *hash = hashNew(0); char *line = NULL, **row = NULL; int colCount = 0, colAlloc=0; /* Columns as counted and as allocated */ while (lineFileNextReal(lf, &line)) { if (colCount == 0) @@ -307,38 +306,38 @@ name2 = name; fprintf(f, "%s\t", name2); fprintf(f, "%d\t", clusterCount); for (i=0; i<clusterCount; ++i) { if (i != 0) fprintf(f, ","); if (doMedian) fprintf(f, "%g", doubleMedian(clusterElements[i], clusterSamples[i])); else fprintf(f, "%g", clusterTotal[i]/clusterElements[i]); } /* Data file offset info */ - if (clDataOffset) + if (!clSimple) fprintf(f, "\t%lld\t%lld", (long long)lineFileTell(lf), (long long)lineLength); fprintf(f, "\n"); } dotForUser(); } verbose(1, "\n%d genes found, %d (%0.2f%%) missed\n", hitCount, missCount, 100.0*missCount/(hitCount+missCount)); carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 5) usage(); -clDataOffset = (optionExists("_dataOffset") || optionExists("dataOffset")); +clSimple = optionExists("simple"); clMedian = optionExists("median"); clName2 = optionVal("name2", clName2); clusterMatrixToBarchartBed(argv[1], argv[2], argv[3], argv[4]); return 0; }