92f77f7bef111331d3c88d14cf98ef2e64c765f8 kent Tue Dec 21 12:06:48 2021 -0800 Improved a structure name and removed some debugging. Added some comments. diff --git src/utils/matrixClusterColumns/matrixClusterColumns.c src/utils/matrixClusterColumns/matrixClusterColumns.c index 2ba453a..d0f47bd 100644 --- src/utils/matrixClusterColumns/matrixClusterColumns.c +++ src/utils/matrixClusterColumns/matrixClusterColumns.c @@ -383,52 +383,60 @@ } dyStringAppendC(out, '\n'); } static void addRowToIndex(FILE *fIndex, char *rowLabel, struct lineFile *lf) /* Write out info to index file about where this row begins */ { if (fIndex) { fprintf(fIndex, "%s", rowLabel); fprintf(fIndex, "\t%lld\t%lld\n", (long long)lineFileTell(lf), (long long)lineFileTellSize(lf)); } } -struct lineIoInfo -/* Enough rows to do things in parallel we hope? */ +struct lineIoItem +/* This is an item fed to a parallel worker. It corresponds to a single line of + * input matrix */ { - struct lineIoInfo *next; + struct lineIoItem *next; /* Pointer to next in list */ + + /* Information about input file and where we are in it. */ char *fileName; - struct clustering *clusteringList; - int lineIx; /* Index of line in file */ - int chunkIx; /* Index of line in chunk */ + int lineIx; /* Index of line in input file */ long long lineStartOffset; /* Start offset within file */ long long lineSize; /* Size of line */ long long lineEndOffset; + int chunkIx; /* Index of line in chunk */ + + /* Slightly parsed input. */ struct dyString *rowLabel; /* Just the row label of input */ struct dyString *lineIn; /* Unparsed rest of input line */ - double *vals; /* Array of values parsed from string */ - double *totalingTemp; /* Buffer for parllel computation */ + + struct clustering *clusteringList; /* Instructions on how to cluster and output */ + + /* Temporary values used for calculating output */ + double *vals; /* Parse out input matrix values for this line */ + double *totalingTemp; /* Buffer for parallel computation of totals */ int *elementsTemp; /* buffers for parallel computation */ }; void lineWorker(void *item, void *context) /* A worker to execute a single column clustering */ { -struct lineIoInfo *lii = item; +struct lineIoItem *lii = item; struct ccMatrix *v = context; int xSize = v->colCount; char *s = lii->lineIn->string; char *rowLabel = lii->rowLabel->string;; /* Convert ascii to floating point, with little optimization for the many zeroes we usually see */ int i; double *vals = lii->vals; for (i=0; i<xSize; ++i) { char *str = nextTabWord(&s); if (str == NULL) errAbort("not enough fields in input matrix line %d", lii->lineIx); double val = ((str[0] == '0' && str[1] == 0) ? 0.0 : sqlDouble(str)); vals[i] = val; @@ -464,54 +472,53 @@ /* Load up input matrix first line at least */ struct ccMatrix *v = ccMatrixOpen(matrixFile); verbose(1, "matrix %s has %d fields\n", matrixFile, v->colCount); /* Create a clustering for each output and find index in metaTable for each. */ struct clustering *clusteringList = NULL, *clustering; int i; for (i=0; i<outputCount; ++i) { clustering = clusteringNew(clusterFields[i], outMatrixFiles[i], outStatsFiles[i], metaTable, v, doMedian); slAddTail(&clusteringList, clustering); } /* Set up buffers for pthread workers */ -struct lineIoInfo chunks[chunkMaxSize]; +struct lineIoItem chunks[chunkMaxSize]; for (i=0; i<chunkMaxSize; ++i) { - struct lineIoInfo *chunk = &chunks[i]; + struct lineIoItem *chunk = &chunks[i]; chunk->fileName = matrixFile; chunk->clusteringList = clusteringList; chunk->chunkIx = i; chunk->lineIn = dyStringNew(0); chunk->rowLabel = dyStringNew(0); AllocArray(chunk->vals, v->colCount); AllocArray(chunk->totalingTemp, v->colCount); AllocArray(chunk->elementsTemp, v->colCount); } /* Chug through big matrix a row at a time clustering */ dotForUserInit(1); boolean atEof = FALSE; struct lineFile *lf = v->lf; -uglyf("Starting main loop on %d columns\n", v->colCount); while (!atEof) { /* Read a chunk of lines of the file */ - struct lineIoInfo *chunkList = NULL, *chunk; + struct lineIoItem *chunkList = NULL, *chunk; int chunkSize; for (chunkSize = 0; chunkSize < chunkMaxSize; chunkSize += 1) { char *line; if (!lineFileNextReal(lf, &line)) { atEof = TRUE; break; } chunk = &chunks[chunkSize]; chunk->lineIx = lf->lineIx; char *rowLabel = nextTabWord(&line); addRowToIndex(fIndex, rowLabel, lf); dyStringClear(chunk->rowLabel); dyStringAppend(chunk->rowLabel, rowLabel);