5deadb92cd2d8ddc11d8e11fee61d5f8faf304d6 kent Sun Oct 4 09:48:11 2020 -0700 Made stats output sort so most common occurs first in list. diff --git src/tabFile/tabToTabDir/tabToTabDir.c src/tabFile/tabToTabDir/tabToTabDir.c index 852625e..1d34ddb 100644 --- src/tabFile/tabToTabDir/tabToTabDir.c +++ src/tabFile/tabToTabDir/tabToTabDir.c @@ -241,31 +241,30 @@ fv->combineHash = hashNew(0); if (startsWithWord("count", command)) { if (!isEmpty(s)) errAbort("Something following $count line %d of %s", fileLineNumber, fileName);; fv->combineType = ctCount; fv->type = fvCount; } else if (startsWithWord("list", command)) { fv->combineType = ctUniq; } else if (startsWithWord("stats", command)) { fv->combineType = ctStats; - uglyf("ctStats command\n"); } else { errAbort("Unrecognized command $%s line %d of %s", command, fileLineNumber, fileName); } } if (fv->combineHash == NULL || fv->combineType != ctCount) { if (isTotallySimple(s) && hashLookup(symbols->varHash, s) == NULL) { fv->val = cloneString(skipLeadingSpaces(s)); eraseTrailingSpaces(fv->val); fv->type = fvVar; } else @@ -348,36 +347,52 @@ if (rowIx < 0) return NULL; return rec->tableRow[rowIx]; } } struct uniqValLister /* A list of unique values */ { struct uniqValLister *next; struct dyString *csv; // Comma separated list of values seen so far struct hash *uniq; // Hash of values seen so far. }; +struct oneValCount +/* Counts occurrences of one value */ + { + struct oneValCount *next; + char *name; // Name - not allocated here + int count; // Number of times seen + }; + +int oneValCountCmp(const void *va, const void *vb) +/* Compare two oneValCounts. 
*/ +{ +const struct oneValCount *a = *((struct oneValCount **)va); +const struct oneValCount *b = *((struct oneValCount **)vb); +return b->count - a->count; +} + struct uniqValCounter /* A list of unique values and how often they occur */ { struct uniqValCounter *next; - struct hash *uniq; // Integer valued list of values seen so far - struct slName *list; // List of uniq values seen so far + struct hash *uniq; // Integer valued list of values seen so far - oneValCount values + struct oneValCount *list; // List of uniq values seen so far }; void selectUniqueIntoTable(struct fieldedTable *inTable, struct symRec *symbols, char *specFile, // Just for error reporting struct newFieldInfo *fieldList, int keyFieldIx, struct fieldedTable *outTable) /* Populate out table with selected unique rows from newTable */ { struct hash *uniqHash = hashNew(0); struct fieldedRow *fr; int outFieldCount = outTable->fieldCount; char *outRow[outFieldCount]; if (slCount(fieldList) != outFieldCount) // A little cheap defensive programming on inputs internalErr(); @@ -447,35 +462,38 @@ hashAdd(lister->uniq, val, NULL); csvEscapeAndAppend(lister->csv, val); } break; } case ctStats: { struct uniqValCounter *counter = hashFindVal(fv->combineHash, key); if (counter == NULL) { AllocVar(counter); counter->uniq = hashNew(0); hashAdd(fv->combineHash, key, counter); } char *val = outRow[fv->newIx]; - if (hashLookup(counter->uniq, val) == NULL) + struct oneValCount *one = hashFindVal(counter->uniq, val); + if (one == NULL) { - slNameAddHead(&counter->list, val); + AllocVar(one); + hashAddSaveName(counter->uniq, val, one, &one->name); + slAddHead(&counter->list, one); } - hashIncInt(counter->uniq, val); + one->count += 1; break; } } } } struct fieldedRow *uniqFr = hashFindVal(uniqHash, key); if (uniqFr == NULL) { uniqFr = fieldedTableAdd(outTable, outRow, outFieldCount, 0); hashAdd(uniqHash, key, uniqFr); } else /* Do error checking for true uniqueness of key */ { int i; @@ -518,34 +536,35 @@ char 
countBuf[16]; safef(countBuf, sizeof(countBuf), "%d", hashIntVal(fv->combineHash, key)); fr->row[fv->newIx] = lmCloneString(outTable->lm, countBuf); break; } case ctUniq: { struct uniqValLister *lister = hashMustFindVal(fv->combineHash, key); fr->row[fv->newIx] = lister->csv->string; break; } case ctStats: { struct uniqValCounter *counter = hashMustFindVal(fv->combineHash, key); struct dyString *dy = dyStringNew(0); - struct slName *el; + struct oneValCount *el; + slSort(&counter->list, oneValCountCmp); for (el = counter->list; el != NULL; el = el->next) { - dyStringPrintf(dy, "%s(%d),", el->name, hashIntVal(counter->uniq, el->name) ); + dyStringPrintf(dy, "%s(%d),", el->name, el->count); } fr->row[fv->newIx] = dyStringCannibalize(&dy); break; } } } } } } } struct hash *hashFieldIx(char **fields, int fieldCount) /* Create a hash filled with fields with integer valued indexes */