09275329a3702936dd2f0907157711703aab3b75 galt Tue Jul 14 12:06:52 2020 -0700 dbSnoop improvements. Added -sortAlpha to help with django db that has tables with mixed up field order so that all the fields appear in alphabetical order. Fixed a bug in index display where the multi-column indexes were not being handled correctly. diff --git src/hg/makeDb/schema/dbSnoop/dbSnoop.c src/hg/makeDb/schema/dbSnoop/dbSnoop.c index 05e81ea..6fbe743 100644 --- src/hg/makeDb/schema/dbSnoop/dbSnoop.c +++ src/hg/makeDb/schema/dbSnoop/dbSnoop.c @@ -1,55 +1,60 @@ /* dbSnoop - Produce an overview of a database.. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "dystring.h" #include "jksql.h" #include "obscure.h" #include "tableStatus.h" char *profile = NULL; +boolean sortAlpha = FALSE; + void usage() /* Explain usage and exit. */ { errAbort( "dbSnoop - Produce an overview of a database.\n" "usage:\n" " dbSnoop database output\n" "options:\n" " -unsplit - if set will merge together tables split by chromosome\n" " -noNumberCommas - if set will leave out commas in big numbers\n" " -justSchema - only schema parts, no contents\n" " -skipTable=tableName - if set skip a given table name\n" - " -profile=profileName - use profile for connection settings, default = '%s'\n", profile + " -profile=profileName - use profile for connection settings, default = '%s'\n" + " -sortAlpha - if set changes output order of fields to make comparisons between databases which have field order swapped easier.\n" + , profile ); } static struct optionSpec options[] = { {"unsplit", OPTION_BOOLEAN}, {"noNumberCommas", OPTION_BOOLEAN}, {"justSchema", OPTION_BOOLEAN}, {"skipTable", OPTION_STRING}, {"profile", OPTION_STRING}, {"host", OPTION_STRING}, {"user", OPTION_STRING}, {"password", OPTION_STRING}, + {"sortAlpha", OPTION_BOOLEAN}, {NULL, 0}, }; boolean noNumberCommas = FALSE; boolean unsplit = FALSE; boolean justSchema = FALSE; char *skipTable = NULL; struct slName *chromList; /* List of chromosomes in unsplit case. */ struct fieldDescription /* Information on a field. */ { struct fieldDescription *next; /* Next in list. */ char *name; /* Field name. */ char *type; /* SQL type. */ @@ -109,30 +114,38 @@ { struct fieldInfo *next; /* Next in list. */ char *name; /* Name of field. */ struct slName *tableList; /* List of tables using name. */ int tableCount; /* Count of tables. */ }; int fieldInfoCmp(const void *va, const void *vb) /* Compare two fieldInfo. */ { const struct fieldInfo *a = *((struct fieldInfo **)va); const struct fieldInfo *b = *((struct fieldInfo **)vb); return b->tableCount - a->tableCount; } +int fieldInfoCmpName(const void *va, const void *vb) +/* Compare two fieldInfo on name. */ +{ +const struct fieldInfo *a = *((struct fieldInfo **)va); +const struct fieldInfo *b = *((struct fieldInfo **)vb); +return strcmp(a->name, b->name); +} + void noteField(struct hash *hash, char *table, char *field, struct fieldInfo **pList) /* Keep track of field. */ { struct fieldInfo *fi = hashFindVal(hash, field); if (fi == NULL) { AllocVar(fi); hashAddSaveName(hash, field, fi, &fi->name); slAddHead(pList, fi); } slNameAddHead(&fi->tableList, table); fi->tableCount += 1; } @@ -318,30 +331,39 @@ * a multi-column index. */ { struct indexGroup *next; char *name; /* Name of index. */ struct indexInfo *fieldList; /* Various fields involved in index. */ }; int indexGroupCmp(const void *va, const void *vb) /* Compare two index groups to order by name */ { const struct indexGroup *a = *((struct indexGroup **)va); const struct indexGroup *b = *((struct indexGroup **)vb); return strcmp(a->name, b->name); } +int indexInfoCmp(const void *va, const void *vb) +/* Compare two index groups to order by name */ +{ +const struct indexInfo *a = *((struct indexInfo **)va); +const struct indexInfo *b = *((struct indexInfo **)vb); +return a->seqInIndex - b->seqInIndex; +} + + void printIndexes(FILE *f, struct sqlConnection *conn, struct tableInfo *ti) /* Print info about indexes on table to file. */ { char query[256], **row; struct sqlResult *sr; struct indexGroup *groupList = NULL, *group; struct hash *groupHash = newHash(8); struct indexInfo *ii; char *tableName = ti->name; char splitTable[256]; boolean isSplit = FALSE; if (unsplit) { struct slName *chrom; @@ -356,56 +378,58 @@ isSplit = TRUE; break; } } } } /* Build up information on indexes in this table by processing * mysql show indexes command results. This command returns * a separate row for each field in each index. We'll group these. */ sqlSafef(query, sizeof(query), "show indexes from `%s`", tableName); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { ii = indexInfoLoad(row); - group = hashFindVal(groupHash, ii->field); + group = hashFindVal(groupHash, ii->indexName); if (group == NULL) { AllocVar(group); - hashAddSaveName(groupHash, ii->field, group, &group->name); + hashAddSaveName(groupHash, ii->indexName, group, &group->name); slAddTail(&groupList, group); } slAddTail(&group->fieldList, ii); } sqlFreeResult(&sr); fprintf(f, "%s has ", ti->name); if (!justSchema) fprintf(f, "%d rows and ", ti->status->rows); slSort(&groupList, indexGroupCmp); fprintf(f, "%d indexes\n", slCount(groupList)); for (group = groupList; group != NULL; group = group->next) { long long maxCardinality = 0, nonUnique = FALSE; fprintf(f, "\t"); + fprintf(f, "%s: ", group->name); + slSort(&group->fieldList, indexInfoCmp); for (ii=group->fieldList; ii != NULL; ii = ii->next) { - fprintf(f, "%s.%s", ti->name, ii->field); + fprintf(f, "%s", ii->field); if (ii->next != NULL) - fprintf(f, ","); + fprintf(f, "+"); if (ii->cardinality > maxCardinality) maxCardinality = ii->cardinality; nonUnique = ii->nonUnique; } fprintf(f, "\t%s", (nonUnique ? "MUL" : "PRI") ); if (maxCardinality > 0 && !isSplit && !justSchema) fprintf(f, "\t%lld", maxCardinality); else fprintf(f, "\tn/a"); fprintf(f, "\n"); } hashFree(&groupHash); } struct sqlConnection *dbConnect(char *database) @@ -511,33 +535,35 @@ fprintf(f, "%s\t%s\t%s\t%s\n", "#table", "update time", "create time", "checkTime"); for (ti = tiList; ti != NULL; ti = ti->next) { fprintf(f, "%s\t%s\t%s\t%s\n", ti->name, naForNull(ti->status->updateTime), ti->status->createTime, naForNull(ti->status->checkTime)); } fprintf(f, "\n"); } /* Print summary of rows and fields in each table ordered alphabetically */ slSort(&tiList, tableInfoCmpName); fprintf(f, "TABLE FIELDS SUMMARY:\n"); fprintf(f, "%s\t%s\n", "#name", "fields"); for (ti = tiList; ti != NULL; ti = ti->next) { - struct fieldDescription *field; - slReverse(&ti->fieldList); fprintf(f, "%s\t", ti->name); + slReverse(&ti->fieldList); + if (sortAlpha) + slSort(&ti->fieldList, fieldInfoCmpName); + struct fieldDescription *field; for (field = ti->fieldList; field != NULL; field = field->next) fprintf(f, "%s,", field->name); fprintf(f, "\n"); } fprintf(f, "\n"); /* Print summary of indexes. */ fprintf(f, "TABLE INDEX SUMMARY\n"); for (ti = tiList; ti != NULL; ti = ti->next) printIndexes(f, conn, ti); fprintf(f, "\n"); /* Print summary of fields and tables fields are used in * ordered by the number of tables a field is in. */ slSort(&fiList, fieldInfoCmp); @@ -572,18 +598,19 @@ carefulClose(&f); sqlDisconnect(&conn); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); profile = optionVal("profile", getDefaultProfileName()); if (argc != 3) usage(); noNumberCommas = optionExists("noNumberCommas"); unsplit = optionExists("unsplit"); justSchema = optionExists("justSchema"); skipTable = optionVal("skipTable", skipTable); +sortAlpha = optionExists("sortAlpha"); dbSnoop(argv[1], argv[2]); return 0; }