09275329a3702936dd2f0907157711703aab3b75
galt
  Tue Jul 14 12:06:52 2020 -0700
dbSnoop improvements. Added -sortAlpha to help with django db that has tables with mixed up field order so that all the fields appear in alphabetical order. Fixed a bug in index display where the multi-column indexes were not being handled correctly.

diff --git src/hg/makeDb/schema/dbSnoop/dbSnoop.c src/hg/makeDb/schema/dbSnoop/dbSnoop.c
index 05e81ea..6fbe743 100644
--- src/hg/makeDb/schema/dbSnoop/dbSnoop.c
+++ src/hg/makeDb/schema/dbSnoop/dbSnoop.c
@@ -1,55 +1,60 @@
 /* dbSnoop - Produce an overview of a database.. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "obscure.h"
 #include "tableStatus.h"
 
 char *profile = NULL;
 
+boolean sortAlpha = FALSE;
+
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "dbSnoop - Produce an overview of a database.\n"
   "usage:\n"
   "   dbSnoop database output\n"
   "options:\n"
   "   -unsplit - if set will merge together tables split by chromosome\n"
   "   -noNumberCommas - if set will leave out commas in big numbers\n"
   "   -justSchema - only schema parts, no contents\n"
   "   -skipTable=tableName - if set skip a given table name\n"
-  "   -profile=profileName - use profile for connection settings, default = '%s'\n", profile
+  "   -profile=profileName - use profile for connection settings, default = '%s'\n"
+  "   -sortAlpha - if set changes output order of fields to make comparisons between databases which have field order swapped easier.\n"
+  , profile
   );
 }
 
 static struct optionSpec options[] = {
    {"unsplit", OPTION_BOOLEAN},
    {"noNumberCommas", OPTION_BOOLEAN},
    {"justSchema", OPTION_BOOLEAN},
    {"skipTable", OPTION_STRING},
    {"profile", OPTION_STRING},
    {"host", OPTION_STRING},
    {"user", OPTION_STRING},
    {"password", OPTION_STRING},
+   {"sortAlpha", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 boolean noNumberCommas = FALSE;
 boolean unsplit = FALSE;
 boolean justSchema = FALSE;
 char *skipTable = NULL;
 struct slName *chromList;	/* List of chromosomes in unsplit case. */
 
 struct fieldDescription
 /* Information on a field. */
     {
     struct fieldDescription *next;	/* Next in list. */
     char *name;			/* Field name. */
     char *type;			/* SQL type. */
@@ -109,30 +114,38 @@
     {
     struct fieldInfo *next;	/* Next in list. */
     char *name;			/* Name of field. */
     struct slName *tableList;	/* List of tables using name. */
     int tableCount;		/* Count of tables. */
     };
 
 int fieldInfoCmp(const void *va, const void *vb)
 /* Compare two fieldInfo. */
 {
 const struct fieldInfo *a = *((struct fieldInfo **)va);
 const struct fieldInfo *b = *((struct fieldInfo **)vb);
 return b->tableCount - a->tableCount;
 }
 
+int fieldInfoCmpName(const void *va, const void *vb)
+/* Compare two fieldInfo on name. */
+{
+const struct fieldInfo *a = *((struct fieldInfo **)va);
+const struct fieldInfo *b = *((struct fieldInfo **)vb);
+return strcmp(a->name, b->name);
+}
+
 void noteField(struct hash *hash, char *table, char *field,
 	struct fieldInfo **pList)
 /* Keep track of field. */
 {
 struct fieldInfo *fi = hashFindVal(hash, field);
 if (fi == NULL)
     {
     AllocVar(fi);
     hashAddSaveName(hash, field, fi, &fi->name);
     slAddHead(pList, fi);
     }
 slNameAddHead(&fi->tableList, table);
 fi->tableCount += 1;
 }
 
@@ -318,30 +331,39 @@
  * a multi-column index. */
     {
     struct indexGroup *next;
     char *name;				/* Name of index. */
     struct indexInfo *fieldList;	/* Various fields involved in index. */
     };
 
 int indexGroupCmp(const void *va, const void *vb)
 /* Compare two index groups to order by name */
 {
 const struct indexGroup *a = *((struct indexGroup **)va);
 const struct indexGroup *b = *((struct indexGroup **)vb);
 return strcmp(a->name, b->name);
 }
 
+int indexInfoCmp(const void *va, const void *vb)
+/* Compare two index groups to order by name */
+{
+const struct indexInfo *a = *((struct indexInfo **)va);
+const struct indexInfo *b = *((struct indexInfo **)vb);
+return a->seqInIndex - b->seqInIndex;
+}
+
+
 void printIndexes(FILE *f, struct sqlConnection *conn, struct tableInfo *ti)
 /* Print info about indexes on table to file. */
 {
 char query[256], **row;
 struct sqlResult *sr;
 struct indexGroup *groupList = NULL, *group;
 struct hash *groupHash = newHash(8);
 struct indexInfo *ii;
 char *tableName = ti->name;
 char splitTable[256];
 boolean isSplit = FALSE;
 
 if (unsplit)
     {
     struct slName *chrom;
@@ -356,56 +378,58 @@
 		isSplit = TRUE;
 		break;
 		}
 	    }
 	}
     }
 
 /* Build up information on indexes in this table by processing
  * mysql show indexes command results.  This command returns 
  * a separate row for each field in each index.  We'll group these. */
 sqlSafef(query, sizeof(query), "show indexes from `%s`", tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     ii = indexInfoLoad(row);
-    group = hashFindVal(groupHash, ii->field);
+    group = hashFindVal(groupHash, ii->indexName);
     if (group == NULL)
         {
 	AllocVar(group);
-	hashAddSaveName(groupHash, ii->field, group, &group->name);
+	hashAddSaveName(groupHash, ii->indexName, group, &group->name);
 	slAddTail(&groupList, group);
 	}
     slAddTail(&group->fieldList, ii);
     }
 sqlFreeResult(&sr);
 
 fprintf(f, "%s has ", ti->name);
 if (!justSchema)
     fprintf(f, "%d rows and ", ti->status->rows);
 slSort(&groupList, indexGroupCmp);
 fprintf(f, "%d indexes\n", slCount(groupList)); 
 
 for (group = groupList; group != NULL; group = group->next)
     {
     long long maxCardinality = 0, nonUnique = FALSE;
     fprintf(f, "\t");
+    fprintf(f, "%s: ", group->name);
+    slSort(&group->fieldList, indexInfoCmp);
     for (ii=group->fieldList; ii != NULL; ii = ii->next)
         {
-	fprintf(f, "%s.%s", ti->name, ii->field);
+	fprintf(f, "%s", ii->field);
 	if (ii->next != NULL)
-	    fprintf(f, ",");
+	    fprintf(f, "+");
 	if (ii->cardinality > maxCardinality)
 	    maxCardinality = ii->cardinality;
 	nonUnique = ii->nonUnique;
 	}
     fprintf(f, "\t%s", (nonUnique ? "MUL" : "PRI") );
     if (maxCardinality > 0 && !isSplit && !justSchema)
 	fprintf(f, "\t%lld", maxCardinality);
     else
         fprintf(f, "\tn/a");
     fprintf(f, "\n");
     }
 hashFree(&groupHash);
 }
 
 struct sqlConnection *dbConnect(char *database)
@@ -511,33 +535,35 @@
     fprintf(f, "%s\t%s\t%s\t%s\n", "#table", "update time", "create time", "checkTime");
     for (ti = tiList; ti != NULL; ti = ti->next)
 	{
 	fprintf(f, "%s\t%s\t%s\t%s\n", ti->name, naForNull(ti->status->updateTime), 
 	    ti->status->createTime, naForNull(ti->status->checkTime));
 	}
     fprintf(f, "\n");
     }
 
 /* Print summary of rows and fields in each table ordered alphabetically */
 slSort(&tiList, tableInfoCmpName);
 fprintf(f, "TABLE FIELDS SUMMARY:\n");
 fprintf(f, "%s\t%s\n", "#name", "fields");
 for (ti = tiList; ti != NULL; ti = ti->next)
     {
-    struct fieldDescription *field;
-    slReverse(&ti->fieldList);
     fprintf(f, "%s\t", ti->name);
+    slReverse(&ti->fieldList);
+    if (sortAlpha)
+	slSort(&ti->fieldList, fieldInfoCmpName);
+    struct fieldDescription *field;
     for (field = ti->fieldList; field != NULL; field = field->next)
 	fprintf(f, "%s,", field->name);
     fprintf(f, "\n");
     }
 fprintf(f, "\n");
 
 /* Print summary of indexes. */
 fprintf(f, "TABLE INDEX SUMMARY\n");
 for (ti = tiList; ti != NULL; ti = ti->next)
     printIndexes(f, conn, ti);
 fprintf(f, "\n");
 
 /* Print summary of fields and tables fields are used in 
  * ordered by the number of tables a field is in. */
 slSort(&fiList, fieldInfoCmp);
@@ -572,18 +598,19 @@
 carefulClose(&f);
 sqlDisconnect(&conn);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 profile = optionVal("profile", getDefaultProfileName());
 if (argc != 3)
     usage();
 noNumberCommas = optionExists("noNumberCommas");
 unsplit = optionExists("unsplit");
 justSchema = optionExists("justSchema");
 skipTable = optionVal("skipTable", skipTable);
+sortAlpha = optionExists("sortAlpha");
 dbSnoop(argv[1], argv[2]);
 return 0;
 }