8ba435bdd1a1fda7d9f05e027477b6b9d8bdc7f4
galt
  Wed Jun 29 11:18:08 2016 -0700
About to remove findExpectedIntersectingRows() because it seems less necessary now.

diff --git src/hg/hgTablesTest/hgTablesTest.c src/hg/hgTablesTest/hgTablesTest.c
index 7e7bc96..4604f40 100644
--- src/hg/hgTablesTest/hgTablesTest.c
+++ src/hg/hgTablesTest/hgTablesTest.c
@@ -22,76 +22,79 @@
 
 #define MAX_ATTEMPTS 10
 
 
 /* Command line variables. */
 char *clOrg = NULL;	/* Organism from command line. */
 char *clDb = NULL;	/* DB from command line */
 char *clGroup = NULL;	/* Group from command line. */
 char *clTrack = NULL;	/* Track from command line. */
 char *clTable = NULL;	/* Table from command line. */
 int clGroups = BIGNUM;	/* Number of groups to test. */
 int clTracks = 4;	/* Number of track to test. */
 int clTables = 2;	/* Number of tables to test. */
 int clDbs = 1;		/* Number of databases per organism. */
 int clOrgs = 2;		/* Number of organisms to test. */
-boolean appendLog;      /* append to log rather than create it */
+boolean appendLog;      /* Append to log rather than create it. */
+boolean noShuffle;      /* Suppress shuffling of track and table lists. */
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "hgTablesTest - Test hgTables web page\n"
   "usage:\n"
   "   hgTablesTest url log\n"
   "Where url is something like hgwbeta.cse.ucsc.edu/cgi-bin/hgTables\n"
   "and log is a file where error messages and statistics will be written\n"
   "options:\n"
   "   -org=Human - Restrict to Human (or Mouse, Fruitfly, etc.)\n"
   "   -db=hg17 - Restrict to particular database\n"
   "   -group=genes - Restrict to a particular group\n"
   "   -track=knownGene - Restrict to a particular track\n"
   "   -table=knownGeneMrna - Restrict to a particular table\n"
   "   -orgs=N - Number of organisms to test.  Default %d\n"
   "   -dbs=N - Number of databases per organism to test. Default %d\n"
   "   -groups=N - Number of groups to test (default all)\n"
   "   -tracks=N - Number of tracks per group to test (default %d)\n"
   "   -tables=N - Number of tables per track to test (default %d)\n"
   "   -verbose=N - Set to 0 for silent operation, 2 or 3 for debugging\n"
   "   -appendLog - Append to log file rather than creating it\n"
-  "   -seed flag to specify seed for random number generator as debugging aid.\n"
+  "   -seed N - Specify seed for random number generator as debugging aid.\n"
+  "   -noShuffle - do not shuffle tracks and tables lists.\n"
   , clOrgs, clDbs, clTracks, clTables);
 }
 
 FILE *logFile;	/* Log file. */
 int seed = 0;           /* seed for random number generator */
 
 static struct optionSpec options[] = 
 {
     {"org", OPTION_STRING},
     {"db", OPTION_STRING},
     {"group", OPTION_STRING},
     {"track", OPTION_STRING},
     {"table", OPTION_STRING},
     {"orgs", OPTION_INT},
     {"dbs", OPTION_INT},
     {"search", OPTION_STRING},
     {"groups", OPTION_INT},
     {"tracks", OPTION_INT},
     {"tables", OPTION_INT},
     {"appendLog", OPTION_BOOLEAN},
     {"seed", OPTION_INT},
+    {"noShuffle", OPTION_BOOLEAN},
     {NULL, 0},
 };
 
 struct tablesTest
 /* Test on one column. */
     {
     struct tablesTest *next;
     struct qaStatus *status;	/* Result of test. */
     char *info[6];
     };
 
 enum tablesTestInfoIx 
 {
     ntiiType,
     ntiiOrg,
@@ -152,30 +155,41 @@
 if (basePage != NULL)
     {
     struct qaStatus *qs;
     if (db != NULL)
 	htmlPageSetVar(basePage, NULL, "db", db);
     if (org != NULL)
 	htmlPageSetVar(basePage, NULL, "org", org);
     if (group != NULL)
         htmlPageSetVar(basePage, NULL, hgtaGroup, group);
     if (track != NULL)
         htmlPageSetVar(basePage, NULL, hgtaTrack, track);
     if (table != NULL)
         htmlPageSetVar(basePage, NULL, hgtaTable, table);
     qs = qaPageFromForm(basePage, basePage->forms, 
 	    button, buttonVal, &page);
+
+    if (!page)
+	{
+	verbose(2, "page is NULL, qs->errMessage=[%s]\n", qs->errMessage);
+	if (startsWith("carefulAlloc: Allocated too much memory", qs->errMessage))
+	    {
+	          verbose(1, "Response html page too large (500MB) (%s %s %s %s %s)\n", org, db, group, track, table);
+	    fprintf(logFile, "Response html page too large (500MB) (%s %s %s %s %s)\n", org, db, group, track, table);
+	    }
+	}
+
     /* 
     if (page->forms != NULL)
         htmlFormPrint(page->forms, stdout);
     */
     // do not need to keep the returned structure, the answer is accumulating
     // in global variable: tablesTestList
     (void) tablesTestNew(qs, testName, org, db, group, track, table);
     }
 return page;
 }
 
 void serialSubmit(struct htmlPage **pPage,
 	char *org, char *db, char *group, char *track, char *table,
 	char *testName, char *button, char *buttonVal)
 /* Submit page, replacing old page with new one. */
@@ -206,32 +220,32 @@
 char *hostinfo = sqlHostInfo(conn);
       verbose(1, "Connecting as %s to database server %s\n", user, hostinfo);
 fprintf(logFile, "Connecting as %s to database server %s\n", user, hostinfo); fflush(logFile);
 sqlDisconnect(&conn);
 }
 
 void showRunningHostName()
 /* Show hostname of the machine we are running on. */
 {
 char hostname[HOST_NAME_MAX];
 if (gethostname(hostname, sizeof hostname))
     {
     perror("gethostname");
     safecpy(hostname, sizeof hostname, "error-reading-hostname");
     }
-      verbose(1, "Runnng on machine %s\n", hostname);
-fprintf(logFile, "Runnng on machine %s\n", hostname); fflush(logFile);
+      verbose(1, "Running on machine %s\n", hostname);
+fprintf(logFile, "Running on machine %s\n", hostname); fflush(logFile);
 }
 
 void quickErrReport()
 /* Report error at head of list if any */
 {
 struct tablesTest *test = tablesTestList;
 if (test->status->errMessage != NULL)
     tablesTestLogOne(test, stderr);
 }
 
 void testSchema(struct htmlPage *tablePage, struct htmlForm *mainForm,
      char *org, char *db, char *group, char *track, char *table)
 /* Make sure schema page comes up. */
 /* mainForm not used */
 {
@@ -283,31 +297,35 @@
 }
 
 int testAllFields(struct htmlPage *tablePage, struct htmlForm *mainForm,
      char *org, char *db, char *group, char *track, char *table)
 /* Get all fields and return count of rows. */
 /* mainForm not used */
 {
 struct htmlPage *outPage;
 int rowCount = 0;
 
 htmlPageSetVar(tablePage, NULL, hgtaOutputType, "primaryTable");
 outPage = quickSubmit(tablePage, org, db, group, track, table,
     "allFields", hgtaDoTopSubmit, "submit");
 /* check for NULL outPage */
 if (outPage == NULL)
-    errAbort("Null page in testAllFields (%s %s %s %s %s)", org, db, group, track, table);
+    {
+          verbose(1, "Null page in testAllFields (%s %s %s %s %s)\n", org, db, group, track, table);
+    fprintf(logFile, "Null page in testAllFields (%s %s %s %s %s)\n", org, db, group, track, table);
+    return -1;
+    }
 rowCount = countNoncommentLines(outPage->htmlText);
 htmlPageFree(&outPage);
 return rowCount;
 }
 
 struct htmlFormVar *findPrefixedVar(struct htmlFormVar *list, char *prefix)
 /* Find first var with given prefix in list. */
 {
 struct htmlFormVar *var;
 for (var = list; var != NULL; var = var->next)
     {
     if (startsWith(prefix, var->name))
         return var;
     }
 return NULL;
@@ -642,86 +660,170 @@
 
 htmlPageFree(&outPage);
 }
 	
 boolean isObsolete(char *table)
 /* Some old table types we can't handle.  Just warn that
  * they are there and skip. */
 {
 boolean obsolete = sameString(table, "wabaCbr");
 if (obsolete)
     qaStatusSoftError(tablesTestList->status, 
 	"Skipping obsolete table %s", table);
 return obsolete;
 }
 
+
+int findExpectedIntersectingRows(char *db, char *table, struct htmlPage *tablePage)
+/* Even when it can support intersections, some tables have way too many rows.
+ * Try to estimate how big the output will be using bin column. */
+{
+struct sqlConnection *conn = sqlConnect(db);
+int size = -1;
+
+if (!sqlTableExists(conn, table))
+    return -3;  // not table found, might be a custom track or hub?
+
+// TODO add or call code to discover if this is a bigBed and if it is local?
+// it should have 1 column called "fileName" 
+if (hHasField(db, table, "fileName") && (sqlCountColumnsInTable(conn,table) == 1))
+    return -4;  // bigBed (maybe bigWig)?
+
+if (!hIsBinned(db, table))
+    return -1;  // no bin column
+
+char *region = cloneString(htmlPageGetVar(tablePage, NULL, "position")->curVal);
+char *chrom = region;
+char *colon = strchr(region,':');
+char *dash = strchr(region,'-');
+*colon = 0;
+*dash = 0;
+int start = sqlUnsigned(colon+1);
+int end = sqlUnsigned(dash+1);
+
+struct dyString *query = newDyString(256);
+sqlDyStringPrintf(query, "select count(*) from %s where ", table);
+
+hAddBinToQuery(start, end, query);
+
+char *chromField = NULL;
+if (hHasField(db, table, "chrom"))
+    chromField = "chrom";
+if (hHasField(db, table, "tName"))
+    chromField = "tName";
+if (hHasField(db, table, "genoName"))
+    chromField = "genoName";
+if (!chromField) // no chrom field found
+    {
+          verbose(1, "findExpectedIntersectingRows failed to find chrom column name in %s.%s\n", db, table);
+    fprintf(logFile, "findExpectedIntersectingRows failed to find chrom column name in %s.%s\n", db, table);
+    return -2;  // no chrom name column
+    }
+
+sqlDyStringPrintfFrag(query, "%s = '%s'", chromField, chrom);
+
+verbose(2, "query=[%s]\n", query->string);
+
+size = sqlQuickNum(conn, dyStringCannibalize(&query));
+
+sqlDisconnect(&conn);
+return size;
+}
+
+
 void testOneTable(struct htmlPage *trackPage, char *org, char *db,
 	char *group, char *track, char *table)
 /* Test stuff on one table if we haven't already tested this table. */
 {
 /* Why declared here and not globally? */
 static struct hash *uniqHash = NULL;
 char fullName[256];
 if (uniqHash == NULL)
      uniqHash = newHash(0);
 safef(fullName, sizeof(fullName), "%s.%s", db, table);
 if (!hashLookup(uniqHash, fullName))
     {
     struct htmlPage *tablePage;
     struct htmlForm *mainForm;
 
     hashAdd(uniqHash, fullName, NULL);
     verbose(1, "Testing %s %s %s %s %s\n", naForNull(org), db, group, track, table);
     tablePage = quickSubmit(trackPage, org, db, group, 
 	    track, table, "selectTable", hgtaTable, table);
     if (!isObsolete(table) && tablePage != NULL)
 	{
 	if ((mainForm = htmlFormGet(tablePage, "mainForm")) == NULL)
 	    {
 	    qaStatusSoftError(tablesTestList->status, 
 		    "Couldn't get main form on tablePage for %s %s %s %s", db, group, track, table);
 	    }
 	else
 	    {
+	    verbose(3, "testOneTable testSchema() got here 1.1\n");
 	    testSchema(tablePage, mainForm, org, db, group, track, table);
+	    verbose(3, "testOneTable testSummaryStats() got here 1.2\n");
 	    testSummaryStats(tablePage, mainForm, org, db, group, track, table);
+	    verbose(3, "testOneTable got here 1.3\n");
 	    if (outTypeAvailable(mainForm, "bed")) 
 		{
+		verbose(3, "testOneTable bed output avail means can filter on position got here 2\n");
 		if (outTypeAvailable(mainForm, "primaryTable"))
 		    {
-		    int rowCount;
-		    rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
+		    verbose(3, "testOneTable got here 3\n");
+
+		    int expectedSize = 0; // DEBUG RESTORE findExpectedIntersectingRows(db, table, tablePage);
+		    // DEBUG RESTORE verbose(1, "%s.%s expectedSize=%d\n", db, table, expectedSize);
+
+		    if (expectedSize < 500000)
+			{
+			int rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
+			if (rowCount >= 0)
+			    {
 			    testOneField(tablePage, mainForm, org, db, group, track, table, rowCount);
 			    testOutSequence(tablePage, mainForm, org, db, group, track, table, rowCount);
 			    testOutBed(tablePage, mainForm, org, db, group, track, table, rowCount);
 			    testOutHyperlink(tablePage, mainForm, org, db, group, track, table, rowCount);
 			    testOutGff(tablePage, mainForm, org, db, group, track, table);
 			    if (rowCount > 0)
 				testOutCustomTrack(tablePage, mainForm, org, db, group, track, table);
 			    }
 			}
+		    else
+			{
+			      verbose(1, "%s.%s expectedSize=%d, too large, skipping.\n", db, table, expectedSize);
+			fprintf(logFile, "%s.%s expectedSize=%d, too large, skipping.\n", db, table, expectedSize);
+			}
+		    }
+		}
 	    else if (outTypeAvailable(mainForm, "primaryTable"))
 		{
+		verbose(3, "testOneTable no bed output available, so no position filtering available. got here 4\n");
 		/* If BED type is not available then the region will be ignored, and
 		 * we'll end up scanning whole table.  Make sure table is not huge
 		 * before proceeding. */
-		if (tableSize(db, table) < 500000)
+		int tableRows = tableSize(db, table);
+		if (tableRows < 500000)
 		    {
-		    int rowCount;
-		    rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
+		    int rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
+		    if (rowCount >= 0)
 			testOneField(tablePage, mainForm, org, db, group, track, table, rowCount);
 		    }
+		else
+		    {
+			  verbose(1, "%s.%s tableRows=%d, too large >= 500000, skipping.\n", db, table, tableRows);
+		    fprintf(logFile, "%s.%s tableRows=%d, too large >= 500000, skipping.\n", db, table, tableRows);
+		    }
 		}
 	    }
 	htmlPageFree(&tablePage);
 	}
     carefulCheckHeap();
     }
 }
 
 void testOneTrack(struct htmlPage *groupPage, char *org, char *db,
 	char *group, char *track, int maxTables)
 /* Test a little something on up to maxTables in one track. */
 {
 struct htmlPage *trackPage = quickSubmit(groupPage, org, db, group, 
 	track, NULL, "selectTrack", hgtaTrack, track);
 struct htmlForm *mainForm;
@@ -741,94 +843,102 @@
     if (sameString(type, "bigPsl"))
 	{
     	      verbose(1, "Skipping testing track %s since type bigPsl not supported by hgTables at this time (2016-06-20)\n", track);
     	fprintf(logFile, "Skipping testing track %s since type bigPsl not supported by hgTables at this time (2016-06-20)\n", track);
 	return;
 	}
     else
 	errAbort("Couldn't select track %s", track);
     }
 if ((mainForm = htmlFormGet(trackPage, "mainForm")) == NULL)
     errAbort("Couldn't get main form on trackPage");
 if ((tableVar = htmlFormVarGet(mainForm, hgtaTable)) == NULL)
     errAbort("Can't find table var");
 
 // put the tables in random order:
+if (!noShuffle)
     shuffleList(&tableVar->values);
 
 for (table = tableVar->values, tableIx = 0; 
 	table != NULL && tableIx < maxTables; 
-	table = table->next, ++tableIx)
+	table = table->next)
     {
-    if (clTable == NULL || sameString(clTable, table->name))
+    if (clTable && !sameString(clTable, table->name))
+	continue;
     testOneTable(trackPage, org, db, group, track, table->name);
+    ++tableIx;
     }
 /* Clean up. */
 htmlPageFree(&trackPage);
 }
 
 void testOneGroup(struct htmlPage *dbPage, char *org, char *db, char *group, 
 	int maxTracks)
 /* Test a little something on up to maxTracks in one group */
 {
 struct htmlPage *groupPage = quickSubmit(dbPage, org, db, group, NULL, NULL,
 	"selectGroup", hgtaGroup, group);
 struct htmlForm *mainForm;
 struct htmlFormVar *trackVar;
 struct slName *track;
 int trackIx;
 
 if ((mainForm = htmlFormGet(groupPage, "mainForm")) == NULL)
     errAbort("Couldn't get main form on groupPage");
 if ((trackVar = htmlFormVarGet(mainForm, hgtaTrack)) == NULL)
     errAbort("Can't find track var");
 
 // put the tracks in random order:
+if (!noShuffle)
     shuffleList(&trackVar->values);
 
 for (track = trackVar->values, trackIx = 0; 
 	track != NULL && trackIx < maxTracks; 
-	track = track->next, ++trackIx)
+	track = track->next)
     {
-    if (clTrack == NULL || sameString(track->name, clTrack))
+    if (clTrack && !sameString(track->name, clTrack))
+	continue;
     testOneTrack(groupPage, org, db, group, track->name, clTables);
+    ++trackIx;
     }
 
 /* Clean up. */
 htmlPageFree(&groupPage);
 }
 
 void testGroups(struct htmlPage *dbPage, char *org, char *db, int maxGroups)
 /* Test a little something in all groups for dbPage. */
 {
 struct htmlForm *mainForm;
 struct htmlFormVar *groupVar;
 struct slName *group;
 int groupIx;
 
 if ((mainForm = htmlFormGet(dbPage, "mainForm")) == NULL)
     errAbort("Couldn't get main form on dbPage");
 if ((groupVar = htmlFormVarGet(mainForm, hgtaGroup)) == NULL)
     errAbort("Can't find group var");
 for (group = groupVar->values, groupIx=0; 
 	group != NULL && groupIx < maxGroups; 
-	group = group->next, ++groupIx)
+	group = group->next)
     {
     if (!sameString("allTables", group->name))
 	{
-	if (clGroup == NULL || sameString(clGroup, group->name))
+	if (clGroup && !sameString(clGroup, group->name))
+	    continue;
 	testOneGroup(dbPage, org, db, group->name, clTracks);
+	++groupIx;
 	}
     }
 }
 
 void getTestRegion(char *db, char region[256], int regionSize)
 /* Look up first chromosome in database and grab five million bases
  * from the middle of it. */
 {
 struct sqlConnection *conn = sqlConnect(db);
 struct sqlResult *sr = sqlGetResult(conn, NOSQLINJ "select * from chromInfo limit 1");
 char **row;
 struct chromInfo ci;
 int start,end,middle;
 
 if ((row = sqlNextRow(sr)) == NULL)
@@ -1243,21 +1353,22 @@
     usage();
 seed = optionInt("seed",time(NULL));
       verbose(1,"seed=%d\n",seed);
 srand(seed);
 clDb = optionVal("db", clDb);
 clOrg = optionVal("org", clOrg);
 clGroup = optionVal("group", clGroup);
 clTrack = optionVal("track", clTrack);
 clTable = optionVal("table", clTable);
 clDbs = optionInt("dbs", clDbs);
 clOrgs = optionInt("orgs", clOrgs);
 clGroups = optionInt("groups", clGroups);
 clTracks = optionInt("tracks", clTracks);
 clTables = optionInt("tables", clTables);
 appendLog = optionExists("appendLog");
+noShuffle = optionExists("noShuffle");
 if (clOrg != NULL)
    clOrgs = BIGNUM;
 hgTablesTest(argv[1], argv[2]);
 carefulCheckHeap();
 return 0;
 }