8ba435bdd1a1fda7d9f05e027477b6b9d8bdc7f4 galt Wed Jun 29 11:18:08 2016 -0700 About to remove findExpectedIntersectingRows() because it seems less necessary now. diff --git src/hg/hgTablesTest/hgTablesTest.c src/hg/hgTablesTest/hgTablesTest.c index 7e7bc96..4604f40 100644 --- src/hg/hgTablesTest/hgTablesTest.c +++ src/hg/hgTablesTest/hgTablesTest.c @@ -22,76 +22,79 @@ #define MAX_ATTEMPTS 10 /* Command line variables. */ char *clOrg = NULL; /* Organism from command line. */ char *clDb = NULL; /* DB from command line */ char *clGroup = NULL; /* Group from command line. */ char *clTrack = NULL; /* Track from command line. */ char *clTable = NULL; /* Table from command line. */ int clGroups = BIGNUM; /* Number of groups to test. */ int clTracks = 4; /* Number of track to test. */ int clTables = 2; /* Number of tables to test. */ int clDbs = 1; /* Number of databases per organism. */ int clOrgs = 2; /* Number of organisms to test. */ -boolean appendLog; /* append to log rather than create it */ +boolean appendLog; /* Append to log rather than create it. */ +boolean noShuffle; /* Suppress shuffling of track and table lists. */ void usage() /* Explain usage and exit. */ { errAbort( "hgTablesTest - Test hgTables web page\n" "usage:\n" " hgTablesTest url log\n" "Where url is something like hgwbeta.cse.ucsc.edu/cgi-bin/hgTables\n" "and log is a file where error messages and statistics will be written\n" "options:\n" " -org=Human - Restrict to Human (or Mouse, Fruitfly, etc.)\n" " -db=hg17 - Restrict to particular database\n" " -group=genes - Restrict to a particular group\n" " -track=knownGene - Restrict to a particular track\n" " -table=knownGeneMrna - Restrict to a particular table\n" " -orgs=N - Number of organisms to test. Default %d\n" " -dbs=N - Number of databases per organism to test. Default %d\n" " -groups=N - Number of groups to test (default all)\n" " -tracks=N - Number of tracks per group to test (default %d)\n" " -tables=N - Number of tables per track to test (default %d)\n" " -verbose=N - Set to 0 for silent operation, 2 or 3 for debugging\n" " -appendLog - Append to log file rather than creating it\n" - " -seed flag to specify seed for random number generator as debugging aid.\n" + " -seed N - Specify seed for random number generator as debugging aid.\n" + " -noShuffle - do not shuffle tracks and tables lists.\n" , clOrgs, clDbs, clTracks, clTables); } FILE *logFile; /* Log file. */ int seed = 0; /* seed for random number generator */ static struct optionSpec options[] = { {"org", OPTION_STRING}, {"db", OPTION_STRING}, {"group", OPTION_STRING}, {"track", OPTION_STRING}, {"table", OPTION_STRING}, {"orgs", OPTION_INT}, {"dbs", OPTION_INT}, {"search", OPTION_STRING}, {"groups", OPTION_INT}, {"tracks", OPTION_INT}, {"tables", OPTION_INT}, {"appendLog", OPTION_BOOLEAN}, {"seed", OPTION_INT}, + {"noShuffle", OPTION_BOOLEAN}, {NULL, 0}, }; struct tablesTest /* Test on one column. */ { struct tablesTest *next; struct qaStatus *status; /* Result of test. */ char *info[6]; }; enum tablesTestInfoIx { ntiiType, ntiiOrg, @@ -152,30 +155,41 @@ if (basePage != NULL) { struct qaStatus *qs; if (db != NULL) htmlPageSetVar(basePage, NULL, "db", db); if (org != NULL) htmlPageSetVar(basePage, NULL, "org", org); if (group != NULL) htmlPageSetVar(basePage, NULL, hgtaGroup, group); if (track != NULL) htmlPageSetVar(basePage, NULL, hgtaTrack, track); if (table != NULL) htmlPageSetVar(basePage, NULL, hgtaTable, table); qs = qaPageFromForm(basePage, basePage->forms, button, buttonVal, &page); + + if (!page) + { + verbose(2, "page is NULL, qs->errMessage=[%s]\n", qs->errMessage); + if (startsWith("carefulAlloc: Allocated too much memory", qs->errMessage)) + { + verbose(1, "Response html page too large (500MB) (%s %s %s %s %s)\n", org, db, group, track, table); + fprintf(logFile, "Response html page too large (500MB) (%s %s %s %s %s)\n", org, db, group, track, table); + } + } + /* if (page->forms != NULL) htmlFormPrint(page->forms, stdout); */ // do not need to keep the returned structure, the answer is accumulating // in global variable: tablesTestList (void) tablesTestNew(qs, testName, org, db, group, track, table); } return page; } void serialSubmit(struct htmlPage **pPage, char *org, char *db, char *group, char *track, char *table, char *testName, char *button, char *buttonVal) /* Submit page, replacing old page with new one. */ @@ -206,32 +220,32 @@ char *hostinfo = sqlHostInfo(conn); verbose(1, "Connecting as %s to database server %s\n", user, hostinfo); fprintf(logFile, "Connecting as %s to database server %s\n", user, hostinfo); fflush(logFile); sqlDisconnect(&conn); } void showRunningHostName() /* Show hostname of the machine we are running on. */ { char hostname[HOST_NAME_MAX]; if (gethostname(hostname, sizeof hostname)) { perror("gethostname"); safecpy(hostname, sizeof hostname, "error-reading-hostname"); } - verbose(1, "Runnng on machine %s\n", hostname); -fprintf(logFile, "Runnng on machine %s\n", hostname); fflush(logFile); + verbose(1, "Running on machine %s\n", hostname); +fprintf(logFile, "Running on machine %s\n", hostname); fflush(logFile); } void quickErrReport() /* Report error at head of list if any */ { struct tablesTest *test = tablesTestList; if (test->status->errMessage != NULL) tablesTestLogOne(test, stderr); } void testSchema(struct htmlPage *tablePage, struct htmlForm *mainForm, char *org, char *db, char *group, char *track, char *table) /* Make sure schema page comes up. */ /* mainForm not used */ { @@ -283,31 +297,35 @@ } int testAllFields(struct htmlPage *tablePage, struct htmlForm *mainForm, char *org, char *db, char *group, char *track, char *table) /* Get all fields and return count of rows. */ /* mainForm not used */ { struct htmlPage *outPage; int rowCount = 0; htmlPageSetVar(tablePage, NULL, hgtaOutputType, "primaryTable"); outPage = quickSubmit(tablePage, org, db, group, track, table, "allFields", hgtaDoTopSubmit, "submit"); /* check for NULL outPage */ if (outPage == NULL) - errAbort("Null page in testAllFields (%s %s %s %s %s)", org, db, group, track, table); + { + verbose(1, "Null page in testAllFields (%s %s %s %s %s)\n", org, db, group, track, table); + fprintf(logFile, "Null page in testAllFields (%s %s %s %s %s)\n", org, db, group, track, table); + return -1; + } rowCount = countNoncommentLines(outPage->htmlText); htmlPageFree(&outPage); return rowCount; } struct htmlFormVar *findPrefixedVar(struct htmlFormVar *list, char *prefix) /* Find first var with given prefix in list. */ { struct htmlFormVar *var; for (var = list; var != NULL; var = var->next) { if (startsWith(prefix, var->name)) return var; } return NULL; @@ -642,86 +660,170 @@ htmlPageFree(&outPage); } boolean isObsolete(char *table) /* Some old table types we can't handle. Just warn that * they are there and skip. */ { boolean obsolete = sameString(table, "wabaCbr"); if (obsolete) qaStatusSoftError(tablesTestList->status, "Skipping obsolete table %s", table); return obsolete; } + +int findExpectedIntersectingRows(char *db, char *table, struct htmlPage *tablePage) +/* Even when it can support intersections, some tables have way too many rows. + * Try to estimate how big the output will be using bin column. */ +{ +struct sqlConnection *conn = sqlConnect(db); +int size = -1; + +if (!sqlTableExists(conn, table)) + return -3; // not table found, might be a custom track or hub? + +// TODO add or call code to discover if this is a bigBed and if it is local? +// it should have 1 column called "fileName" +if (hHasField(db, table, "fileName") && (sqlCountColumnsInTable(conn,table) == 1)) + return -4; // bigBed (maybe bigWig)? + +if (!hIsBinned(db, table)) + return -1; // no bin column + +char *region = cloneString(htmlPageGetVar(tablePage, NULL, "position")->curVal); +char *chrom = region; +char *colon = strchr(region,':'); +char *dash = strchr(region,'-'); +*colon = 0; +*dash = 0; +int start = sqlUnsigned(colon+1); +int end = sqlUnsigned(dash+1); + +struct dyString *query = newDyString(256); +sqlDyStringPrintf(query, "select count(*) from %s where ", table); + +hAddBinToQuery(start, end, query); + +char *chromField = NULL; +if (hHasField(db, table, "chrom")) + chromField = "chrom"; +if (hHasField(db, table, "tName")) + chromField = "tName"; +if (hHasField(db, table, "genoName")) + chromField = "genoName"; +if (!chromField) // no chrom field found + { + verbose(1, "findExpectedIntersectingRows failed to find chrom column name in %s.%s\n", db, table); + fprintf(logFile, "findExpectedIntersectingRows failed to find chrom column name in %s.%s\n", db, table); + return -2; // no chrom name column + } + +sqlDyStringPrintfFrag(query, "%s = '%s'", chromField, chrom); + +verbose(2, "query=[%s]\n", query->string); + +size = sqlQuickNum(conn, dyStringCannibalize(&query)); + +sqlDisconnect(&conn); +return size; +} + + void testOneTable(struct htmlPage *trackPage, char *org, char *db, char *group, char *track, char *table) /* Test stuff on one table if we haven't already tested this table. */ { /* Why declared here and not globally? */ static struct hash *uniqHash = NULL; char fullName[256]; if (uniqHash == NULL) uniqHash = newHash(0); safef(fullName, sizeof(fullName), "%s.%s", db, table); if (!hashLookup(uniqHash, fullName)) { struct htmlPage *tablePage; struct htmlForm *mainForm; hashAdd(uniqHash, fullName, NULL); verbose(1, "Testing %s %s %s %s %s\n", naForNull(org), db, group, track, table); tablePage = quickSubmit(trackPage, org, db, group, track, table, "selectTable", hgtaTable, table); if (!isObsolete(table) && tablePage != NULL) { if ((mainForm = htmlFormGet(tablePage, "mainForm")) == NULL) { qaStatusSoftError(tablesTestList->status, "Couldn't get main form on tablePage for %s %s %s %s", db, group, track, table); } else { + verbose(3, "testOneTable testSchema() got here 1.1\n"); testSchema(tablePage, mainForm, org, db, group, track, table); + verbose(3, "testOneTable testSummaryStats() got here 1.2\n"); testSummaryStats(tablePage, mainForm, org, db, group, track, table); + verbose(3, "testOneTable got here 1.3\n"); if (outTypeAvailable(mainForm, "bed")) { + verbose(3, "testOneTable bed output avail means can filter on position got here 2\n"); if (outTypeAvailable(mainForm, "primaryTable")) { - int rowCount; - rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); + verbose(3, "testOneTable got here 3\n"); + + int expectedSize = 0; // DEBUG RESTORE findExpectedIntersectingRows(db, table, tablePage); + // DEBUG RESTORE verbose(1, "%s.%s expectedSize=%d\n", db, table, expectedSize); + + if (expectedSize < 500000) + { + int rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); + if (rowCount >= 0) + { testOneField(tablePage, mainForm, org, db, group, track, table, rowCount); testOutSequence(tablePage, mainForm, org, db, group, track, table, rowCount); testOutBed(tablePage, mainForm, org, db, group, track, table, rowCount); testOutHyperlink(tablePage, mainForm, org, db, group, track, table, rowCount); testOutGff(tablePage, mainForm, org, db, group, track, table); if (rowCount > 0) testOutCustomTrack(tablePage, mainForm, org, db, group, track, table); } } + else + { + verbose(1, "%s.%s expectedSize=%d, too large, skipping.\n", db, table, expectedSize); + fprintf(logFile, "%s.%s expectedSize=%d, too large, skipping.\n", db, table, expectedSize); + } + } + } else if (outTypeAvailable(mainForm, "primaryTable")) { + verbose(3, "testOneTable no bed output available, so no position filtering available. got here 4\n"); /* If BED type is not available then the region will be ignored, and * we'll end up scanning whole table. Make sure table is not huge * before proceeding. */ - if (tableSize(db, table) < 500000) + int tableRows = tableSize(db, table); + if (tableRows < 500000) { - int rowCount; - rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); + int rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); + if (rowCount >= 0) testOneField(tablePage, mainForm, org, db, group, track, table, rowCount); } + else + { + verbose(1, "%s.%s tableRows=%d, too large >= 500000, skipping.\n", db, table, tableRows); + fprintf(logFile, "%s.%s tableRows=%d, too large >= 500000, skipping.\n", db, table, tableRows); + } } } htmlPageFree(&tablePage); } carefulCheckHeap(); } } void testOneTrack(struct htmlPage *groupPage, char *org, char *db, char *group, char *track, int maxTables) /* Test a little something on up to maxTables in one track. */ { struct htmlPage *trackPage = quickSubmit(groupPage, org, db, group, track, NULL, "selectTrack", hgtaTrack, track); struct htmlForm *mainForm; @@ -741,94 +843,102 @@ if (sameString(type, "bigPsl")) { verbose(1, "Skipping testing track %s since type bigPsl not supported by hgTables at this time (2016-06-20)\n", track); fprintf(logFile, "Skipping testing track %s since type bigPsl not supported by hgTables at this time (2016-06-20)\n", track); return; } else errAbort("Couldn't select track %s", track); } if ((mainForm = htmlFormGet(trackPage, "mainForm")) == NULL) errAbort("Couldn't get main form on trackPage"); if ((tableVar = htmlFormVarGet(mainForm, hgtaTable)) == NULL) errAbort("Can't find table var"); // put the tables in random order: +if (!noShuffle) shuffleList(&tableVar->values); for (table = tableVar->values, tableIx = 0; table != NULL && tableIx < maxTables; - table = table->next, ++tableIx) + table = table->next) { - if (clTable == NULL || sameString(clTable, table->name)) + if (clTable && !sameString(clTable, table->name)) + continue; testOneTable(trackPage, org, db, group, track, table->name); + ++tableIx; } /* Clean up. */ htmlPageFree(&trackPage); } void testOneGroup(struct htmlPage *dbPage, char *org, char *db, char *group, int maxTracks) /* Test a little something on up to maxTracks in one group */ { struct htmlPage *groupPage = quickSubmit(dbPage, org, db, group, NULL, NULL, "selectGroup", hgtaGroup, group); struct htmlForm *mainForm; struct htmlFormVar *trackVar; struct slName *track; int trackIx; if ((mainForm = htmlFormGet(groupPage, "mainForm")) == NULL) errAbort("Couldn't get main form on groupPage"); if ((trackVar = htmlFormVarGet(mainForm, hgtaTrack)) == NULL) errAbort("Can't find track var"); // put the tracks in random order: +if (!noShuffle) shuffleList(&trackVar->values); for (track = trackVar->values, trackIx = 0; track != NULL && trackIx < maxTracks; - track = track->next, ++trackIx) + track = track->next) { - if (clTrack == NULL || sameString(track->name, clTrack)) + if (clTrack && !sameString(track->name, clTrack)) + continue; testOneTrack(groupPage, org, db, group, track->name, clTables); + ++trackIx; } /* Clean up. */ htmlPageFree(&groupPage); } void testGroups(struct htmlPage *dbPage, char *org, char *db, int maxGroups) /* Test a little something in all groups for dbPage. */ { struct htmlForm *mainForm; struct htmlFormVar *groupVar; struct slName *group; int groupIx; if ((mainForm = htmlFormGet(dbPage, "mainForm")) == NULL) errAbort("Couldn't get main form on dbPage"); if ((groupVar = htmlFormVarGet(mainForm, hgtaGroup)) == NULL) errAbort("Can't find group var"); for (group = groupVar->values, groupIx=0; group != NULL && groupIx < maxGroups; - group = group->next, ++groupIx) + group = group->next) { if (!sameString("allTables", group->name)) { - if (clGroup == NULL || sameString(clGroup, group->name)) + if (clGroup && !sameString(clGroup, group->name)) + continue; testOneGroup(dbPage, org, db, group->name, clTracks); + ++groupIx; } } } void getTestRegion(char *db, char region[256], int regionSize) /* Look up first chromosome in database and grab five million bases * from the middle of it. */ { struct sqlConnection *conn = sqlConnect(db); struct sqlResult *sr = sqlGetResult(conn, NOSQLINJ "select * from chromInfo limit 1"); char **row; struct chromInfo ci; int start,end,middle; if ((row = sqlNextRow(sr)) == NULL) @@ -1243,21 +1353,22 @@ usage(); seed = optionInt("seed",time(NULL)); verbose(1,"seed=%d\n",seed); srand(seed); clDb = optionVal("db", clDb); clOrg = optionVal("org", clOrg); clGroup = optionVal("group", clGroup); clTrack = optionVal("track", clTrack); clTable = optionVal("table", clTable); clDbs = optionInt("dbs", clDbs); clOrgs = optionInt("orgs", clOrgs); clGroups = optionInt("groups", clGroups); clTracks = optionInt("tracks", clTracks); clTables = optionInt("tables", clTables); appendLog = optionExists("appendLog"); +noShuffle = optionExists("noShuffle"); if (clOrg != NULL) clOrgs = BIGNUM; hgTablesTest(argv[1], argv[2]); carefulCheckHeap(); return 0; }