b510e09f43330d387df999db278d44fe717e9dd2
jcasper
  Mon Jun 17 11:02:19 2024 -0700
Fix for hgNearTest crashing with the new more restrictive db string constraints.
We expect 0 for a missing database in hgNear; the test should supply 0.  No ticket.

diff --git src/hg/near/hgNearTest/hgNearTest.c src/hg/near/hgNearTest/hgNearTest.c
index 1dca83e..364d1ca 100644
--- src/hg/near/hgNearTest/hgNearTest.c
+++ src/hg/near/hgNearTest/hgNearTest.c
@@ -1,627 +1,627 @@
 /* hgNearTest - Test hgNear web page. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "memalloc.h"
 #include "linefile.h"
 #include "hash.h"
 #include "htmshell.h"
 #include "portable.h"
 #include "options.h"
 #include "errCatch.h"
 #include "ra.h"
 #include "htmlPage.h"
 #include "../hgNear/hgNear.h"
 #include "hdb.h"
 #include "qa.h"
 
 
 /* Command line variables.  The initializers are the defaults; main()
  * overwrites each one from its option when that option is supplied. */
 /* Data directory handed to hgReadRa() when testDb() loads genome.ra. */
 char *dataDir = "/usr/local/apache/cgi-bin/hgNearData";
 char *clOrg = NULL;	/* Organism from command line.  NULL means loop over every value of the "org" form variable. */
 char *clDb = NULL;	/* DB from command line.  Passed to testOrg() as forceDb; NULL means every db is tested. */
 char *clSearch = NULL;	/* Search var from command line.  Set in main() but not read anywhere else in the code shown. */
 int clRepeat = 3;	/* Number of repetitions: the sample size asked of sqlRandomSampleWithSeed(). */
 int seed = 0;           /* seed for random number generator; main() defaults it to time(NULL) */
 
 void usage()
 /* Explain usage and exit.  The message is reported through errAbort();
  * the two format arguments fill in the current defaults for -dataDir
  * and -repeat. */
 {
 errAbort(
   "hgNearTest - Test hgNear web page\n"
   "usage:\n"
   "   hgNearTest url log\n"
   "options:\n"
   "   -org=Human - Restrict to Human (or Mouse, Fruitfly, etc.)\n"
   "   -db=hg16 - Restrict to particular database\n"
   "   -search=gene - Use selected gene\n"
   "   -dataDir=dataDir - Use selected data dir, default %s\n"
   "   -repeat=N - Number of times to repeat test (on random genes)\n"
   "               (default %d)\n"
   "   -seed flag to specify seed for random number generator as debugging aid.\n"
   , dataDir, clRepeat);
 }
 
 
 /* Option table handed to optionInit() in main(): one entry per switch
  * described in usage(), ended by a NULL-named entry. */
 static struct optionSpec options[] = {
     {"org", OPTION_STRING},
     {"db", OPTION_STRING},
     {"search", OPTION_STRING},
     {"dataDir", OPTION_STRING},
     {"repeat", OPTION_INT},
     {"seed", OPTION_INT},
     {NULL, 0},
 };
 
 struct nearTest
 /* Test on one column.  More generally: the outcome of one submitted page
  * together with the strings that describe what was submitted. */
     {
     struct nearTest *next;	/* Next test in nearTestList. */
     struct qaStatus *status;	/* Result of test. */
     char *info[6];		/* Descriptive fields, indexed by enum nearTestInfoIx. */
     };
 
 /* Indexes into nearTest.info[] and into nearTestInfoTypes[].  reportSummary()
  * prints subtotals for every index from ntiiType through ntiiCol, so
  * ntiiGene is the only field without a subtotal section. */
 enum nearTestInfoIx {
    ntiiType = 0,
    ntiiSort = 1,
    ntiiOrg = 2,
    ntiiDb = 3,
    ntiiCol = 4,
    ntiiGene = 5,
 };
 
 /* Label for each nearTestInfoIx value, in index order; statsOnSubsets()
  * uses it as the heading of a subtotal section. */
 char *nearTestInfoTypes[] =
    { "type", "sort", "organism", "db", "column", "gene" };
 
 /* nearTestNew() pushes onto the head of this list; hgNearTest() reverses
  * it before reporting. */
 struct nearTest *nearTestList = NULL;	/* List of all tests, latest on top. */
 
 struct nearTest *nearTestNew(struct qaStatus *status,
 	char *type, char *sort, char *org, char *db, char *col, char *gene)
 /* Allocate a test record for status, store a clone of every descriptive
  * field (each passed through naForNull() first), push the record on the
  * head of nearTestList and return it. */
 {
 struct nearTest *record;
 char *fields[6];
 int ix;
 
 /* Line the arguments up with their slots so one loop can clone them. */
 fields[ntiiType] = type;
 fields[ntiiSort] = sort;
 fields[ntiiOrg] = org;
 fields[ntiiDb] = db;
 fields[ntiiCol] = col;
 fields[ntiiGene] = gene;
 
 AllocVar(record);
 record->status = status;
 for (ix = 0; ix < ArraySize(record->info); ++ix)
     record->info[ix] = cloneString(naForNull(fields[ix]));
 slAddHead(&nearTestList, record);
 return record;
 }
 
 void nearTestLogOne(struct nearTest *test, FILE *f)
 /* Write one line for test to f: every info field followed by a space,
  * then the status error message and a newline. */
 {
 int fieldCount = ArraySize(test->info);
 int ix = 0;
 
 while (ix < fieldCount)
     {
     fprintf(f, "%s ", test->info[ix]);
     ++ix;
     }
 fprintf(f, "%s\n", test->status->errMessage);
 }
 
 /* HTML comment markers searched for in the page text: the first two bracket
  * the big table, the third is counted (or split on) once per row. */
 char *nearStartTablePat = "<!-- Start Rows -->";
 char *nearEndTablePat = "<!-- End Rows -->";
 char *nearEndRowPat = "<!-- Row -->";
 
 int nearCountRows(struct htmlPage *page)
 /* Return the row count of the big table: whatever qaCountBetween() reports
  * for the row marker between the table start and end markers. */
 {
 char *html = page->htmlText;
 int rowCount = qaCountBetween(html, nearStartTablePat, nearEndTablePat,
                               nearEndRowPat);
 return rowCount;
 }
 
 int nearCountUniqAccRows(struct htmlPage *page)
 /* Count number of unique rows in table containing just hyperlinked 
  * accessions.
  *
  * Rows are the stretches of text ending at each nearEndRowPat marker that
  * follows nearStartTablePat; a row's accession is the text qaStringBetween()
  * finds between ">" and "</a>".  Returns -1 if page is NULL or the table
  * start marker is missing.  If a row has no accession a warning is issued
  * and the count gathered so far is returned. */
 {
 char *s, *e, *row, *acc;
 int count = 0;
 struct hash *uniqHash;
 
 if (page == NULL)
     return -1;
 
 /* Set s to first row. */
 s = stringIn(nearStartTablePat, page->htmlText);
 if (s == NULL)
     return -1;
 s += strlen(nearStartTablePat);
 
 /* Allocate only after the early exits above so they cannot leak the hash. */
 uniqHash = hashNew(0);
 for (;;)
     {
     e = stringIn(nearEndRowPat, s);
     if (e == NULL)
         break;
     row = cloneStringZ(s, e-s);
     acc = qaStringBetween(row, ">", "</a>");
     if (acc == NULL)
         {
         warn("Can't find acc text between > and </a> while counting uniq row %s",
              row);
         freez(&row);
         break;
         }
     if (!hashLookup(uniqHash, acc))
         {
         hashAdd(uniqHash, acc, NULL);
         ++count;
         }
     freez(&row);
     freez(&acc);
     s = e + strlen(nearEndRowPat);
     }
 hashFree(&uniqHash);
 return count;
 }
 
 struct htmlPage *quickSubmit(struct htmlPage *basePage,
 	char *sort, char *org, char *db, char *col, char *gene, 
 	char *testName, char *button, char *buttonVal)
 /* Set the db and org variables on basePage when they are given, submit its
  * form with button=buttonVal, and record the outcome as a new entry at the
  * head of nearTestList.  Returns the page filled in by qaPageFromForm(), or
  * NULL when basePage is NULL (in which case nothing is recorded). */
 {
 struct htmlPage *newPage = NULL;
 struct qaStatus *status;
 
 if (basePage == NULL)
     return NULL;
 
 if (db != NULL)
     htmlPageSetVar(basePage, NULL, "db", db);
 if (org != NULL)
     htmlPageSetVar(basePage, NULL, "org", org);
 status = qaPageFromForm(basePage, basePage->forms, button, buttonVal,
                         &newPage);
 /* The returned record is not needed here; it is kept in nearTestList. */
 (void) nearTestNew(status, testName, sort, org, db, col, gene);
 return newPage;
 }
 
 void serialSubmit(struct htmlPage **pPage,
 	char *sort, char *org, char *db, char *col, char *gene, 
 	char *testName, char *button, char *buttonVal)
 /* Submit *pPage through quickSubmit(), store the result back in *pPage and
  * free the page that was submitted.  Does nothing if *pPage is NULL. */
 {
 struct htmlPage *submitted = *pPage;
 
 if (submitted == NULL)
     return;
 *pPage = quickSubmit(submitted, sort, org, db, col, gene,
                      testName, button, buttonVal);
 htmlPageFree(&submitted);
 }
 
 void quickErrReport()
 /* Report error at head of list if any.  The newest test is logged to
  * stderr when its status carries an error message.  Nothing is done when
  * no test has been recorded yet or the newest test has no status. */
 {
 struct nearTest *test = nearTestList;
 
 /* quickSubmit() records nothing when given a NULL page, so the list can
  * still be empty here. */
 if (test == NULL || test->status == NULL)
     return;
 if (test->status->errMessage != NULL)
     nearTestLogOne(test, stderr);
 }
 
 void testCol(struct htmlPage *emptyConfig, char *org, char *db, char *col, char *gene)
 /* Test one column: switch on its visibility variable in emptyConfig, ask
  * for 25 rows ordered by nameSimilarity, submit, and flag a soft error if
  * the page does not hold exactly 25 rows.  The visibility variable is set
  * back to NULL before returning. */
 {
 char visVar[256];
 struct htmlPage *resultPage;
 
 safef(visVar, sizeof(visVar), "near.col.%s.vis", col);
 htmlPageSetVar(emptyConfig, NULL, visVar, "on");
 htmlPageSetVar(emptyConfig, NULL, orderVarName, "nameSimilarity");
 htmlPageSetVar(emptyConfig, NULL, countVarName, "25");
 
 resultPage = quickSubmit(emptyConfig, NULL, org, db, col, gene,
                          "colPrint", "Submit", "on");
 if (resultPage != NULL)
     {
     int wantRows = 25;
     int gotRows = nearCountRows(resultPage);
     if (gotRows != wantRows)
         {
         qaStatusSoftError(nearTestList->status,
                           "Got %d rows, expected %d", gotRows, wantRows);
         }
     }
 quickErrReport();
 htmlPageFree(&resultPage);
 htmlPageSetVar(emptyConfig, NULL, visVar, NULL);
 }
 
 struct htmlPage *emptyConfigPage(struct htmlPage *dbPage, char *org, char *db)
 /* Submit dbPage with the hide-all-columns button pressed and return the
  * resulting page; the submission is recorded as test "emptyConfig". */
 {
 struct htmlPage *configPage = quickSubmit(dbPage, NULL, org, db, NULL, NULL,
                                           "emptyConfig", hideAllConfName, "on");
 return configPage;
 }
 
 void testColInfo(struct htmlPage *dbPage, char *org, char *db, char *col) 
 /* Request the info page of one column and flag a soft error if the page
  * contains the "No additional info available" text. */
 {
 struct htmlPage *infoPage;
 
 infoPage = quickSubmit(dbPage, NULL, org, db, col, NULL,
                        "colInfo", colInfoVarName, col);
 if (infoPage != NULL
     && stringIn("No additional info available", infoPage->htmlText))
     {
     qaStatusSoftError(nearTestList->status,
                       "%s failed - no %s.html?", colInfoVarName, col);
     }
 quickErrReport();
 htmlPageFree(&infoPage);
 }
 
 void testDbColumns(struct htmlPage *dbPage, char *org, char *db, 
 	struct slName *geneList)
 /* Test every column of one database.
  *
  * Column names are taken from the form variables of the empty
  * configuration page named "near.col.<name>.vis", where <name> runs up to
  * the first '.' after the prefix.  Each column is then displayed for every
  * gene in geneList via testCol(), and its info page is fetched once via
  * testColInfo().  Nothing is tested if the empty configuration page cannot
  * be obtained. */
 {
 struct htmlPage *emptyConfig;
 struct slName *colList = NULL, *col;
 struct htmlFormVar *var;
 struct slName *gene;
 
 uglyf("testDbColumns %s %s\n", org, db);
 emptyConfig = emptyConfigPage(dbPage, org, db);
 if (emptyConfig != NULL )
     {
     char *prefix = "near.col.";
     int prefixLen = strlen(prefix);
 
     for (var = emptyConfig->forms->vars; var != NULL; var = var->next)
         {
         if (startsWith(prefix, var->name) && endsWith(var->name, ".vis"))
             {
             char *colNameStart = var->name + prefixLen;
             char *colNameEnd = strchr(colNameStart, '.');
             /* "near.col.vis" passes both tests above yet has no '.' left
              * after the prefix; there is no column name to extract. */
             if (colNameEnd == NULL)
                 continue;
             /* Copy just the name rather than patching var->name in place. */
             col = slNameNewN(colNameStart, (int)(colNameEnd - colNameStart));
             slAddHead(&colList, col);
             }
         }
     slReverse(&colList);
 
     for (gene = geneList; gene != NULL; gene = gene->next)
         {
         htmlPageSetVar(emptyConfig, NULL, searchVarName, gene->name);
         for (col = colList; col != NULL; col = col->next)
             {
             testCol(emptyConfig, org, db, col->name, gene->name);
             }
         }
     for (col = colList; col != NULL; col = col->next)
         {
         testColInfo(dbPage, org, db, col->name);
         }
     /* The column list is private to this call; release it. */
     slFreeList(&colList);
     }
 htmlPageFree(&emptyConfig);
 }
 
 
 void testSort(struct htmlPage *emptyConfig, char *org, char *db, char *sort, char *gene, char *accColumn)
 /* Test one sort order: with the accession column visible, ask for 25 rows
  * around gene ordered by sort, and flag a soft error if the resulting page
  * has no rows. */
 {
 struct htmlPage *resultPage;
 char accVis[256];
 
 safef(accVis, sizeof(accVis), "near.col.%s.vis", accColumn);
 htmlPageSetVar(emptyConfig, NULL, accVis, "on");
 htmlPageSetVar(emptyConfig, NULL, orderVarName, sort);
 htmlPageSetVar(emptyConfig, NULL, countVarName, "25");
 htmlPageSetVar(emptyConfig, NULL, searchVarName, gene);
 
 resultPage = quickSubmit(emptyConfig, sort, org, db, NULL, gene,
                          "sortType", "submit", "on");
 if (resultPage != NULL && nearCountRows(resultPage) < 1)
     qaStatusSoftError(nearTestList->status, "No rows for sort %s", sort);
 quickErrReport();
 htmlPageFree(&resultPage);
 }
 
 
 
 void testDbSorts(struct htmlPage *dbPage, char *org, char *db, 
 	char *accColumn, struct slName *geneList)
 /* Run testSort() for every value of the order variable on dbPage combined
  * with every gene in geneList.  Aborts if dbPage has no order variable;
  * tests nothing if the empty configuration page cannot be obtained. */
 {
 struct htmlFormVar *sortVar = htmlFormVarGet(dbPage->forms, orderVarName);
 struct htmlPage *emptyConfig;
 struct slName *sort;
 
 uglyf("testDbSorts %s %s\n", org, db);
 if (sortVar == NULL)
     errAbort("Couldn't find var %s", orderVarName);
 
 emptyConfig = emptyConfigPage(dbPage, org, db);
 if (emptyConfig == NULL)
     return;
 
 for (sort = sortVar->values; sort != NULL; sort = sort->next)
     {
     struct slName *gene;
     for (gene = geneList; gene != NULL; gene = gene->next)
         testSort(emptyConfig, org, db, sort->name, gene->name, accColumn);
     }
 htmlPageFree(&emptyConfig);
 }
 
 void testDbFilters(struct htmlPage *dbPage, char *org, char *db, 
 	char *accColumn, struct slName *geneList)
 /* Test filter that returns just geneList.
  *
  * Three filters on the accession column are submitted in turn from the
  * advanced filter page:
  *   - the first gene alone, expecting exactly one unique accession row;
  *   - all genes separated by spaces, expecting one unique row per gene;
  *   - the first gene with its last character replaced by '*', expecting
  *     at least one row.
  * Finally the advanced filters are cleared.  The function returns early if
  * any page in the chain cannot be fetched, and does nothing (after a
  * warning) when geneList is empty. */
 {
 struct slName *gene;
 int rowCount;
 char accFilter[256];
 struct htmlPage *page;
 
 /* Every step below reads geneList->name, so an empty list cannot be tested. */
 if (geneList == NULL)
     {
     warn("testDbFilters: no genes for %s %s, skipping filter tests", org, db);
     return;
     }
 
 /* Start out with filter page. */
 page = quickSubmit(dbPage, NULL, org, db, accColumn, NULL,
                    "accOneFilterPage", advFilterVarName, "on");
 verbose(1, "testFilters %s %s\n", org, db);
 if (page == NULL)
     return;
 
 /* Set up to filter exactly one gene. */
 safef(accFilter, sizeof(accFilter), "near.as.%s.wild", accColumn);
     {
     htmlPageSetVar(page, NULL, accFilter, geneList->name);
     htmlPageSetVar(page, NULL, searchVarName, geneList->name);
     serialSubmit(&page, NULL, org, db, accColumn, geneList->name,
                  "accOneFilterSubmit", "Submit", "on");
     if (page == NULL)
         return;
 
     /* Make sure really got one gene. */
     rowCount = nearCountUniqAccRows(page);
     if (rowCount != 1)
         {
         qaStatusSoftError(nearTestList->status,
             "Acc exact filter returned %d items", rowCount);
         }
     }
 
 /* Set up filter for all genes in list. */
     {
     struct dyString *dy = dyStringNew(0);
     int geneCount = slCount(geneList);
     for (gene = geneList; gene != NULL; gene = gene->next)
         dyStringPrintf(dy, "%s ", gene->name);
     htmlPageSetVar(page, NULL, accFilter, dy->string);
     htmlPageSetVar(page, NULL, countVarName, "all");  /* despite 3 genes requested, must see all if many dupes */
     serialSubmit(&page, NULL, org, db, accColumn, dy->string,
                  "accMultiFilterSubmit", "Submit", "on");
     dyStringFree(&dy);
     if (page == NULL)
         return;
     rowCount = nearCountUniqAccRows(page);
     if (rowCount != geneCount)
         {
         qaStatusSoftError(nearTestList->status,
             "Acc multi filter expecting %d, got %d items", geneCount, rowCount);
         }
     }
 
 /* Set up filter for wildcard in list. */
     {
     struct dyString *dy = dyStringNew(0);
     /* int, not char: a char would truncate the length of a long name. */
     int len = strlen(geneList->name);
     /* Drop the last character; an empty name contributes nothing. */
     if (len > 0)
         dyStringAppendN(dy, geneList->name, len-1);
     dyStringAppendC(dy, '*');
     htmlPageSetVar(page, NULL, accFilter, dy->string);
     serialSubmit(&page, NULL, org, db, accColumn, dy->string,
                  "accWildFilterSubmit", "Submit", "on");
     dyStringFree(&dy);
     if (page == NULL)
         return;
     rowCount = nearCountRows(page);
     if (rowCount < 1)
         {
         qaStatusSoftError(nearTestList->status,
             "Acc wild filter no match");
         }
     }
 
 
 /* Clear out advanced filters. */
     {
     htmlPageFree(&page);
     page = quickSubmit(dbPage, NULL, org, db, NULL, NULL,
                        "advFilterClear", advFilterClearVarName, "on");
     }
 htmlPageFree(&page);
 }
 
 void testDb(struct htmlPage *orgPage, char *org, char *db)
 /* Test on one database: read genome.ra for its canonical table and id
  * column, draw clRepeat random transcripts using the global seed, open the
  * database's page from orgPage with an empty search, and run the column,
  * sort and filter tests against that page. */
 {
 struct hash *genomeRa;
 char *canonicalTable, *accColumn, *knownDatabase;
 struct slName *geneList, *sample;
 struct htmlPage *dbPage;
 
 genomeRa = hgReadRa(org, db, dataDir, "genome.ra", NULL);
 canonicalTable = hashMustFindVal(genomeRa, "canonicalTable");
 accColumn = hashMustFindVal(genomeRa, "idColumn");
 knownDatabase = hdbDefaultKnownDb(db);
 geneList = sqlRandomSampleWithSeed(knownDatabase, canonicalTable,
                                    "transcript", clRepeat, seed);
 
 verbose(1, "genelist:\n");
 for (sample = geneList; sample != NULL; sample = sample->next)
     verbose(1, "%s\n", sample->name);
 
 htmlPageSetVar(orgPage, NULL, "db", db);
 htmlPageSetVar(orgPage, NULL, searchVarName, "");
 
 dbPage = quickSubmit(orgPage, NULL, org, db, NULL, NULL,
                      "dbEmptyPage", "submit", "go");
 quickErrReport();
 if (dbPage != NULL)
     {
     testDbColumns(dbPage, org, db, geneList);
     testDbSorts(dbPage, org, db, accColumn, geneList);
     testDbFilters(dbPage, org, db, accColumn, geneList);
     }
 
 htmlPageFree(&dbPage);
 hashFree(&genomeRa);
 slNameFreeList(&geneList);
 }
 
 
 void testOrg(struct htmlPage *rootPage, struct htmlForm *rootForm, char *org, char *forceDb)
 /* Test on organism.  If forceDb is non-null, only test on
  * given database.
  * The organism page is fetched by submitting rootPage with "org" set and
  * an empty search; testDb() is then run for each value of the "db"
  * variable on that page's mainForm.  Aborts if the organism page has no
  * mainForm or no "db" variable. */
 {
 struct htmlPage *orgPage;
 struct htmlForm *mainForm;
 struct htmlFormVar *dbVar;
 struct slName *db;
 htmlPageSetVar(rootPage, rootForm, "org", org);
 /* Per the commit message: hgNear expects 0 for a missing database, so the
  * test supplies "0" here instead of NULL. */
-htmlPageSetVar(rootPage, rootForm, "db", NULL);
+htmlPageSetVar(rootPage, rootForm, "db", "0");
 htmlPageSetVar(rootPage, rootForm, searchVarName, "");
 /* NOTE(review): variables are set on rootForm but the submit below uses
  * rootPage->forms; presumably these are the same form - confirm. */
 orgPage = htmlPageFromForm(rootPage, rootPage->forms, "submit", "Go");
 if ((mainForm = htmlFormGet(orgPage, "mainForm")) == NULL)
     errAbort("Couldn't get main form on orgPage");
 /* NOTE(review): the message below says "org var" although the variable
  * being looked up is "db". */
 if ((dbVar = htmlFormVarGet(mainForm, "db")) == NULL)
     errAbort("Couldn't get org var");
 for (db = dbVar->values; db != NULL; db = db->next)
     {
     /* With forceDb set, every database other than that one is skipped. */
     if (forceDb == NULL || sameString(forceDb, db->name))
 	testDb(orgPage, org, db->name);
     }
 htmlPageFree(&orgPage);
 }
 
 void statsOnSubsets(struct nearTest *list, int subIx, FILE *f)
 /* Report tests of certain subtype.  Writes a "<field> subtotals" heading
  * to f, then one qaStatisticsReport() line for each distinct value that
  * info[subIx] takes across list, in slNameSort() order. */
 {
 struct nearTest *test;
 struct hash *hash = newHash(0);
 struct slName *typeList = NULL, *type;
 
 fprintf(f, "\n%s subtotals\n", nearTestInfoTypes[subIx]);
 
 /* Get list of all types in this field. */
 for (test = list; test != NULL; test = test->next)
     {
     char *info = test->info[subIx];
     if (!hashLookup(hash, info))
        {
        type = slNameNew(info);
        hashAdd(hash, info, type);
        slAddHead(&typeList, type);
        }
     }
 slNameSort(&typeList);
 /* The hash was only needed to spot duplicates while building typeList. */
 hashFree(&hash);
 
 for (type = typeList; type != NULL; type = type->next)
     {
     struct qaStatistics *stats;
     AllocVar(stats);
     for (test = list; test != NULL; test = test->next)
         {
         if (sameString(type->name, test->info[subIx]))
             {
             qaStatisticsAdd(stats, test->status);
             }
         }
     qaStatisticsReport(stats, type->name, f);
     freez(&stats);
     }
 /* The list of distinct values is private to this call; release it. */
 slFreeList(&typeList);
 }
 
 
 void reportSummary(struct nearTest *list, FILE *f)
 /* Write the summary to f: a subtotal section for each info field from
  * ntiiType through ntiiCol, followed by a "Total" line covering every
  * test in list. */
 {
 struct qaStatistics *total;
 struct nearTest *test;
 int subIx = 0;
 
 AllocVar(total);
 while (subIx <= ntiiCol)
     {
     statsOnSubsets(list, subIx, f);
     ++subIx;
     }
 for (test = list; test != NULL; test = test->next)
     {
     qaStatisticsAdd(total, test->status);
     }
 qaStatisticsReport(total, "Total", f);
 freeMem(total);
 }
 
 
 void reportAll(struct nearTest *list, FILE *f)
 /* Log to f every test in list whose status has an error message; tests
  * without a message are passed over. */
 {
 struct nearTest *test = list;
 
 while (test != NULL)
     {
     if (test->status->errMessage != NULL)
         nearTestLogOne(test, f);
     test = test->next;
     }
 }
 
 void hgNearTest(char *url, char *log)
 /* hgNearTest - Test hgNear web page.  Fetches the page at url, runs
  * testOrg() for the organism given on the command line or else for every
  * value of the "org" variable, then prints a summary to stdout and writes
  * the seed, the failing tests and the same summary to the file named by
  * log. */
 {
 struct htmlPage *rootPage = htmlPageGet(url);
 FILE *logFile = mustOpen(log, "w");
 struct htmlForm *mainForm;
 struct htmlFormVar *orgVar;
 
 htmlPageValidateOrAbort(rootPage);
 htmlPageSetVar(rootPage, NULL, orderVarName, "geneDistance");
 htmlPageSetVar(rootPage, NULL, countVarName, "25");
 
 mainForm = htmlFormGet(rootPage, "mainForm");
 if (mainForm == NULL)
     errAbort("Couldn't get main form");
 orgVar = htmlFormVarGet(mainForm, "org");
 if (orgVar == NULL)
     errAbort("Couldn't get org var");
 
 if (clOrg == NULL)
     {
     /* No organism requested: walk every one offered by the form. */
     struct slName *org;
     for (org = orgVar->values; org != NULL; org = org->next)
         testOrg(rootPage, mainForm, org->name, clDb);
     }
 else
     {
     testOrg(rootPage, mainForm, clOrg, clDb);
     }
 
 htmlPageFree(&rootPage);
 
 /* Tests were pushed on the head of the list; put them in run order. */
 slReverse(&nearTestList);
 
 reportSummary(nearTestList, stdout);
 fprintf(logFile,"seed=%d\n",seed);
 reportAll(nearTestList, logFile);
 fprintf(logFile, "---------------------------------------------\n");
 reportSummary(nearTestList, logFile);
 slFreeList(&nearTestList);
 carefulClose(&logFile);
 }
 
 #ifdef TEST 
 void hgNearTest(char *url)
 /* Variant compiled only when TEST is defined: fetch url once with
  * qaPageGet() and print its status to stdout. */
 {
 struct htmlPage *page;
 struct qaStatus *status = qaPageGet(url, &page);
 
 qaStatusReportOne(stdout, status, url);
 htmlPageFree(&page);
 }
 #endif /* TEST */
 
 int main(int argc, char *argv[])
 /* Process command line: parse the options, require exactly the url and
  * log arguments, seed the random number generator and run the test. */
 {
 char *url, *logName;
 
 pushCarefulMemHandler(200000000);
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 url = argv[1];
 logName = argv[2];
 
 /* Each global keeps its built-in default unless its option was given. */
 clDb = optionVal("db", clDb);
 clOrg = optionVal("org", clOrg);
 clSearch = optionVal("search", clSearch);
 dataDir = optionVal("dataDir", dataDir);
 clRepeat = optionInt("repeat", clRepeat);
 
 /* Seed the random number generator; the value is printed so it can be
  * passed back through -seed on a later run. */
 seed = optionInt("seed",time(NULL));
 printf("seed=%d\n",seed);
 srand(seed);
 
 hgNearTest(url, logName);
 carefulCheckHeap();
 return 0;
 }