03ad0cc3681e3a08fa2988990bdea9262faddc24
galt
  Mon May 18 00:31:03 2020 -0700
Added All Results checkbox to hgBlat. Changed it to read tileSize etc from gfserver status so it can use whatever it finds. Correctly shows lengths and required lengths. Instead of limit 20 for dna it now detects min psl->match from genome size. Error messages and warnings should now be complete and correct about suggested and required minimum query lengths. In particular this is motivated by allowing extremely sensitive and short protein and dna queries on Sars-Cov2 (wuhCor1) as well as other microorganisms. refs #25477.

diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c
index 58331fb..d3ec464 100644
--- src/hg/hgBlat/hgBlat.c
+++ src/hg/hgBlat/hgBlat.c
@@ -22,36 +22,36 @@
 #include "blatServers.h"
 #include "web.h"
 #include "hash.h"
 #include "botDelay.h"
 #include "trashDir.h"
 #include "trackHub.h"
 #include "hgConfig.h"
 #include "errCatch.h"
 #include "portable.h"
 #include "portable.h"
 #include "dystring.h"
 #include "chromInfo.h"
 #include "net.h"
 #include "fuzzyFind.h"
 
-
 struct cart *cart;	/* The user's ui state. */
 struct hash *oldVars = NULL;
 boolean orgChange = FALSE;
 boolean dbChange = FALSE;
 boolean allGenomes = FALSE;
+boolean allResults = FALSE;
 
 
 struct gfResult
 /* Detailed gfServer results, this is a span of several nearby tiles, minimum 2 for dna. */
     {
     struct gfResult *next;
     /* have to multiply translated coordinates by 3 */
     int qStart;    /* Query Start Coordinate */  
     int qEnd;      /* Query End Coordinate */
     char *chrom;   /* Target Chrom Name */
     int tStart;    /* Target Start Coordinate */  
     int tEnd;      /* Target End Coordinate */
     int numHits;   /* number of tile hits, minimum  2 for dna */ 
     char tStrand;  /* + or - Target Strand used with prot, rnax, dnax */ 
     int tFrame;    /* Target Frame 0,1,2 (mostly ignorable?) used with prot, rnax, dnax */ 
@@ -280,40 +280,39 @@
 pthread_mutex_unlock( &pfdMutex );
 return errCount;
 }
 
 // ==================
 
 struct serverTable
 /* Information on a server. */
     {
     char *db;		/* Database name. */
     char *genome;	/* Genome name. */
     boolean isTrans;	/* Is tranlated to protein? */
     char *host;		/* Name of machine hosting server. */
     char *port;		/* Port that hosts server. */
     char *nibDir;	/* Directory of sequence files. */
+    int tileSize;       /* gfServer -tileSize */
+    int stepSize;       /* gfServer -stepSize */
+    int minMatch;       /* gfServer -minMatch */
     };
 
 char *typeList[] = {"BLAT's guess", "DNA", "protein", "translated RNA", "translated DNA"};
 char *outputList[] = {"hyperlink", "psl", "psl no header"};
 
-#ifdef LOWELAB
-int minMatchShown = 14;
-#else
-int minMatchShown = 20;
-#endif
+int minMatchShown = 0;
 
 static struct serverTable *trackHubServerTable(char *db, boolean isTrans)
 /* Find out if database is a track hub with a blat server */
 {
 char *host, *port;
 
 if (!trackHubGetBlatParams(db, isTrans, &host, &port))
     return NULL;
 
 struct serverTable *st;
 
 AllocVar(st);
 
 st->db = cloneString(db);
 st->genome = cloneString(hGenome(db));
@@ -345,33 +344,33 @@
 struct sqlConnection *conn = hConnectCentral();
 char query[256];
 struct sqlResult *sr;
 char **row;
 char dbActualName[32];
 
 /* If necessary convert database description to name. */
 sqlSafef(query, sizeof(query), "select name from dbDb where name = '%s'", db);
 if (!sqlExists(conn, query))
     {
     sqlSafef(query, sizeof(query), "select name from dbDb where description = '%s'", db);
     if (sqlQuickQuery(conn, query, dbActualName, sizeof(dbActualName)) != NULL)
         db = dbActualName;
     }
 
-/* Do a little join to get data to fit into the serverTable. */
-sqlSafef(query, sizeof(query), "select dbDb.name,dbDb.description,blatServers.isTrans"
-               ",blatServers.host,blatServers.port,dbDb.nibPath "
+/* Do a little join to get data to fit into the serverTable and grab dbDb.nibPath too. */
+sqlSafef(query, sizeof(query), "select dbDb.name,dbDb.description,blatServers.isTrans,"
+               "blatServers.host,blatServers.port,dbDb.nibPath "
 	       "from dbDb,blatServers where blatServers.isTrans = %d and "
 	       "dbDb.name = '%s' and dbDb.name = blatServers.db", 
 	       isTrans, db);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) == NULL)
     {
     errAbort("Can't find a server for %s database %s.  Click "
 	     "<A HREF=\"/cgi-bin/hgBlat?%s&command=start&db=%s\">here</A> "
 	     "to reset to default database.",
 	     (isTrans ? "translated" : "DNA"), db,
 	     cartSidUrlString(cart), hDefaultDb());
     }
 st.db = cloneString(row[0]);
 st.genome = cloneString(row[1]);
 st.isTrans = atoi(row[2]);
@@ -493,54 +492,52 @@
            enum gfType qType, enum gfType tType, 
            char *organism, boolean feelingLucky)
 /* Show all the places that align. */
 {
 boolean useBigPsl = cfgOptionBooleanDefault("useBlatBigPsl", TRUE);
 struct lineFile *lf = pslFileOpen(pslName);
 struct psl *pslList = NULL, *psl;
 char *browserUrl = hgTracksName();
 char *hgcUrl = hgcName();
 char uiState[64];
 char *vis;
 char unhideTrack[64];
 char *sort = cartUsualString(cart, "sort", pslSortList[0]);
 char *output = cartUsualString(cart, "output", outputList[0]);
 boolean pslOut = startsWith("psl", output);
-boolean isStraightNuc = (qType == gftRna || qType == gftDna);
-int  minThreshold = (isStraightNuc ? minMatchShown : 0);
 
 sprintf(uiState, "%s=%s", cartSessionVarName(), cartSessionId(cart));
 
 /* If user has hidden BLAT track, add a setting that will unhide the 
    track if user clicks on a browser link. */
 vis = cartOptionalString(cart, "hgUserPsl");
 if (vis != NULL && sameString(vis, "hide"))
     snprintf(unhideTrack, sizeof(unhideTrack), "&hgUserPsl=dense");
 else
     unhideTrack[0] = 0;
 
 while ((psl = pslNext(lf)) != NULL)
     {
-    if (psl->match >= minThreshold)
+    if (psl->match >= minMatchShown)
 	slAddHead(&pslList, psl);
     }
 lineFileClose(&lf);
 if (pslList == NULL)
     {
     printf("<table><tr><td><hr>Sorry, no matches found");
-    if (isStraightNuc)
-	printf(" (with score at least %d)", minThreshold);
+    if (!allResults)
+	printf(" (with score at least %d)", minMatchShown);
     printf("<hr><td></tr></table>\n");
     return;
     }
 
 pslSortListByVar(&pslList, sort);
 
 if(feelingLucky)
     {
     /* If we found something jump browser to there. */
     if(slCount(pslList) > 0)
 	printLuckyRedirect(browserUrl, pslList, database, pslName, faName, uiState, unhideTrack);
     /* Otherwise call ourselves again not feeling lucky to print empty 
        results. */
     else 
 	{
@@ -890,55 +887,46 @@
 char bestTStrand = ' ';
 char bestQStrand = ' ';
 
 char *thisChrom = NULL;
 int thisHits   = 0;
 int thisTStart = 0;
 int thisTEnd   = 0;
 int thisExons  = 0;
 char thisTStrand = ' ';
 char thisQStrand = ' ';
 
 int geneGap = ffIntronMaxDefault;  // about a million bases is a cutoff for genes.
 
 int qSlop = 5; // forgive 5 bases, about dna stepSize = 5.
 if (gH->complex)
-    qSlop = 4; // reduce for translated with tileSize/stepSize = 4.
+    qSlop = 4; // reduce for translated with stepSize = 4.
 
 struct gfResult *gfR = NULL, *gfLast = NULL;
 for(gfR=gH->gfList; gfR; gfR=gfR->next)
     {
     // filter on given queryFrame
     if (gfR->qFrame != queryFrame)
 	continue;
 
     if (gfLast && (gfR->tStrand == gfLast->tStrand) && sameString(gfR->chrom,gfLast->chrom) && 
 
 	(gfR->qStart >= (gfLast->qEnd - qSlop)) && (gfR->tStart >= gfLast->tEnd) && ((gfR->tStart - gfLast->tEnd) < geneGap))
 	{	
 	thisHits += gfR->numHits;
 	thisTEnd  = gfR->tEnd;
 	thisExons += 1;
-
-	//if (gH->complex)
-	    //{
-	    // gfR->tStrand, gfR->tFrame
-	    //if (gH->complex)
-		//{
-		//gfR->qFrame
-		//}
-	    //}
 	}
     else
 	{
 	thisHits   = gfR->numHits;
 	thisChrom  = gfR->chrom;
 	thisTStart = gfR->tStart;
 	thisTEnd   = gfR->tEnd;
 	thisExons  = 1;
 	thisTStrand = gfR->tStrand;
 	thisQStrand = gfR->qStrand;
 	}
     if (thisHits > bestHits)
 	{
 	bestHits   = thisHits;
 	bestExons  = thisExons;
@@ -1072,31 +1060,31 @@
 	    }
 
 	gfR->tStart = sqlUnsigned(word[3]); // e.g. 173515
 	gfR->tEnd   = sqlUnsigned(word[4]); // e.g. 173586
 
 	numHits     = sqlUnsigned(word[5]); // e.g. 14
 
 	// Frustrated with weird little results with vastly exaggerated hit-score,
 	// I have flattened that out so it maxes out at #tiles that could fit 
 	// It seems to still work just fine. I am going to leave it for now.
 	// One minor note, by suppressing extra scores for short exons,
 	// it will not prefer alignments with the same number of hits, but more exons.
 	if (!gH->complex) // dna xType
 	    {
 	    // maximum tiles that could fit in qSpan
-	    int limit = ((gfR->qEnd - gfR->qStart) - 6)/5; 
+	    int limit = ((gfR->qEnd - gfR->qStart) - 6)/5;  // stepSize=5
 	    ++limit;  // for a little extra.
 	    if (numHits > limit)
 		numHits = limit;
 	    }
 
 	gfR->numHits = numHits;
 
 	if (gH->complex)
 	    {
 	    gfR->tStrand = word[6][0];          // e.g. + or -
 	    gfR->tFrame = sqlUnsigned(word[7]); // e.g. 0,1,2
 	    if (!gH->isProt)
 		{
 		gfR->qFrame = sqlUnsigned(word[8]); // e.g. 0,1,2
 		}
@@ -1233,38 +1221,144 @@
 
 close(gH->sd);
 }
 
 int gfConnectEx(char *host, char *port)
 /* Try to connect to gfServer */
 {
 int conn = -1;
 if (allGenomes)
     conn = gfMayConnect(host, port); // returns -1 on failure
 else
     conn = gfConnect(host, port);  // errAborts on failure.
 return conn;
 }
 
+int findMinMatch(long genomeSize, boolean isProt)
+// Return default minMatch for genomeSize,
+// the expected number of occurrences of string length k 
+// in random genome of size N = N/(4^k)
+{
+int alphaBetSize;
+if (isProt)
+    {
+    alphaBetSize = 20;
+    genomeSize = genomeSize / 3;
+    }
+else
+    {
+    alphaBetSize = 4;
+    }
+int k = 1;
+double expected = genomeSize;
+for (k=1; k<36; k++)
+    {
+    expected /= alphaBetSize;
+    if (expected < .004)
+	break;
+    }
+return k;
+}
+
+long findGenomeSize(char *database)
+// get genomeSize from database.
+{
+struct sqlConnection *conn = hAllocConn(database);
+char query[256];
+sqlSafef(query, sizeof query, "select sum(size) from chromInfo");
+long genomeSize = sqlQuickLongLong(conn, query);
+hFreeConn(&conn);
+if (genomeSize == 0)
+    {
+    warn("Genome Size not found for %s", database);
+    genomeSize = 3272116950;  // substitute human genome size
+    }
+return genomeSize;
+}
+
+long findGenomeSizeFromHub(char *database)
+// fetch genome size by adding up chroms from hub 2bit.
+{
+struct trackHubGenome *genome = trackHubGetGenome(database);
+
+genome->tbf = twoBitOpen(genome->twoBitPath);
+
+long genomeSize = 0;
+struct twoBitIndex *index;
+for (index = genome->tbf->indexList; index != NULL; index = index->next)
+    {
+    genomeSize += twoBitSeqSize(genome->tbf, index->name);
+    }
+
+twoBitClose(&genome->tbf);
+
+return genomeSize;
+}
+
+
+int findGenomeParams(struct serverTable *serve)
+/* Send status message to server arnd report result.
+ * Get tileSize stepSize and minMatch.
+ */
+
+{
+char buf[256];
+int sd = 0;
+int ret = 0;
+
+/* Put together command. */
+sd = gfConnectEx(serve->host, serve->port);
+sprintf(buf, "%sstatus", gfSignature());
+mustWriteFd(sd, buf, strlen(buf));
+
+for (;;)
+    {
+    if (netGetString(sd, buf) == NULL)
+	{
+	warn("Error reading status information from %s:%s",serve->host, serve->port);
+	ret = -1;
+        break;
+	}
+    if (sameString(buf, "end"))
+        break;
+    else
+        {
+	if (startsWith("tileSize ", buf))
+	    {
+            serve->tileSize = atoi(buf+strlen("tileSize "));
+	    }
+	if (startsWith("stepSize ", buf))
+	    {
+            serve->stepSize = atoi(buf+strlen("stepSize "));
+	    }
+	if (startsWith("minMatch ", buf))
+	    {
+            serve->minMatch = atoi(buf+strlen("minMatch "));
+	    }
+        }
+    }
+close(sd);
+return(ret); 
+}
+
 void blatSeq(char *userSeq, char *organism, char *database, int dbCount)
 /* Blat sequence user pasted in. */
 {
 FILE *f;
 struct dnaSeq *seqList = NULL, *seq;
 struct tempName pslTn, faTn;
 int maxSingleSize, maxTotalSize, maxSeqCount;
-int minSingleSize = minMatchShown;
 char *genome, *db;
 char *type = cgiString("type");
 char *seqLetters = cloneString(userSeq);
 struct serverTable *serve;
 int conn;
 int oneSize, totalSize = 0, seqCount = 0;
 boolean isTx = FALSE;
 boolean isTxTx = FALSE;
 boolean txTxBoth = FALSE;
 struct gfOutput *gvo;
 boolean qIsProt = FALSE;
 enum gfType qType, tType;
 struct hash *tFileCache = gfFileCacheNew();
 // allGenomes ignores I'm Feeling Lucky for simplicity
 boolean feelingLucky = cgiBoolean("Lucky") && !allGenomes;
@@ -1357,122 +1451,171 @@
 maxSeqCount = 25;
 #endif
 char *optionMaxSeqCount = cfgOptionDefault("hgBlat.maxSequenceCount", NULL);
 
 if (isNotEmpty(optionMaxSeqCount))
    maxSeqCount = sqlSigned(optionMaxSeqCount);
 
 /* Create temporary file to store sequence. */
 trashDirFile(&faTn, "hgSs", "hgSs", ".fa");
 faWriteAll(faTn.forCgi, seqList);
 
 /* Create a temporary .psl file with the alignments against genome. */
 trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx");
 f = mustOpen(pslTn.forCgi, "w");
 gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE);
+
+
+/* Strategy for calculating minMatchShown.
+ Calculate the minimum size for filtering based on genome size.
+ The expected numberof occurrences of q = n/(4^k)
+ = N * pow(4,-k);  where N is genome size, and k is minimum size required.
+ For protein, use 22 instead of 4.
+ For human, a cutoff of 20, expected number < .003 or 0.3% 
+ so the probability of getting it just by chance is low.
+*/
+
+int xlat;
+
 serve = findServer(db, isTx);
 /* Write header for extended (possibly protein) psl file. */
 if (isTx)
     {
     if (isTxTx)
         {
 	qType = gftDnaX;
 	tType = gftDnaX;
+        xlat = 3;
 	}
     else
         {
 	qType = gftProt;
 	tType = gftDnaX;
+        xlat = 1;
 	}
+    serve->tileSize = 4;
+    serve->stepSize = 4;
+    serve->minMatch = 3;
     }
 else
     {
     qType = gftDna;
     tType = gftDna;
+    serve->tileSize = 11;
+    serve->stepSize = 5;
+    serve->minMatch = 2;
+    xlat = 1;
     }
 pslxWriteHead(f, qType, tType);
 
-if (qType == gftProt)
+
+long genomeSize = 3272116950;  // substitute human genome size
+
+// Use with message about recommended length when using short query
+// This is the minimum that can find anything at all.
+int minSuggested = 0;
+
+if (allGenomes)
+    {
+    minMatchShown = 0;
+    }
+else
+    {
+    #ifdef LOWELAB
+    minMatchShown = 14;
+    #else
+    // read genome size
+    if (trackHubDatabase(database))
 	{
-    minSingleSize = 14;
+	genomeSize = findGenomeSizeFromHub(database);
 	}
-else if (qType == gftDnaX)
+    else
 	{
-    minSingleSize = 36;
+	genomeSize = findGenomeSize(database);
+	}
+    minMatchShown = findMinMatch(genomeSize, qType == gftProt);
+    #endif
+    if (allResults)
+	minMatchShown = 0;
+
+    // read tileZize stepSize minMatch from server status
+    findGenomeParams(serve);
+
+    int minLucky = (serve->minMatch * serve->stepSize + (serve->tileSize - serve->stepSize)) * xlat;
+
+    minSuggested = max(minMatchShown,minLucky);
     }
 
 int seqNumber = 0;
 /* Loop through each sequence. */
 for (seq = seqList; seq != NULL; seq = seq->next)
     {
     printf(" "); fflush(stdout);  /* prevent apache cgi timeout by outputting something */
     oneSize = realSeqSize(seq, !isTx);
     // Impose half the usual bot delay per sequence
     
     if (dbCount == 0)
 	hgBotDelayFrac(0.5);
     if (++seqCount > maxSeqCount)
         {
 	warn("More than %d input sequences, stopping at %s<br>(see also: cgi-bin/hg.conf hgBlat.maxSequenceCount setting).",
 	    maxSeqCount, seq->name);
 	break;
 	}
     if (oneSize > maxSingleSize)
 	{
 	warn("Sequence %s is %d letters long (max is %d), skipping",
 	    seq->name, oneSize, maxSingleSize);
 	continue;
 	}
-    if (oneSize < minSingleSize)
+    if (oneSize < minSuggested)
         {
 	warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", 
-		seq->name, oneSize, minSingleSize);
+		seq->name, oneSize, minSuggested);
 	// we could use "continue;" here to actually enforce skipping, 
 	// but let's give the short sequence a chance, it might work.
 	// minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
 	if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
 	    continue;
 	}
     totalSize += oneSize;
     if (totalSize > maxTotalSize)
         {
 	warn("Sequence %s would take us over the %d letter limit, stopping here.",
 	     seq->name, maxTotalSize);
 	break;
 	}
 
     conn = gfConnectEx(serve->host, serve->port);
 
     if (isTx)
 	{
 	gvo->reportTargetStrand = TRUE;
 	if (isTxTx)
 	    {
 	    if (allGenomes)
 		queryServer(conn, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber);
 	    else
-		gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, 
-		    tFileCache, gvo, !txTxBoth);
+		gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth);
 	    if (txTxBoth)
 		{
 		reverseComplement(seq->dna, seq->size);
 		conn = gfConnectEx(serve->host, serve->port);
 		if (allGenomes)
 		    queryServer(conn, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber);
 		else
-		    gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, 
-			tFileCache, gvo, FALSE);
+		    gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE);
 		}
 	    }
 	else
 	    {
 	    if (allGenomes)
 		queryServer(conn, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber);
 	    else
 		gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo);
 	    }
 	}
     else
 	{
 	if (allGenomes)
 	    queryServer(conn, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber);
 	else
@@ -1510,84 +1653,101 @@
 
 /* JavaScript to update form when org changes */
 char *onChangeText = ""
     "document.mainForm.changeInfo.value='orgChange';"
     "document.mainForm.submit();";
 
 char *userSeq = NULL;
 char *type = NULL;
 
 printf( 
 "<FORM ACTION=\"../cgi-bin/hgBlat\" METHOD=\"POST\" ENCTYPE=\"multipart/form-data\" NAME=\"mainForm\">\n"
 "<H2>BLAT Search Genome</H2>\n");
 cartSaveSession(cart);
 puts("\n");
 puts("<INPUT TYPE=HIDDEN NAME=changeInfo VALUE=\"\">\n");
-puts("<TABLE BORDER=0 WIDTH=80>\n<TR>\n");
+puts("<TABLE BORDER=0 WIDTH=80>\n");
+printf("<TR>\n");
 printf("<TD ALIGN=CENTER style='overflow:hidden;white-space:nowrap;'>Genome:");
 printf(" <INPUT TYPE=CHECKBOX id=allGenomes NAME=allGenomes VALUE=\"\">");
 printf(" <span id=searchAllText> Search all<span>");
 printf("</TD>");
 // clicking on the Search ALL text clicks the checkbox.
 jsOnEventById("click", "searchAllText", 
     "document.mainForm.allGenomes.click();"
     "return false;"   // cancel the default
     );
 
 printf("<TD ALIGN=CENTER>Assembly:</TD>");
 printf("<TD ALIGN=CENTER>Query type:</TD>");
 printf("<TD ALIGN=CENTER>Sort output:</TD>");
 printf("<TD ALIGN=CENTER>Output type:</TD>");
 printf("<TD ALIGN=CENTER>&nbsp</TD>");
-printf("</TR>\n<TR>\n");
+printf("</TR>\n");
+
+printf("<TR>\n");
 printf("<TD ALIGN=CENTER>\n");
 printBlatGenomeListHtml(db, "change", onChangeText);
 printf("</TD>\n");
 printf("<TD ALIGN=CENTER>\n");
 printBlatAssemblyListHtml(db);
 printf("</TD>\n");
 printf("<TD ALIGN=CENTER>\n");
 if (orgChange)
     type = cartOptionalString(cart, "type");
 cgiMakeDropList("type", typeList, ArraySize(typeList), type);
 printf("</TD>\n");
 printf("<TD ALIGN=CENTER>\n");
 cgiMakeDropList("sort", pslSortList, ArraySize(pslSortList), cartOptionalString(cart, "sort"));
 printf("</TD>\n");
 printf("<TD ALIGN=CENTER>\n");
 cgiMakeDropList("output", outputList, ArraySize(outputList), cartOptionalString(cart, "output"));
 printf("</TD>\n");
 printf("</TR>\n<TR>\n");
 userSeq = cartUsualString(cart, "userSeq", "");
 printf("<TD COLSPAN=5 ALIGN=CENTER>\n");
 htmlPrintf("<TEXTAREA NAME=userSeq ROWS=14 COLS=80>%s</TEXTAREA>\n", userSeq);
 printf("</TD>\n");
-printf("</TR>\n<TR>\n");
-printf("<TD COLSPAN=5 ALIGN=CENTER>\n");
+printf("</TR>\n");
+
+printf("<TR>\n");
+printf("<TD COLSPAN=1 ALIGN=CENTER style='overflow:hidden;white-space:nowrap;'>\n");
+cgiMakeCheckBoxWithId("allResults", allResults, "allResults");
+printf(" <span id=allResultsText> All Results<br> (no minimum score)<span>");
+// clicking on the All Results text clicks the checkbox.
+jsOnEventById("click", "allResultsText", 
+    "document.mainForm.allResults.click();"
+    "return false;"   // cancel the default
+    );
+printf("</TD>\n");
+
+printf("<TD COLSPAN=4 ALIGN=LEFT>\n");
 printf("<INPUT TYPE=SUBMIT NAME=Submit VALUE=submit>\n");
 printf("<INPUT TYPE=SUBMIT NAME=Lucky VALUE=\"I'm feeling lucky\">\n");
 printf("<INPUT TYPE=SUBMIT NAME=Clear VALUE=clear>\n");
 printf("</TD>\n");
-printf("</TR>\n<TR>\n"); 
+printf("</TR>\n");
+
+printf("<TR>\n"); 
 puts("<TD COLSPAN=5 WIDTH=\"100%\">\n" 
     "Paste in a query sequence to find its location in the\n"
     "the genome. Multiple sequences may be searched \n"
     "if separated by lines starting with '>' followed by the sequence name.\n"
     "</TD>\n"
     "</TR>\n"
-
 );
+
 puts("<TR><TD COLSPAN=5 WIDTH=\"100%\">\n"); 
 puts("<BR><B>File Upload:</B> ");
 puts("Rather than pasting a sequence, you can choose to upload a text file containing "
 	 "the sequence.<BR>");
 puts("Upload sequence: <INPUT TYPE=FILE NAME=\"seqFile\">");
 puts(" <INPUT TYPE=SUBMIT Name=Submit VALUE=\"submit file\"><P>\n");
 printf("%s", 
 "<P>Only DNA sequences of 25,000 or fewer bases and protein or translated \n"
 "sequence of 10000 or fewer letters will be processed.  Up to 25 sequences\n"
 "can be submitted at the same time. The total limit for multiple sequence\n"
 "submissions is 50,000 bases or 25,000 letters.<br> A valid example "
 "is <tt>GTCCTCGGAACCAGGACCTCGGCGTGGCCTAGCG</tt> (human SOD1).\n</P>\n");
 
 printf("%s", 
 "<P>The <b>Search all</b> checkbox allows you to search all\n"
@@ -1814,30 +1974,32 @@
 boolean clearUserSeq = cgiBoolean("Clear");
 allGenomes = cgiVarExists("allGenomes");
 
 cart = theCart;
 dnaUtilOpen();
 
 orgChange = sameOk(cgiOptionalString("changeInfo"),"orgChange");
 if (orgChange)
     {
     cgiVarSet("db", hDefaultDbForGenome(cgiOptionalString("org"))); 
     }
 getDbAndGenome(cart, &db, &organism, oldVars);
 char *oldDb = cloneString(db);
 findClosestServer(&db, &organism);
 
+allResults = cartUsualBoolean(cart, "allResults", allResults);
+
 /* Get sequence - from userSeq variable, or if 
  * that is empty from a file. */
 if (clearUserSeq)
     {
     cartSetString(cart, "userSeq", "");
     cartSetString(cart, "seqFile", "");
     }
 userSeq = cartUsualString(cart, "userSeq", "");
 if (isEmpty(userSeq))
     {
     userSeq = cartOptionalString(cart, "seqFile");
     }
 if (isEmpty(userSeq) || orgChange)
     {
     cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism));
@@ -1929,31 +2091,31 @@
 	// requires db for chromSize, do database after multi-threading done.
 	changeMaxGenePositionToPositiveStrandCoords(pfdDone);
 
         // sort by maximum hits
 	slSort(&pfdDone, genomeHitsCmp);
 
 	// Print instructions
         printf("The single best alignment found for each assembly is shown below.\n"
 		"The approximate results below are sorted by number of matching 'tiles', "
                 "perfectly matching sub-sequences of length 11 (DNA) "
                 "or 4 (protein). Using only tile hits, this speedy method can not see mismatches.");
 	printf("Click the 'assembly' link to trigger a full BLAT alignment for that genome. \n");
 	printf("The entire alignment, including mismatches and gaps, must score 20 or higher in order to appear in the Blat output.\n");
 	printf("For more details see the <a href='/FAQ/FAQblat.html#blat9'>BLAT FAQ</a>.<br>\n");
 
-	// Print report  // TODO move to final report at the end of ALL Assemblies
+	// Print report  // move to final report at the end of ALL Assemblies
 	int lastSeqNumber = -1;
 	int idCount = 0;
 	char id[256];
 	struct genomeHits *gH = NULL;
 	for (gH = pfdDone; gH; gH = gH->next)
 	    {
 	    if (lastSeqNumber != gH->seqNumber)
 		{
 		if (lastSeqNumber != -1) // end previous table
 		    {
 		    printf("</TABLE><br><br>\n");
 		    }
 		lastSeqNumber = gH->seqNumber;
 		// print next sequence table header
 		printf("<TABLE cellspacing='5'>\n");