9893abd21ce01ced9a71dbefb55f8a76ab2e0e60
max
  Mon Jan 29 05:58:17 2024 -0800
adding links to isPcr, shortMatch and findMotifs on hgBlat error
message, refs #32918

diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c
index 81300ae..d872778 100644
--- src/hg/hgBlat/hgBlat.c
+++ src/hg/hgBlat/hgBlat.c
@@ -1322,30 +1322,31 @@
 int alphaBetSize;
 if (isProt)
     {
     alphaBetSize = 20;
     genomeSize = genomeSize / 3;
     }
 else
     {
     alphaBetSize = 4;
     }
 int k = 1;
 double expected = genomeSize;
 for (k=1; k<36; k++)
     {
     expected /= alphaBetSize;
+    // Raising the threshold below from .004 to .05 would allow 18bp searches on hg38.
     if (expected < .004)
 	break;
     }
 return k;
 }
 
 long findGenomeSize(char *database)
 // get genomeSize from database.
 {
 struct sqlConnection *conn = hAllocConn(database);
 char query[256];
 sqlSafef(query, sizeof query, "select sum(size) from chromInfo");
 long genomeSize = sqlQuickLongLong(conn, query);
 hFreeConn(&conn);
 if (genomeSize == 0)
@@ -1669,32 +1670,37 @@
 
     if (++seqCount > maxSeqCount)
         {
 	warn("More than %d input sequences, stopping at %s<br>(see also: cgi-bin/hg.conf hgBlat.maxSequenceCount setting).",
 	    maxSeqCount, seq->name);
 	break;
 	}
     if (oneSize > maxSingleSize)
 	{
 	warn("Sequence %s is %d letters long (max is %d), skipping",
 	    seq->name, oneSize, maxSingleSize);
 	continue;
 	}
     if (oneSize < minSuggested)
         {
-	warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", 
-		seq->name, oneSize, minSuggested);
+	warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum).<br>"
+                "To search for short sequences in the current browser window, use our <a href='hgTrackUi?%s=%s&g=oligoMatch&oligoMatch=pack'>Short Sequence Match</a> track, "
+                "or, if you are using the command line and want to search the entire genome, our command line tool <tt>findMotifs</tt>, from the "
+                "<a target=_blank href='https://hgdownload.soe.ucsc.edu/downloads.html#utilities_downloads'>utilities download page</a>.<br>"
+                "For primers, you can use our tool <a href='hgPcr?%s=%s'>In-silico PCR</a>. In-silico PCR searches the entire genome or a set of transcripts. In the latter case, it can find matches that straddle exon/intron boundaries.<br><br>"
+                "Contact us if none of these options solve the problem. ",
+		seq->name, oneSize, minSuggested, cartSessionVarName(), cartSessionId(cart), cartSessionVarName(), cartSessionId(cart));
 	// we could use "continue;" here to actually enforce skipping, 
 	// but let's give the short sequence a chance, it might work.
 	// minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
 	if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
 	    continue;
 	}
     totalSize += oneSize;
     if (totalSize > maxTotalSize)
         {
 	warn("Sequence %s would take us over the %d letter limit, stopping here.",
 	     seq->name, maxTotalSize);
 	break;
 	}
 
     if (isTx)
@@ -1878,31 +1884,36 @@
 "The new dynamic BLAT servers are not supported, and they are noted as skipped in the output. "
 "<b>See our <a href='/FAQ/FAQblat.html#blat9'>BLAT All FAQ</a> for more information.</b>\n"
 );
 
 printf("<P>The <b>All Results</b> checkbox disables minimum matches filtering so all results are seen." 
 " For example, with a human dna search, 20 is minimum matches required, based on the genome size, to filter out lower-quality results.\n"
 "This checkbox can be useful with short queries and with the tiny genomes of microorganisms. \n"
 );
 
 printf("<P>For programmatic access, BLAT supports URL queries which return in JSON format. "
 "See our <a href=\"/FAQ/FAQblat.html#blat14\">BLAT FAQ</a> for more.</P>\n"
 );
 
 if (hgPcrOk(db))
     printf("<P>For locating PCR primers, use <A HREF=\"../cgi-bin/hgPcr?db=%s\">In-Silico PCR</A>"
-           " for best results instead of BLAT.</P>", db);
+           " for best results instead of BLAT. " 
+           "To search for short sequences &lt; 20bp only in the sequence shown on the Genome Browser, "
+           "use our <a href='hgTrackUi?%s=%s&g=oligoMatch&oligoMatch=pack'>Short Sequence Match</a> track, "
+           "If you are using the command line and want to search the entire genome, our command line tool <tt>findMotifs</tt>, from the "
+           "<a target=_blank href='https://hgdownload.soe.ucsc.edu/downloads.html#utilities_downloads'>utilities download page</a>.</p>",
+           db, cartSessionVarName(), cartSessionId(cart));
 puts("</TD></TR></TABLE>\n");
 
 
 
 printf("</FORM>\n");
 
 webNewSection("About BLAT");
 printf( 
 "<P>BLAT on DNA is designed to\n"
 "quickly find sequences of 95%% and greater similarity of length 25 bases or\n"
 "more.  It may miss more divergent or shorter sequence alignments.  It will find\n"
 "perfect sequence matches of 20 bases.\n"
 "BLAT on proteins finds sequences of 80%% and greater similarity of length 20 amino\n"
 "acids or more.  In practice DNA BLAT works well on primates, and protein\n"
 "BLAT on land vertebrates."