387377eb3982c570bc06728b9e0380c3d1c006ec
max
  Wed Jan 25 05:30:25 2023 -0800
adding linkout to primer3 RT-QPCR primer designer from hgGene, refs #30545

diff --git src/hg/cgilib/hgSeq.c src/hg/cgilib/hgSeq.c
index aeddc35..694ac16 100644
--- src/hg/cgilib/hgSeq.c
+++ src/hg/cgilib/hgSeq.c
@@ -1,22 +1,24 @@
 /* hgSeq - Fetch and format DNA sequence data (to stdout) for gene records. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
+#include "hCommon.h"
 #include "dnautil.h"
 #include "dnaseq.h"
+#include "dystring.h"
 #include "cart.h"
 #include "cheapcgi.h"
 #include "hdb.h"
 #include "web.h"
 #include "rmskOut.h"
 #include "fa.h"
 #include "genePred.h"
 #include "bed.h"
 #include "hgSeq.h"
 #include "trackHub.h"
 
 
 int hgSeqChromSize(char *db, char *chromName)
 /* get chrom size if there's a database out there,
  * otherwise just return 0 */
@@ -240,53 +242,105 @@
 else if ((table == NULL) || (table[0] == 0))
     {
     hti = NULL;
     }
 else
     {
     hParseTableName(db, table, rootName, chrom);
     hti = hFindTableInfo(db, chrom, rootName);
     if (hti == NULL)
 	webAbort("Error", "Could not find table info for table %s (%s)",
 		 rootName, table);
     }
 hgSeqOptionsHtiCart(hti, cart);
 }
 
+static struct dyString *genPrimer3Exons(int rCount, unsigned *rStarts, unsigned *rSizes, boolean *exonFlags, int seqStart) 
+/* generate string with exon positions on transcript sequence, format <relExonStart>-<relExonEnd,... e.g. "10-20,30-40," and return.
+ * Result must be freed.*/
+{
+struct dyString* exonStr = dyStringNew(1024);
+for (int i=0;  i < rCount;  i++)
+{
+    int start = rStarts[i] - seqStart;
+    int size  = rSizes[i];
+    if (exonFlags[i])
+        dyStringPrintf(exonStr, "%d-%d,", start, size);
+}
+return exonStr;
+}
+
+static void printPrimerForm(char *seq, char *exons, char *db, char strand, char *returnUrl) {
+    /* print the form to send arguments to primer3 and click the submit button */
+    puts("<form id='primer3Form' method='POST' action='https://dev.primer3plus.com/primer3plus/api/v1/upload'>\n");
+
+    printf("<input type='hidden' name='SEQUENCE_TEMPLATE' value='%s'>\n", seq);
+    printf("<input type='hidden' name='P3P_GB_EXONS' value='%s'>\n", exons);
+    printf("<input type='hidden' name='P3P_GB_DB' value='%s'>\n", db);
+    printf("<input type='hidden' name='P3P_GB_ORIENTATION' value='%c'>\n", strand);
+    printf("<input type='hidden' name='P3P_GB_RETURN_PATH' value='%s'>\n", returnUrl);
+
+    char *chrom = cgiString("c");
+    char *winLeft = cgiString("l");
+    char *winRight = cgiString("r");
+
+    printf("<input type='hidden' name='P3P_GB_POSITION' value='%s:%s-%s'>\n", chrom, winLeft, winRight);
+    puts("<input style='display:none' type='submit' value='Submit'>");
+    puts("</form>");
+    jsInline("document.getElementById(\"primer3Form\").submit();\n");
+}
+
 static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name,
 			  int rCount, unsigned *rStarts, unsigned *rSizes,
-			  boolean *exonFlags, boolean *cdsFlags)
-/* Concatenate and print out dna for a series of regions. */
+			  boolean *exonFlags, boolean *cdsFlags, boolean toPrimer3)
+/* Concatenate and print out dna for a series of regions. 
+ *
+ * If toPrimer3 is true, will send them to primer3, overriding most arguments to allow exon-exon primers..*/
 {
 // Note: this code use to generate different sequence ids if the global
 // database in hdb was different than the db parameter.  This functionality
 // has been removed since the global database was removed and it didn't
 // appear to be used.
 
 struct dnaSeq *rSeq = NULL;
 struct dnaSeq *cSeq = NULL;
 char recName[256];
 int seqStart, seqEnd;
 int offset, cSize;
 int i;
 boolean isRc       = (strand == '-') || cgiBoolean("hgSeq.revComp");
-boolean maskRep  = cgiBoolean("hgSeq.maskRepeats");
 int padding5       = cgiOptionalInt("hgSeq.padding5", 0);
 int padding3       = cgiOptionalInt("hgSeq.padding3", 0);
-char *casing     = cgiString("hgSeq.casing");
-char *repMasking = cgiString("hgSeq.repMasking");
 char *granularity  = cgiOptionalString("hgSeq.granularity");
+
+char *casing, *repMasking;
+boolean maskRep;
+
+if (toPrimer3) 
+    {
+    casing = "lower";
+    maskRep = TRUE;
+    repMasking = "upper";
+    granularity = "gene";
+    } 
+else 
+    {
+    casing       = cgiString("hgSeq.casing");
+    repMasking   = cgiString("hgSeq.repMasking");
+    maskRep    = cgiBoolean("hgSeq.maskRepeats");
+    }
+
 boolean concatRegions = granularity && sameString("gene", granularity);
 
 if (rCount < 1)
     return;
 
 /* Don't support padding if granularity is gene (i.e. concat'ing all). */
 if (concatRegions)
     {
     padding5 = padding3 = 0;
     }
 
 i = rCount - 1;
 seqStart = rStarts[0]             - (isRc ? padding3 : padding5);
 seqEnd   = rStarts[i] + rSizes[i] + (isRc ? padding5 : padding3);
 /* Padding might push us off the edge of the chrom; if so, truncate: */
@@ -296,162 +350,178 @@
 	padding3 += seqStart;
     else
 	padding5 += seqStart;
     seqStart = 0;
     }
 
 /* if we know the chromSize, don't pad out beyond it */
 if ((chromSize > 0) && (seqEnd > chromSize))
     {
     if (isRc)
 	padding5 += (chromSize - seqEnd);
     else
 	padding3 += (chromSize - seqEnd);
     seqEnd = chromSize;
     }
+
 if (seqEnd <= seqStart)
     {
     printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n",
 	   db, 
 	   name,
 	   chrom, seqStart+1, seqEnd,
 	   padding5, padding3);
     return;
     }
+
 if (maskRep)
     {
     rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed);
     if (sameString(repMasking, "N"))
 	lowerToN(rSeq->dna, strlen(rSeq->dna));
     if (!sameString(casing, "upper"))
 	tolowers(rSeq->dna);
     }
 else if (sameString(casing, "upper"))
     rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper);
 else
     rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower);
 
 /* Handle casing and compute size of concatenated sequence */
 cSize = 0;
+
 for (i=0;  i < rCount;  i++)
     {
     if ((sameString(casing, "exon") && exonFlags[i]) ||
 	(sameString(casing, "cds") && cdsFlags[i]))
 	{
 	int rStart = rStarts[i] - seqStart;
 	toUpperN(rSeq->dna+rStart, rSizes[i]);
 	}
     cSize += rSizes[i];
     }
+
 cSize += (padding5 + padding3);
 AllocVar(cSeq);
 cSeq->dna = needLargeMem(cSize+1);
 cSeq->size = cSize;
 
 offset = 0;
+
 for (i=0;  i < rCount;  i++)
     {
     int start = rStarts[i] - seqStart;
     int size  = rSizes[i];
+
     if (i == 0)
 	{
 	start -= (isRc ? padding3 : padding5);
 	assert(start == 0);
 	size  += (isRc ? padding3 : padding5);
 	}
     if (i == rCount-1)
 	{
 	size  += (isRc ? padding5 : padding3);
 	}
     memcpy(cSeq->dna+offset, rSeq->dna+start, size);
     offset += size;
     }
+
 assert(offset == cSeq->size);
 cSeq->dna[offset] = 0;
 freeDnaSeq(&rSeq);
 
-if (isRc)
+if (isRc && !toPrimer3)
     reverseComplement(cSeq->dna, cSeq->size);
 
+if (toPrimer3)
+{
+    struct dyString* primer3Exons = genPrimer3Exons(rCount, rStarts, rSizes, exonFlags, seqStart);
+    printPrimerForm(cSeq->dna, primer3Exons->string, db, strand, hgAbsUrlCgi("hgTracks"));
+    dyStringFree(&primer3Exons);
+}
+else
+{
     safef(recName, sizeof(recName),
           "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d "
           "strand=%c repeatMasking=%s",
           db, 
           name,
           chrom, seqStart+1, seqEnd,
           padding5, padding3,
           (isRc ? '-' : '+'),
           (maskRep ? repMasking : "none"));
     faWriteNext(stdout, recName, cSeq->dna, cSeq->size);
     freeDnaSeq(&cSeq);
 }
+}
 
 
 static void hgSeqRegionsAdjDb(char *db, char *chrom, int chromSize, char strand, char *name,
 		       boolean concatRegions, boolean concatAdjacent,
 		       int rCount, unsigned *rStarts, unsigned *rSizes,
-		       boolean *exonFlags, boolean *cdsFlags)
+		       boolean *exonFlags, boolean *cdsFlags, boolean toPrimer3)
 /* Concatenate and print out dna for a series of regions, 
  * optionally concatenating adjacent exons. */
 {
 if (concatRegions || (rCount == 1))
     {
     hgSeqConcatRegionsDb(db, chrom, chromSize, strand, name,
-			 rCount, rStarts, rSizes, exonFlags, cdsFlags);
+			 rCount, rStarts, rSizes, exonFlags, cdsFlags, toPrimer3);
     }
 else
     {
     int i, count;
     boolean isRc = (strand == '-');
     char rName[256];
     for (i=0,count=0;  i < rCount;  i++,count++)
 	{
 	int j, jEnd, len, lo, hi;
 	safef(rName, sizeof(rName), "%s_%d", name, count);
 	j = (isRc ? (rCount - i - 1) : i);
 	jEnd = (isRc ? (j - 1) : (j + 1));
 	if (concatAdjacent && exonFlags[j])
 	    {
 	    lo = (isRc ? jEnd       : (jEnd - 1));
 	    hi = (isRc ? (jEnd + 1) : jEnd);
 	    while ((i < rCount) &&
 		   ((rStarts[lo] + rSizes[lo]) == rStarts[hi]) &&
 		   exonFlags[jEnd])
 		{
 		i++;
 		jEnd = (isRc ? (jEnd - 1) : (jEnd + 1));
 		lo = (isRc ? jEnd       : (jEnd - 1));
 		hi = (isRc ? (jEnd + 1) : jEnd);
 		}
 	    }
 	len = (isRc ? (j - jEnd) : (jEnd - j));
 	lo  = (isRc ? (jEnd + 1) : j);
 	hgSeqConcatRegionsDb(db, chrom, chromSize, strand, rName,
 			     len, &rStarts[lo], &rSizes[lo], &exonFlags[lo],
-			     &cdsFlags[lo]);
+			     &cdsFlags[lo], toPrimer3);
 	}
     }
 }
 
 static void hgSeqRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name,
 		    boolean concatRegions,
 		    int rCount, unsigned *rStarts, unsigned *rSizes,
 		    boolean *exonFlags, boolean *cdsFlags)
 /* Concatenate and print out dna for a series of regions. */
 {
 hgSeqRegionsAdjDb(db, chrom, chromSize, strand, name, concatRegions, FALSE,
-		  rCount, rStarts, rSizes, exonFlags, cdsFlags);
+		  rCount, rStarts, rSizes, exonFlags, cdsFlags, FALSE);
 }
 
 static int maxStartsOffset = 0;
 
 static void addFeature(int *count, unsigned *starts, unsigned *sizes,
 		boolean *exonFlags, boolean *cdsFlags,
                 int start, int size, boolean eFlag, boolean cFlag,
                 int chromSize)
 {
 if (start < 0)
     {
     size += start;
     start = 0;
     }
 
@@ -488,45 +558,67 @@
 
 int hgSeqBed(char *db, struct hTableInfo *hti, struct bed *bedList)
 /* Print out dna sequence from the given database of all items in bedList.  
  * hti describes the bed-compatibility level of bedList items.  
  * Returns number of FASTA records printed out. */
 {
 struct bed *bedItem;
 char itemName[512];
 boolean isRc;
 int count;
 unsigned *starts = NULL;
 unsigned *sizes = NULL;
 boolean *exonFlags = NULL;
 boolean *cdsFlags = NULL;
 int i, rowCount, totalCount;
+
 boolean promoter   = cgiBoolean("hgSeq.promoter");
 boolean intron     = cgiBoolean("hgSeq.intron");
 boolean utrExon5   = cgiBoolean("hgSeq.utrExon5");
-boolean utrIntron5 = utrExon5 && intron;
 boolean cdsExon    = cgiBoolean("hgSeq.cdsExon");
-boolean cdsIntron  = cdsExon && intron;
 boolean utrExon3   = cgiBoolean("hgSeq.utrExon3");
-boolean utrIntron3 = utrExon3 && intron;
 boolean downstream = cgiBoolean("hgSeq.downstream");
+
 int promoterSize   = cgiOptionalInt("hgSeq.promoterSize", 0);
 int downstreamSize = cgiOptionalInt("hgSeq.downstreamSize", 0);
 char *granularity  = cgiOptionalString("hgSeq.granularity");
+
+boolean toPrimer3  = cgiBoolean("primer3");
+if (toPrimer3) 
+{
+    promoter = FALSE;
+    utrExon5 = TRUE;
+    intron = TRUE;
+    cdsExon = TRUE;
+    utrExon3 = TRUE;
+    downstream = FALSE;
+    downstreamSize = 0;
+    promoterSize = 0;
+    granularity = "gene";
+}
+
+boolean utrIntron3 = utrExon3 && intron;
+boolean utrIntron5 = utrExon5 && intron;
+boolean cdsIntron  = cdsExon && intron;
+
 boolean concatRegions = granularity && sameString("gene", granularity);
 boolean concatAdjacent = (cgiBooleanDefined("hgSeq.splitCDSUTR") &&
 			  (! cgiBoolean("hgSeq.splitCDSUTR")));
+
+if (toPrimer3)
+    concatAdjacent = TRUE;
+
 boolean isCDS, doIntron;
 boolean canDoUTR, canDoIntrons;
 
 /* catch a special case: introns selected, but no exons -> include all introns
  * instead of qualifying intron with exon flags. */
 if (intron && !(utrExon5 || cdsExon || utrExon3))
     {
     utrIntron5 = cdsIntron = utrIntron3 = TRUE;
     }
 
 canDoUTR = hti->hasCDS;
 canDoIntrons = hti->hasBlocks;
 
 rowCount = totalCount = 0;
 for (bedItem = bedList;  bedItem != NULL;  bedItem = bedItem->next)
@@ -762,31 +854,31 @@
 	{
 	addFeature(&count, starts, sizes, exonFlags, cdsFlags,
 		   bedItem->chromEnd, downstreamSize, FALSE, FALSE, chromSize);
 	}
     else if (isRc && promoter && (promoterSize > 0))
 	{
 	addFeature(&count, starts, sizes, exonFlags, cdsFlags,
 		   bedItem->chromEnd, promoterSize, FALSE, FALSE, chromSize);
 	}
     if (bedItem->name != NULL)
 	safef(itemName, sizeof(itemName), "%s_%s", hti->rootName, bedItem->name);
     else
 	safef(itemName, sizeof(itemName), "%s_%d", hti->rootName, rowCount);
     hgSeqRegionsAdjDb(db, bedItem->chrom, chromSize, bedItem->strand[0], itemName,
 		      concatRegions, concatAdjacent,
-		      count, starts, sizes, exonFlags, cdsFlags);
+		      count, starts, sizes, exonFlags, cdsFlags, toPrimer3);
     rowCount++;
     totalCount += count;
     freeMem(starts);
     freeMem(sizes);
     freeMem(exonFlags);
     freeMem(cdsFlags);
     }
 return totalCount;
 }
 
 
 int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart,
                       int chromEnd, char *sqlConstraints)
 /* Print out dna sequence of all items (that match sqlConstraints, if nonNULL) 
    in the given range in table.  Return number of items. */