387377eb3982c570bc06728b9e0380c3d1c006ec max Wed Jan 25 05:30:25 2023 -0800 adding linkout to primer3 RT-QPCR primer designer from hgGene, refs #30545 diff --git src/hg/cgilib/hgSeq.c src/hg/cgilib/hgSeq.c index aeddc35..694ac16 100644 --- src/hg/cgilib/hgSeq.c +++ src/hg/cgilib/hgSeq.c @@ -1,22 +1,24 @@ /* hgSeq - Fetch and format DNA sequence data (to stdout) for gene records. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" +#include "hCommon.h" #include "dnautil.h" #include "dnaseq.h" +#include "dystring.h" #include "cart.h" #include "cheapcgi.h" #include "hdb.h" #include "web.h" #include "rmskOut.h" #include "fa.h" #include "genePred.h" #include "bed.h" #include "hgSeq.h" #include "trackHub.h" int hgSeqChromSize(char *db, char *chromName) /* get chrom size if there's a database out there, * otherwise just return 0 */ @@ -240,53 +242,105 @@ else if ((table == NULL) || (table[0] == 0)) { hti = NULL; } else { hParseTableName(db, table, rootName, chrom); hti = hFindTableInfo(db, chrom, rootName); if (hti == NULL) webAbort("Error", "Could not find table info for table %s (%s)", rootName, table); } hgSeqOptionsHtiCart(hti, cart); } +static struct dyString *genPrimer3Exons(int rCount, unsigned *rStarts, unsigned *rSizes, boolean *exonFlags, int seqStart) +/* generate string with exon positions on transcript sequence, format -\n"); + + printf("\n", seq); + printf("\n", exons); + printf("\n", db); + printf("\n", strand); + printf("\n", returnUrl); + + char *chrom = cgiString("c"); + char *winLeft = cgiString("l"); + char *winRight = cgiString("r"); + + printf("\n", chrom, winLeft, winRight); + puts(""); + puts(""); + jsInline("document.getElementById(\"primer3Form\").submit();\n"); +} + static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name, int rCount, unsigned *rStarts, unsigned *rSizes, - boolean *exonFlags, boolean *cdsFlags) -/* Concatenate and print out dna for a series of regions. */ + boolean *exonFlags, boolean *cdsFlags, boolean toPrimer3) +/* Concatenate and print out dna for a series of regions. + * + * If toPrimer3 is true, will send them to primer3, overriding most arguments to allow exon-exon primers..*/ { // Note: this code use to generate different sequence ids if the global // database in hdb was different than the db parameter. This functionality // has been removed since the global database was removed and it didn't // appear to be used. struct dnaSeq *rSeq = NULL; struct dnaSeq *cSeq = NULL; char recName[256]; int seqStart, seqEnd; int offset, cSize; int i; boolean isRc = (strand == '-') || cgiBoolean("hgSeq.revComp"); -boolean maskRep = cgiBoolean("hgSeq.maskRepeats"); int padding5 = cgiOptionalInt("hgSeq.padding5", 0); int padding3 = cgiOptionalInt("hgSeq.padding3", 0); -char *casing = cgiString("hgSeq.casing"); -char *repMasking = cgiString("hgSeq.repMasking"); char *granularity = cgiOptionalString("hgSeq.granularity"); + +char *casing, *repMasking; +boolean maskRep; + +if (toPrimer3) + { + casing = "lower"; + maskRep = TRUE; + repMasking = "upper"; + granularity = "gene"; + } +else + { + casing = cgiString("hgSeq.casing"); + repMasking = cgiString("hgSeq.repMasking"); + maskRep = cgiBoolean("hgSeq.maskRepeats"); + } + boolean concatRegions = granularity && sameString("gene", granularity); if (rCount < 1) return; /* Don't support padding if granularity is gene (i.e. concat'ing all). */ if (concatRegions) { padding5 = padding3 = 0; } i = rCount - 1; seqStart = rStarts[0] - (isRc ? padding3 : padding5); seqEnd = rStarts[i] + rSizes[i] + (isRc ? padding5 : padding3); /* Padding might push us off the edge of the chrom; if so, truncate: */ @@ -296,162 +350,178 @@ padding3 += seqStart; else padding5 += seqStart; seqStart = 0; } /* if we know the chromSize, don't pad out beyond it */ if ((chromSize > 0) && (seqEnd > chromSize)) { if (isRc) padding5 += (chromSize - seqEnd); else padding3 += (chromSize - seqEnd); seqEnd = chromSize; } + if (seqEnd <= seqStart) { printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n", db, name, chrom, seqStart+1, seqEnd, padding5, padding3); return; } + if (maskRep) { rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed); if (sameString(repMasking, "N")) lowerToN(rSeq->dna, strlen(rSeq->dna)); if (!sameString(casing, "upper")) tolowers(rSeq->dna); } else if (sameString(casing, "upper")) rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper); else rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower); /* Handle casing and compute size of concatenated sequence */ cSize = 0; + for (i=0; i < rCount; i++) { if ((sameString(casing, "exon") && exonFlags[i]) || (sameString(casing, "cds") && cdsFlags[i])) { int rStart = rStarts[i] - seqStart; toUpperN(rSeq->dna+rStart, rSizes[i]); } cSize += rSizes[i]; } + cSize += (padding5 + padding3); AllocVar(cSeq); cSeq->dna = needLargeMem(cSize+1); cSeq->size = cSize; offset = 0; + for (i=0; i < rCount; i++) { int start = rStarts[i] - seqStart; int size = rSizes[i]; + if (i == 0) { start -= (isRc ? padding3 : padding5); assert(start == 0); size += (isRc ? padding3 : padding5); } if (i == rCount-1) { size += (isRc ? padding5 : padding3); } memcpy(cSeq->dna+offset, rSeq->dna+start, size); offset += size; } + assert(offset == cSeq->size); cSeq->dna[offset] = 0; freeDnaSeq(&rSeq); -if (isRc) +if (isRc && !toPrimer3) reverseComplement(cSeq->dna, cSeq->size); +if (toPrimer3) +{ + struct dyString* primer3Exons = genPrimer3Exons(rCount, rStarts, rSizes, exonFlags, seqStart); + printPrimerForm(cSeq->dna, primer3Exons->string, db, strand, hgAbsUrlCgi("hgTracks")); + dyStringFree(&primer3Exons); +} +else +{ safef(recName, sizeof(recName), "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d " "strand=%c repeatMasking=%s", db, name, chrom, seqStart+1, seqEnd, padding5, padding3, (isRc ? '-' : '+'), (maskRep ? repMasking : "none")); faWriteNext(stdout, recName, cSeq->dna, cSeq->size); freeDnaSeq(&cSeq); } +} static void hgSeqRegionsAdjDb(char *db, char *chrom, int chromSize, char strand, char *name, boolean concatRegions, boolean concatAdjacent, int rCount, unsigned *rStarts, unsigned *rSizes, - boolean *exonFlags, boolean *cdsFlags) + boolean *exonFlags, boolean *cdsFlags, boolean toPrimer3) /* Concatenate and print out dna for a series of regions, * optionally concatenating adjacent exons. */ { if (concatRegions || (rCount == 1)) { hgSeqConcatRegionsDb(db, chrom, chromSize, strand, name, - rCount, rStarts, rSizes, exonFlags, cdsFlags); + rCount, rStarts, rSizes, exonFlags, cdsFlags, toPrimer3); } else { int i, count; boolean isRc = (strand == '-'); char rName[256]; for (i=0,count=0; i < rCount; i++,count++) { int j, jEnd, len, lo, hi; safef(rName, sizeof(rName), "%s_%d", name, count); j = (isRc ? (rCount - i - 1) : i); jEnd = (isRc ? (j - 1) : (j + 1)); if (concatAdjacent && exonFlags[j]) { lo = (isRc ? jEnd : (jEnd - 1)); hi = (isRc ? (jEnd + 1) : jEnd); while ((i < rCount) && ((rStarts[lo] + rSizes[lo]) == rStarts[hi]) && exonFlags[jEnd]) { i++; jEnd = (isRc ? (jEnd - 1) : (jEnd + 1)); lo = (isRc ? jEnd : (jEnd - 1)); hi = (isRc ? (jEnd + 1) : jEnd); } } len = (isRc ? (j - jEnd) : (jEnd - j)); lo = (isRc ? (jEnd + 1) : j); hgSeqConcatRegionsDb(db, chrom, chromSize, strand, rName, len, &rStarts[lo], &rSizes[lo], &exonFlags[lo], - &cdsFlags[lo]); + &cdsFlags[lo], toPrimer3); } } } static void hgSeqRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name, boolean concatRegions, int rCount, unsigned *rStarts, unsigned *rSizes, boolean *exonFlags, boolean *cdsFlags) /* Concatenate and print out dna for a series of regions. */ { hgSeqRegionsAdjDb(db, chrom, chromSize, strand, name, concatRegions, FALSE, - rCount, rStarts, rSizes, exonFlags, cdsFlags); + rCount, rStarts, rSizes, exonFlags, cdsFlags, FALSE); } static int maxStartsOffset = 0; static void addFeature(int *count, unsigned *starts, unsigned *sizes, boolean *exonFlags, boolean *cdsFlags, int start, int size, boolean eFlag, boolean cFlag, int chromSize) { if (start < 0) { size += start; start = 0; } @@ -488,45 +558,67 @@ int hgSeqBed(char *db, struct hTableInfo *hti, struct bed *bedList) /* Print out dna sequence from the given database of all items in bedList. * hti describes the bed-compatibility level of bedList items. * Returns number of FASTA records printed out. */ { struct bed *bedItem; char itemName[512]; boolean isRc; int count; unsigned *starts = NULL; unsigned *sizes = NULL; boolean *exonFlags = NULL; boolean *cdsFlags = NULL; int i, rowCount, totalCount; + boolean promoter = cgiBoolean("hgSeq.promoter"); boolean intron = cgiBoolean("hgSeq.intron"); boolean utrExon5 = cgiBoolean("hgSeq.utrExon5"); -boolean utrIntron5 = utrExon5 && intron; boolean cdsExon = cgiBoolean("hgSeq.cdsExon"); -boolean cdsIntron = cdsExon && intron; boolean utrExon3 = cgiBoolean("hgSeq.utrExon3"); -boolean utrIntron3 = utrExon3 && intron; boolean downstream = cgiBoolean("hgSeq.downstream"); + int promoterSize = cgiOptionalInt("hgSeq.promoterSize", 0); int downstreamSize = cgiOptionalInt("hgSeq.downstreamSize", 0); char *granularity = cgiOptionalString("hgSeq.granularity"); + +boolean toPrimer3 = cgiBoolean("primer3"); +if (toPrimer3) +{ + promoter = FALSE; + utrExon5 = TRUE; + intron = TRUE; + cdsExon = TRUE; + utrExon3 = TRUE; + downstream = FALSE; + downstreamSize = 0; + promoterSize = 0; + granularity = "gene"; +} + +boolean utrIntron3 = utrExon3 && intron; +boolean utrIntron5 = utrExon5 && intron; +boolean cdsIntron = cdsExon && intron; + boolean concatRegions = granularity && sameString("gene", granularity); boolean concatAdjacent = (cgiBooleanDefined("hgSeq.splitCDSUTR") && (! cgiBoolean("hgSeq.splitCDSUTR"))); + +if (toPrimer3) + concatAdjacent = TRUE; + boolean isCDS, doIntron; boolean canDoUTR, canDoIntrons; /* catch a special case: introns selected, but no exons -> include all introns * instead of qualifying intron with exon flags. */ if (intron && !(utrExon5 || cdsExon || utrExon3)) { utrIntron5 = cdsIntron = utrIntron3 = TRUE; } canDoUTR = hti->hasCDS; canDoIntrons = hti->hasBlocks; rowCount = totalCount = 0; for (bedItem = bedList; bedItem != NULL; bedItem = bedItem->next) @@ -762,31 +854,31 @@ { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromEnd, downstreamSize, FALSE, FALSE, chromSize); } else if (isRc && promoter && (promoterSize > 0)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromEnd, promoterSize, FALSE, FALSE, chromSize); } if (bedItem->name != NULL) safef(itemName, sizeof(itemName), "%s_%s", hti->rootName, bedItem->name); else safef(itemName, sizeof(itemName), "%s_%d", hti->rootName, rowCount); hgSeqRegionsAdjDb(db, bedItem->chrom, chromSize, bedItem->strand[0], itemName, concatRegions, concatAdjacent, - count, starts, sizes, exonFlags, cdsFlags); + count, starts, sizes, exonFlags, cdsFlags, toPrimer3); rowCount++; totalCount += count; freeMem(starts); freeMem(sizes); freeMem(exonFlags); freeMem(cdsFlags); } return totalCount; } int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart, int chromEnd, char *sqlConstraints) /* Print out dna sequence of all items (that match sqlConstraints, if nonNULL) in the given range in table. Return number of items. */