533112afe2a2005e80cdb1f82904ea65032d4302 braney Sat Oct 2 11:37:34 2021 -0700 split hg/lib into two separate libaries, one only used by the cgis diff --git src/hg/cgilib/hgSeq.c src/hg/cgilib/hgSeq.c new file mode 100644 index 0000000..56e96d9 --- /dev/null +++ src/hg/cgilib/hgSeq.c @@ -0,0 +1,811 @@ +/* hgSeq - Fetch and format DNA sequence data (to stdout) for gene records. */ + +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ +#include "common.h" +#include "dnautil.h" +#include "dnaseq.h" +#include "cart.h" +#include "cheapcgi.h" +#include "hdb.h" +#include "web.h" +#include "rmskOut.h" +#include "fa.h" +#include "genePred.h" +#include "bed.h" +#include "hgSeq.h" +#include "trackHub.h" + + +int hgSeqChromSize(char *db, char *chromName) +/* get chrom size if there's a database out there, + * otherwise just return 0 */ +{ +int thisSize = 0; +if (hDbExists(db)) + thisSize = hChromSize(db, chromName); +return thisSize; +} + +void hgSeqFeatureRegionOptions(struct cart *cart, boolean canDoUTR, + boolean canDoIntrons) +/* Print out HTML FORM entries for feature region options. */ +{ +char *exonStr = canDoIntrons ? " Exons" : ""; +char *setting; + +puts("\n

Sequence Retrieval Region Options:

\n"); + +if (canDoIntrons || canDoUTR) + { + cgiMakeCheckBox("hgSeq.promoter", + cartCgiUsualBoolean(cart, "hgSeq.promoter", FALSE)); + puts("Promoter/Upstream by "); + setting = cartCgiUsualString(cart, "hgSeq.promoterSize", "1000"); + cgiMakeTextVar("hgSeq.promoterSize", setting, 5); + puts("bases
"); + } + +if (canDoUTR) + { + cgiMakeCheckBox("hgSeq.utrExon5", + cartCgiUsualBoolean(cart, "hgSeq.utrExon5", TRUE)); + printf("5' UTR%s
\n", exonStr); + } + +if (canDoIntrons) + { + cgiMakeCheckBox("hgSeq.cdsExon", + cartCgiUsualBoolean(cart, "hgSeq.cdsExon", TRUE)); + if (canDoUTR) + printf("CDS Exons
\n"); + else + printf("Blocks
\n"); + } +else if (canDoUTR) + { + cgiMakeCheckBox("hgSeq.cdsExon", + cartCgiUsualBoolean(cart, "hgSeq.cdsExon", TRUE)); + printf("CDS
\n"); + } +else + { + cgiMakeHiddenVar("hgSeq.cdsExon", "1"); + } + +if (canDoUTR) + { + cgiMakeCheckBox("hgSeq.utrExon3", + cartCgiUsualBoolean(cart, "hgSeq.utrExon3", TRUE)); + printf("3' UTR%s
\n", exonStr); + } + +if (canDoIntrons) + { + cgiMakeCheckBox("hgSeq.intron", + cartCgiUsualBoolean(cart, "hgSeq.intron", TRUE)); + if (canDoUTR) + puts("Introns
"); + else + puts("Regions between blocks
"); + } + +if (canDoIntrons || canDoUTR) + { + cgiMakeCheckBox("hgSeq.downstream", + cartCgiUsualBoolean(cart, "hgSeq.downstream", FALSE)); + puts("Downstream by "); + setting = cartCgiUsualString(cart, "hgSeq.downstreamSize", "1000"); + cgiMakeTextVar("hgSeq.downstreamSize", setting, 5); + puts("bases
"); + } + +if (canDoIntrons || canDoUTR) + { + setting = cartCgiUsualString(cart, "hgSeq.granularity", "gene"); + cgiMakeRadioButton("hgSeq.granularity", "gene", + sameString(setting, "gene")); + if (canDoUTR) + puts("One FASTA record per gene.
"); + else + puts("One FASTA record per item.
"); + cgiMakeRadioButton("hgSeq.granularity", "feature", + sameString(setting, "feature")); + if (canDoUTR) + puts("One FASTA record per region (exon, intron, etc.) with "); + else + puts("One FASTA record per region (block/between blocks) with "); + } +else + { + puts("Add "); + } +setting = cartCgiUsualString(cart, "hgSeq.padding5", "0"); +cgiMakeTextVar("hgSeq.padding5", setting, 5); +puts("extra bases upstream (5') and "); +setting = cartCgiUsualString(cart, "hgSeq.padding3", "0"); +cgiMakeTextVar("hgSeq.padding3", setting, 5); +puts("extra downstream (3')
"); +if (canDoIntrons && canDoUTR) + { + puts(" "); + cgiMakeCheckBox("hgSeq.splitCDSUTR", + cartCgiUsualBoolean(cart, "hgSeq.splitCDSUTR", FALSE)); + puts("Split UTR and CDS parts of an exon into separate FASTA records"); + } +puts("
\n"); +puts("Note: if a feature is close to the beginning or end of a chromosome \n" + "and upstream/downstream bases are added, they may be truncated \n" + "in order to avoid extending past the edge of the chromosome.

"); +} + + +void hgSeqDisplayOptions(struct cart *cart, boolean canDoUTR, + boolean canDoIntrons, boolean offerRevComp) +/* Print out HTML FORM entries for sequence display options. */ +{ +char *casing, *repMasking; + +puts("\n

Sequence Formatting Options:

\n"); + +casing = cartCgiUsualString(cart, "hgSeq.casing", "exon"); +if (canDoIntrons) + { + cgiMakeRadioButton("hgSeq.casing", "exon", sameString(casing, "exon")); + if (canDoUTR) + puts("Exons in upper case, everything else in lower case.
"); + else + puts("Blocks in upper case, everything else in lower case.
"); + } +if (canDoUTR) + { + if (sameString(casing, "exon") && !canDoIntrons) + casing = "cds"; + cgiMakeRadioButton("hgSeq.casing", "cds", sameString(casing, "cds")); + puts("CDS in upper case, UTR in lower case.
"); + } +if ((sameString(casing, "exon") && !canDoIntrons) || + (sameString(casing, "cds") && !canDoUTR)) + casing = "upper"; +cgiMakeRadioButton("hgSeq.casing", "upper", sameString(casing, "upper")); +puts("All upper case.
"); +cgiMakeRadioButton("hgSeq.casing", "lower", sameString(casing, "lower")); +puts("All lower case.
"); + +cgiMakeCheckBox("hgSeq.maskRepeats", + cartCgiUsualBoolean(cart, "hgSeq.maskRepeats", FALSE)); +puts("Mask repeats: "); + +repMasking = cartCgiUsualString(cart, "hgSeq.repMasking", "lower"); +cgiMakeRadioButton("hgSeq.repMasking", "lower", + sameString(repMasking, "lower")); +puts(" to lower case "); +cgiMakeRadioButton("hgSeq.repMasking", "N", sameString(repMasking, "N")); +puts(" to N
"); +if (offerRevComp) + { + cgiMakeCheckBox("hgSeq.revComp", + cartCgiUsualBoolean(cart, "hgSeq.revComp", FALSE)); + puts("Reverse complement (get \'-\' strand sequence)"); + } +} + + +void hgSeqOptionsHtiCart(struct hTableInfo *hti, struct cart *cart) +/* Print out HTML FORM entries for gene region and sequence display options. + * Use defaults from CGI and cart. */ +{ +boolean canDoUTR, canDoIntrons, offerRevComp; + +if (hti == NULL) + { + canDoUTR = canDoIntrons = FALSE; + offerRevComp = TRUE; + } +else + { + canDoUTR = hti->hasCDS; + canDoIntrons = hti->hasBlocks; + offerRevComp = (hti->strandField[0] == 0); + } +hgSeqFeatureRegionOptions(cart, canDoUTR, canDoIntrons); +hgSeqDisplayOptions(cart, canDoUTR, canDoIntrons, offerRevComp); +} + + +#ifdef NEVER +void hgSeqOptionsHti(struct hTableInfo *hti) +/* Print out HTML FORM entries for gene region and sequence display options. + * Use defaults from CGI. */ +{ +hgSeqOptionsHtiCart(hti, NULL); +} +#endif /* NEVER */ + + +void hgSeqOptions(struct cart *cart, char *db, char *table) +/* Print out HTML FORM entries for gene region and sequence display options. */ +{ +struct hTableInfo *hti; +char chrom[32]; +char rootName[256]; + +if (isCustomTrack(table) || startsWith("hub_", table)) + { + // we asssume that this is a bigGenePred table if we got here with it + hgSeqFeatureRegionOptions(cart, TRUE, TRUE); + hgSeqDisplayOptions(cart, TRUE, TRUE, FALSE); + return; + } +else if ((table == NULL) || (table[0] == 0)) + { + hti = NULL; + } +else + { + hParseTableName(db, table, rootName, chrom); + hti = hFindTableInfo(db, chrom, rootName); + if (hti == NULL) + webAbort("Error", "Could not find table info for table %s (%s)", + rootName, table); + } +hgSeqOptionsHtiCart(hti, cart); +} + +static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name, + int rCount, unsigned *rStarts, unsigned *rSizes, + boolean *exonFlags, boolean *cdsFlags) +/* Concatenate and print out dna for a series of regions. */ +{ +// Note: this code use to generate different sequence ids if the global +// database in hdb was different than the db parameter. This functionality +// has been removed since the global database was removed and it didn't +// appear to be used. + +struct dnaSeq *rSeq = NULL; +struct dnaSeq *cSeq = NULL; +char recName[256]; +int seqStart, seqEnd; +int offset, cSize; +int i; +boolean isRc = (strand == '-') || cgiBoolean("hgSeq.revComp"); +boolean maskRep = cgiBoolean("hgSeq.maskRepeats"); +int padding5 = cgiOptionalInt("hgSeq.padding5", 0); +int padding3 = cgiOptionalInt("hgSeq.padding3", 0); +char *casing = cgiString("hgSeq.casing"); +char *repMasking = cgiString("hgSeq.repMasking"); +char *granularity = cgiOptionalString("hgSeq.granularity"); +boolean concatRegions = granularity && sameString("gene", granularity); + +if (rCount < 1) + return; + +/* Don't support padding if granularity is gene (i.e. concat'ing all). */ +if (concatRegions) + { + padding5 = padding3 = 0; + } + +i = rCount - 1; +seqStart = rStarts[0] - (isRc ? padding3 : padding5); +seqEnd = rStarts[i] + rSizes[i] + (isRc ? padding5 : padding3); +/* Padding might push us off the edge of the chrom; if so, truncate: */ +if (seqStart < 0) + { + if (isRc) + padding3 += seqStart; + else + padding5 += seqStart; + seqStart = 0; + } + +/* if we know the chromSize, don't pad out beyond it */ +if ((chromSize > 0) && (seqEnd > chromSize)) + { + if (isRc) + padding5 += (chromSize - seqEnd); + else + padding3 += (chromSize - seqEnd); + seqEnd = chromSize; + } +if (seqEnd <= seqStart) + { + printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n", + db, + name, + chrom, seqStart+1, seqEnd, + padding5, padding3); + return; + } +if (maskRep) + { + rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed); + if (sameString(repMasking, "N")) + lowerToN(rSeq->dna, strlen(rSeq->dna)); + if (!sameString(casing, "upper")) + tolowers(rSeq->dna); + } +else if (sameString(casing, "upper")) + rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper); +else + rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower); + +/* Handle casing and compute size of concatenated sequence */ +cSize = 0; +for (i=0; i < rCount; i++) + { + if ((sameString(casing, "exon") && exonFlags[i]) || + (sameString(casing, "cds") && cdsFlags[i])) + { + int rStart = rStarts[i] - seqStart; + toUpperN(rSeq->dna+rStart, rSizes[i]); + } + cSize += rSizes[i]; + } +cSize += (padding5 + padding3); +AllocVar(cSeq); +cSeq->dna = needLargeMem(cSize+1); +cSeq->size = cSize; + +offset = 0; +for (i=0; i < rCount; i++) + { + int start = rStarts[i] - seqStart; + int size = rSizes[i]; + if (i == 0) + { + start -= (isRc ? padding3 : padding5); + assert(start == 0); + size += (isRc ? padding3 : padding5); + } + if (i == rCount-1) + { + size += (isRc ? padding5 : padding3); + } + memcpy(cSeq->dna+offset, rSeq->dna+start, size); + offset += size; + } +assert(offset == cSeq->size); +cSeq->dna[offset] = 0; +freeDnaSeq(&rSeq); + +if (isRc) + reverseComplement(cSeq->dna, cSeq->size); + +safef(recName, sizeof(recName), + "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d " + "strand=%c repeatMasking=%s", + db, + name, + chrom, seqStart+1, seqEnd, + padding5, padding3, + (isRc ? '-' : '+'), + (maskRep ? repMasking : "none")); +faWriteNext(stdout, recName, cSeq->dna, cSeq->size); +freeDnaSeq(&cSeq); +} + + +static void hgSeqRegionsAdjDb(char *db, char *chrom, int chromSize, char strand, char *name, + boolean concatRegions, boolean concatAdjacent, + int rCount, unsigned *rStarts, unsigned *rSizes, + boolean *exonFlags, boolean *cdsFlags) +/* Concatenate and print out dna for a series of regions, + * optionally concatenating adjacent exons. */ +{ +if (concatRegions || (rCount == 1)) + { + hgSeqConcatRegionsDb(db, chrom, chromSize, strand, name, + rCount, rStarts, rSizes, exonFlags, cdsFlags); + } +else + { + int i, count; + boolean isRc = (strand == '-'); + char rName[256]; + for (i=0,count=0; i < rCount; i++,count++) + { + int j, jEnd, len, lo, hi; + safef(rName, sizeof(rName), "%s_%d", name, count); + j = (isRc ? (rCount - i - 1) : i); + jEnd = (isRc ? (j - 1) : (j + 1)); + if (concatAdjacent && exonFlags[j]) + { + lo = (isRc ? jEnd : (jEnd - 1)); + hi = (isRc ? (jEnd + 1) : jEnd); + while ((i < rCount) && + ((rStarts[lo] + rSizes[lo]) == rStarts[hi]) && + exonFlags[jEnd]) + { + i++; + jEnd = (isRc ? (jEnd - 1) : (jEnd + 1)); + lo = (isRc ? jEnd : (jEnd - 1)); + hi = (isRc ? (jEnd + 1) : jEnd); + } + } + len = (isRc ? (j - jEnd) : (jEnd - j)); + lo = (isRc ? (jEnd + 1) : j); + hgSeqConcatRegionsDb(db, chrom, chromSize, strand, rName, + len, &rStarts[lo], &rSizes[lo], &exonFlags[lo], + &cdsFlags[lo]); + } + } +} + +static void hgSeqRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name, + boolean concatRegions, + int rCount, unsigned *rStarts, unsigned *rSizes, + boolean *exonFlags, boolean *cdsFlags) +/* Concatenate and print out dna for a series of regions. */ +{ +hgSeqRegionsAdjDb(db, chrom, chromSize, strand, name, concatRegions, FALSE, + rCount, rStarts, rSizes, exonFlags, cdsFlags); +} + +static int maxStartsOffset = 0; + +static void addFeature(int *count, unsigned *starts, unsigned *sizes, + boolean *exonFlags, boolean *cdsFlags, + int start, int size, boolean eFlag, boolean cFlag, + int chromSize) +{ +if (start < 0) + { + size += start; + start = 0; + } + +/* if we know the chromSize, don't try to get more than's there */ +if ((chromSize > 0) && (start + size > chromSize)) + size = chromSize - start; +if (*count > maxStartsOffset) + errAbort("addFeature: overflow (%d > %d)", *count, maxStartsOffset); +starts[*count] = start; +sizes[*count] = size; +exonFlags[*count] = eFlag; +cdsFlags[*count] = cFlag; +(*count)++; +} + +void hgSeqRange(char *db, char *chrom, int chromStart, int chromEnd, char strand, + char *name) +/* Print out dna sequence for the given range. */ +{ +int count = 0; +unsigned starts[1]; +unsigned sizes[1]; +boolean exonFlags[1]; +boolean cdsFlags[1]; +int chromSize = hgSeqChromSize(db, chrom); +maxStartsOffset = 0; +addFeature(&count, starts, sizes, exonFlags, cdsFlags, + chromStart, chromEnd - chromStart, FALSE, FALSE, chromSize); + +hgSeqRegionsDb(db, chrom, chromSize, strand, name, FALSE, count, starts, sizes, exonFlags, + cdsFlags); +} + + +int hgSeqBed(char *db, struct hTableInfo *hti, struct bed *bedList) +/* Print out dna sequence from the given database of all items in bedList. + * hti describes the bed-compatibility level of bedList items. + * Returns number of FASTA records printed out. */ +{ +struct bed *bedItem; +char itemName[512]; +boolean isRc; +int count; +unsigned *starts = NULL; +unsigned *sizes = NULL; +boolean *exonFlags = NULL; +boolean *cdsFlags = NULL; +int i, rowCount, totalCount; +boolean promoter = cgiBoolean("hgSeq.promoter"); +boolean intron = cgiBoolean("hgSeq.intron"); +boolean utrExon5 = cgiBoolean("hgSeq.utrExon5"); +boolean utrIntron5 = utrExon5 && intron; +boolean cdsExon = cgiBoolean("hgSeq.cdsExon"); +boolean cdsIntron = cdsExon && intron; +boolean utrExon3 = cgiBoolean("hgSeq.utrExon3"); +boolean utrIntron3 = utrExon3 && intron; +boolean downstream = cgiBoolean("hgSeq.downstream"); +int promoterSize = cgiOptionalInt("hgSeq.promoterSize", 0); +int downstreamSize = cgiOptionalInt("hgSeq.downstreamSize", 0); +char *granularity = cgiOptionalString("hgSeq.granularity"); +boolean concatRegions = granularity && sameString("gene", granularity); +boolean concatAdjacent = (cgiBooleanDefined("hgSeq.splitCDSUTR") && + (! cgiBoolean("hgSeq.splitCDSUTR"))); +boolean isCDS, doIntron; +boolean canDoUTR, canDoIntrons; + +/* catch a special case: introns selected, but no exons -> include all introns + * instead of qualifying intron with exon flags. */ +if (intron && !(utrExon5 || cdsExon || utrExon3)) + { + utrIntron5 = cdsIntron = utrIntron3 = TRUE; + } + +canDoUTR = hti->hasCDS; +canDoIntrons = hti->hasBlocks; + +rowCount = totalCount = 0; +for (bedItem = bedList; bedItem != NULL; bedItem = bedItem->next) + { + if (bedItem->blockCount == 0) /* An intersection may have made hti unreliable. */ + canDoIntrons = FALSE; + int chromSize = hgSeqChromSize(db, bedItem->chrom); + // bed: translate relative starts to absolute starts + for (i=0; i < bedItem->blockCount; i++) + { + bedItem->chromStarts[i] += bedItem->chromStart; + } + isRc = (bedItem->strand[0] == '-'); + // here's the max # of feature regions: + if (canDoIntrons) + count = 4 + (2 * bedItem->blockCount); + else + count = 5; + maxStartsOffset = count-1; + starts = needMem(sizeof(unsigned) * count); + sizes = needMem(sizeof(unsigned) * count); + exonFlags = needMem(sizeof(boolean) * count); + cdsFlags = needMem(sizeof(boolean) * count); + // build up a list of selected regions + count = 0; + if (!isRc && promoter && (promoterSize > 0)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStart - promoterSize), promoterSize, + FALSE, FALSE, chromSize); + } + else if (isRc && downstream && (downstreamSize > 0)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStart - downstreamSize), downstreamSize, + FALSE, FALSE, chromSize); + } + if (canDoIntrons && canDoUTR) + { + for (i=0; i < bedItem->blockCount; i++) + { + if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) <= + bedItem->thickStart) + { + if ((!isRc && utrExon5) || (isRc && utrExon3)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], bedItem->blockSizes[i], + TRUE, FALSE, chromSize); + } + if (((!isRc && utrIntron5) || (isRc && utrIntron3)) && + (i < bedItem->blockCount - 1)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i]), + (bedItem->chromStarts[i+1] - + bedItem->chromStarts[i] - + bedItem->blockSizes[i]), + FALSE, FALSE, chromSize); + } + } + else if (bedItem->chromStarts[i] < bedItem->thickEnd) + { + if ((bedItem->chromStarts[i] < bedItem->thickStart) && + ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > + bedItem->thickEnd)) + { + if ((!isRc && utrExon5) || (isRc && utrExon3)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], + (bedItem->thickStart - + bedItem->chromStarts[i]), + TRUE, FALSE, chromSize); + } + if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickStart, + (bedItem->thickEnd - bedItem->thickStart), + TRUE, TRUE, chromSize); + } + if ((!isRc && utrExon3) || (isRc && utrExon5)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickEnd, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i] - + bedItem->thickEnd), + TRUE, FALSE, chromSize); + } + } + else if (bedItem->chromStarts[i] < bedItem->thickStart) + { + if ((!isRc && utrExon5) || (isRc && utrExon3)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], + (bedItem->thickStart - + bedItem->chromStarts[i]), + TRUE, FALSE, chromSize); + } + if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickStart, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i] - + bedItem->thickStart), + TRUE, TRUE, chromSize); + } + } + else if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > + bedItem->thickEnd) + { + if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], + (bedItem->thickEnd - + bedItem->chromStarts[i]), + TRUE, TRUE, chromSize); + } + if ((!isRc && utrExon3) || (isRc && utrExon5)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickEnd, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i] - + bedItem->thickEnd), + TRUE, FALSE, chromSize); + } + } + else if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], bedItem->blockSizes[i], + TRUE, TRUE, chromSize); + } + isCDS = ! ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > + bedItem->thickEnd); + doIntron = (isCDS ? cdsIntron : + ((!isRc) ? utrIntron3 : utrIntron5)); + if (doIntron && (i < bedItem->blockCount - 1)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i]), + (bedItem->chromStarts[i+1] - + bedItem->chromStarts[i] - + bedItem->blockSizes[i]), + FALSE, isCDS, chromSize); + } + } + else + { + if ((!isRc && utrExon3) || (isRc && utrExon5)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], bedItem->blockSizes[i], + TRUE, FALSE, chromSize); + } + if (((!isRc && utrIntron3) || (isRc && utrIntron5)) && + (i < bedItem->blockCount - 1)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStarts[i] + + bedItem->blockSizes[i]), + (bedItem->chromStarts[i+1] - + bedItem->chromStarts[i] - + bedItem->blockSizes[i]), + FALSE, FALSE, chromSize); + } + } + } + } + else if (canDoIntrons) + { + for (i=0; i < bedItem->blockCount; i++) + { + if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStarts[i], bedItem->blockSizes[i], + TRUE, FALSE, chromSize); + } + if (cdsIntron && (i < bedItem->blockCount - 1)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + (bedItem->chromStarts[i] + bedItem->blockSizes[i]), + (bedItem->chromStarts[i+1] - + bedItem->chromStarts[i] - + bedItem->blockSizes[i]), + FALSE, FALSE, chromSize); + } + } + } + else if (canDoUTR) + { + if (bedItem->thickStart == 0 && bedItem->thickEnd == 0) + bedItem->thickStart = bedItem->thickEnd = bedItem->chromStart; + if ((!isRc && utrExon5) || (isRc && utrExon3)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStart, + (bedItem->thickStart - bedItem->chromStart), + TRUE, FALSE, chromSize); + } + if (cdsExon) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickStart, + (bedItem->thickEnd - bedItem->thickStart), + TRUE, TRUE, chromSize); + } + if ((!isRc && utrExon3) || (isRc && utrExon5)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->thickEnd, + (bedItem->chromEnd - bedItem->thickEnd), + TRUE, FALSE, chromSize); + } + } + else + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromStart, + (bedItem->chromEnd - bedItem->chromStart), + TRUE, FALSE, chromSize); + } + if (!isRc && downstream && (downstreamSize > 0)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromEnd, downstreamSize, FALSE, FALSE, chromSize); + } + else if (isRc && promoter && (promoterSize > 0)) + { + addFeature(&count, starts, sizes, exonFlags, cdsFlags, + bedItem->chromEnd, promoterSize, FALSE, FALSE, chromSize); + } + if (bedItem->name != NULL) + safef(itemName, sizeof(itemName), "%s_%s", hti->rootName, bedItem->name); + else + safef(itemName, sizeof(itemName), "%s_%d", hti->rootName, rowCount); + hgSeqRegionsAdjDb(db, bedItem->chrom, chromSize, bedItem->strand[0], itemName, + concatRegions, concatAdjacent, + count, starts, sizes, exonFlags, cdsFlags); + rowCount++; + totalCount += count; + freeMem(starts); + freeMem(sizes); + freeMem(exonFlags); + freeMem(cdsFlags); + } +return totalCount; +} + + +int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart, + int chromEnd, char *sqlConstraints) +/* Print out dna sequence of all items (that match sqlConstraints, if nonNULL) + in the given range in table. Return number of items. */ +{ +struct hTableInfo *hti; +struct bed *bedList = NULL; +char rootName[256]; +char parsedChrom[32]; +int itemCount; + +hParseTableName(db, table, rootName, parsedChrom); +hti = hFindTableInfo(db, chrom, rootName); +if (hti == NULL) + webAbort("Error", "Could not find table info for table %s (%s)", + rootName, table); +bedList = hGetBedRange(db, table, chrom, chromStart, chromEnd, + sqlConstraints); + +itemCount = hgSeqBed(db, hti, bedList); +bedFreeList(&bedList); +return itemCount; +}