e1abdbe4945561b19dad7d4139982f511e4b2b5d kent Wed Jun 29 19:12:39 2022 -0700 Expanding coloring based on query sequence to include chain tracks. diff --git src/hg/hgTracks/cds.c src/hg/hgTracks/cds.c index ac9ed6c..ed74e51 100644 --- src/hg/hgTracks/cds.c +++ src/hg/hgTracks/cds.c @@ -347,33 +347,33 @@ if ((chromStart < lf->tallStart) || (chromStart > lf->tallEnd)) { /* adjust for UTR. WARNING: this duplicates shortOff & shortHeight * calculations in linkedFeaturesDrawAt */ thisY += height/4; thisHeight = height - height/2; } hvGfxLine(hvg, thisX, thisY, thisX, thisY+thisHeight, color); } static void drawCdsDiffBaseTickmarksOnly(struct track *tg, struct linkedFeatures *lf, struct hvGfx *hvg, int xOff, int y, double scale, int heightPer, - struct dnaSeq *mrnaSeq, struct psl *psl, + struct dnaSeq *qSeq, int qOffset, struct psl *psl, int winStart) -/* Draw 1-pixel wide red lines only where mRNA bases differ from genomic. +/* Draw thin vertical red lines only where mRNA bases differ from genomic. * This assumes that lf has been drawn already, we're zoomed out past * zoomedToBaseLevel, we're not in dense mode etc. */ { struct simpleFeature *sf = NULL; char *winDna = getCachedDna(winStart, winEnd); Color c = cdsColor[CDS_STOP]; // check if we need a contrasting color instead of default 'red' (CDS_STOP) char *tickColor = NULL; if ( tg->itemColor && (tickColor = trackDbSetting(tg->tdb, "baseColorTickColor"))) { Color ci = tg->itemColor(tg, lf, hvg); if (sameString(tickColor, "contrastingColor")) c = hvGfxContrastingColor(hvg, ci); else if (sameString(tickColor, "lighterShade")) c = lighterShade(hvg, ci, 1.5); @@ -384,32 +384,33 @@ int e = min(winEnd, sf->end); if (s > winEnd || e < winStart) continue; if (e > s) { int mrnaS = -1; if (psl) mrnaS = convertCoordUsingPsl(s, psl); else mrnaS = sf->qStart + (s - sf->start); if(mrnaS >= 0) { int i; for (i=0; i < (e - s); i++) { - if (mrnaSeq->dna[mrnaS+i] != winDna[s-winStart+i]) - drawVertLine(lf, hvg, s+i, xOff, y+1, heightPer-2, scale, c); + if (qSeq->dna[mrnaS+i-qOffset] != winDna[s-winStart+i]) + // drawVertLine(lf, hvg, s+i, xOff, y+1, heightPer-2, scale, c); + drawScaledBox(hvg, s+i, s+i+1, scale, xOff, y+1, heightPer-2, c); } } } } } static void maskDiffString( char *retStr, char *s1, char *s2, char mask ) /*copies s1, masking off similar characters, and returns result into retStr. *if strings are of different size it stops after s1 is done.*/ { int s1Len = strlen(s1); memset(retStr, mask, s1Len); int i; for (i=0; i < s1Len; i++) @@ -974,46 +975,46 @@ } struct hash *seqHash = (doRc ? cache->rcSeqHash : cache->seqHash); struct dnaSeq *seq = hashFindVal(seqHash, seqName); if (seq == NULL) { seq = twoBitReadSeqFrag(cache->tbf, seqName, 0, 0); touppers(seq->dna); if (doRc) reverseComplement(seq->dna, seq->size); hashAdd(seqHash, seqName, seq); } return seq; } -static struct dnaSeq *maybeGetSeqUpper(struct linkedFeatures *lf, +static struct dnaSeq *maybeGetSeqUpper(struct linkedFeatures *lf, char *mrnaName, char *tableName, struct track *tg, boolean doRc) /* Look up the sequence in genbank tables (hGenBankGetMrna also searches * seq if it can't find it in GB tables) or user's blat sequence, * uppercase and return it if we find it, return NULL if we don't find it. */ { boolean doUpper = TRUE; struct dnaSeq *mrnaSeq = NULL; -char *name = getItemDataName(tg, lf->name); +char *name = getItemDataName(tg, mrnaName); if (sameString(tableName,"refGene") || sameString(tableName,"refSeqAli")) mrnaSeq = hGenBankGetMrna(database, name, "refMrna"); else { char *seqSource = trackDbSetting(tg->tdb, BASE_COLOR_USE_SEQUENCE); - if (seqSource == NULL) - errAbort("setting '%s' missing for track '%s'", BASE_COLOR_USE_SEQUENCE, tg->track); + if (seqSource != NULL) + { if (sameString(seqSource, "ss")) mrnaSeq = maybeGetUserSeq(name); #ifndef GBROWSE else if (sameString(seqSource, PCR_RESULT_TRACK_NAME)) mrnaSeq = maybeGetPcrResultSeq(lf); #endif /* GBROWSE */ else if (startsWith("extFile", seqSource)) mrnaSeq = maybeGetExtFileSeq(seqSource, name); else if (endsWith("ExtFile", seqSource)) mrnaSeq = maybeGetExtFileSeq(seqSource, name); else if (sameString("nameIsSequence", seqSource)) { mrnaSeq = newDnaSeq(cloneString(name), strlen(name), name); if (lf->orientation == -1) reverseComplement(mrnaSeq->dna, mrnaSeq->size); @@ -1051,30 +1052,31 @@ char *table = seqSource; nextWord(&table); mrnaSeq = hGenBankGetMrna(database, name, table); } else if (startsWithWord("db", seqSource)) { char *sourceDb = seqSource; nextWord(&sourceDb); if (isEmpty(sourceDb)) sourceDb = database; mrnaSeq = hChromSeq(sourceDb, name, 0, 0); } else mrnaSeq = hGenBankGetMrna(database, name, NULL); } + } if (mrnaSeq != NULL && doUpper) touppers(mrnaSeq->dna); if (mrnaSeq != NULL && doRc) reverseComplement(mrnaSeq->dna, mrnaSeq->size); return mrnaSeq; } static void makeCdsShades(struct hvGfx *hvg, Color *cdsColor) /* setup CDS colors */ { cdsColor[CDS_ERROR] = hvGfxFindColorIx(hvg,0,0,0); cdsColor[CDS_ODD] = hvGfxFindColorIx(hvg,CDS_ODD_R,CDS_ODD_G,CDS_ODD_B); cdsColor[CDS_EVEN] = hvGfxFindColorIx(hvg,CDS_EVEN_R,CDS_EVEN_G,CDS_EVEN_B); @@ -1616,31 +1618,31 @@ dyStringFree(&dyMrnaSeq); } else { /*show we have an error by coloring entire exon block yellow*/ drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, MG_YELLOW); // FIXME: this shouldn't ever happen, should be an errAbort warn("Bug: drawDiffTextBox: convertCoordUsingPsl failed<br>\n"); } } void baseColorDrawItem(struct track *tg, struct linkedFeatures *lf, int grayIx, struct hvGfx *hvg, int xOff, int y, double scale, MgFont *font, int s, int e, int heightPer, boolean zoomedToCodonLevel, - struct dnaSeq *mrnaSeq, struct simpleFeature *sf, struct psl *psl, + struct dnaSeq *mrnaSeq, int qOffset, struct simpleFeature *sf, struct psl *psl, enum baseColorDrawOpt drawOpt, int maxPixels, int winStart, Color originalColor) /* Draw codon/base-colored item. */ { char codon[64] = " "; Color color = colorAndCodonFromGrayIx(hvg, codon, grayIx, originalColor); if (sf->codonIndex && ( e - s >= 3)) // don't put exon numbers on split codons because there isn't space. safef(codon, sizeof(codon), "%c %d", codon[0], sf->codonIndex); /* When we are zoomed out far enough so that multiple bases/codons share the * same pixel, we have to draw differences in a separate pass (baseColorOverdrawDiff) * so don't waste time drawing the differences here: */ boolean zoomedOutToPostProcessing = ((drawOpt == baseColorDrawDiffBases && !zoomedToBaseLevel) || (drawOpt == baseColorDrawDiffCodons && !zoomedToCdsColorLevel)); @@ -1692,62 +1694,62 @@ drawScaledBox(hvg, s, e, scale, xOff+1, y+1, heightPer -2, color); } else { drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, color); } } } static void drawCdsDiffCodonsOnly(struct track *tg, struct linkedFeatures *lf, struct hvGfx *hvg, int xOff, int y, double scale, int heightPer, - struct dnaSeq *mrnaSeq, struct psl *psl, + struct dnaSeq *qSeq, int qOffset, struct psl *psl, int winStart) /* Draw red boxes only where mRNA codons differ from genomic. This assumes * that lf has been drawn already, we're zoomed out past zoomedToCdsColorLevel, * we're not in dense mode etc. */ { struct simpleFeature *sf = NULL; Color dummyColor = 0; if (lf->codons == NULL) errAbort("drawCdsDiffCodonsOnly: lf->codons is NULL"); for (sf = lf->codons; sf != NULL; sf = sf->next) { int s = sf->start; int e = sf->end; if (s < lf->tallStart) s = lf->tallStart; if (e > lf->tallEnd) e = lf->tallEnd; if (s > winEnd || e < winStart) continue; if (e > s) { int mrnaS = convertCoordUsingPsl( s, psl ); if (mrnaS >= 0) { char mrnaBases[4]; char genomicCodon[2], mrnaCodon; boolean queryInsertion = FALSE; Color color = cdsColor[CDS_STOP]; - getMrnaBases(psl, mrnaSeq, mrnaS, s, e, (lf->orientation == -1), + getMrnaBases(psl, qSeq, mrnaS, s, e, (lf->orientation == -1), mrnaBases, &queryInsertion); if (queryInsertion) color = cdsColor[CDS_QUERY_INSERTION]; mrnaCodon = baseColorLookupCodon(mrnaBases); colorAndCodonFromGrayIx(hvg, genomicCodon, sf->grayIx, dummyColor); if (queryInsertion) drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, color); if (mrnaCodon != genomicCodon[0]) { if (mrnaCodon != genomicCodon[0] && protEquivalent(genomicCodon[0], mrnaCodon)) color = cdsColor[CDS_SYN_PROT]; /* this was a call to drawScaledBoxBlend, but this breaks under * 32-bit color, so for the moment we're going to depend * on the painter's algorithm */ drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, color); @@ -1755,119 +1757,119 @@ } else { /*show we have an error by coloring entire exon block yellow*/ drawScaledBox(hvg, s, e, scale, xOff, y, heightPer, MG_YELLOW); // FIXME: this shouldn't ever happen, should be an errAbort warn("Bug: drawCdsDiffCodonsOnly: convertCoordUsingPsl failed<br>\n"); } } } } void baseColorOverdrawDiff(struct track *tg, struct linkedFeatures *lf, struct hvGfx *hvg, int xOff, int y, double scale, int heightPer, - struct dnaSeq *mrnaSeq, struct psl *psl, + struct dnaSeq *qSeq, int qOffset, struct psl *psl, int winStart, enum baseColorDrawOpt drawOpt) /* If we're drawing different bases/codons, and zoomed out past base/codon * level, draw 1-pixel wide red lines only where bases/codons differ from * genomic. This tests drawing mode and zoom level but assumes that lf itself * has been drawn already and we're not in dense mode etc. */ { boolean enabled = TRUE; // check max zoom float showDiffBasesMaxZoom = trackDbFloatSettingOrDefault(tg->tdb, "showDiffBasesMaxZoom", -1.0); if ((showDiffBasesMaxZoom >= 0.0) && ((basesPerPixel > showDiffBasesMaxZoom) || (showDiffBasesMaxZoom == 0.0))) enabled = FALSE; if (drawOpt == baseColorDrawDiffCodons && !zoomedToCdsColorLevel && lf->codons && enabled) { drawCdsDiffCodonsOnly(tg, lf, hvg, xOff, y, scale, - heightPer, mrnaSeq, psl, winStart); + heightPer, qSeq, qOffset, psl, winStart); } if (drawOpt == baseColorDrawDiffBases && !zoomedToBaseLevel && enabled) { drawCdsDiffBaseTickmarksOnly(tg, lf, hvg, xOff, y, scale, - heightPer, mrnaSeq, psl, winStart); + heightPer, qSeq, qOffset, psl, winStart); } } void baseColorOverdrawQInsert(struct track *tg, struct linkedFeatures *lf, struct hvGfx *hvg, int xOff, int y, double scale, int heightPer, - struct dnaSeq *mrnaSeq, struct psl *psl, + struct dnaSeq *qSeq, int qOffset, struct psl *psl, int winStart, enum baseColorDrawOpt drawOpt, boolean indelShowQInsert, boolean indelShowPolyA) /* If applicable, draw 1-pixel wide orange lines for query insertions in the * middle of the query, 1-pixel wide blue lines for query insertions at the * end of the query, and 1-pixel wide green (instead of blue) when a query * insertion at the end is a valid poly-A tail. */ { assert(psl); int i; int s; int lastBlk = psl->blockCount - 1; boolean gotPolyAStart=FALSE, gotPolyAEnd=FALSE; -if (indelShowPolyA && mrnaSeq) +if (indelShowPolyA && qSeq) { /* Draw green lines for polyA first, so if the entire transcript is * jammed into one pixel and the other end has a blue line, blue is * what the user sees. */ if (psl->qStarts[0] != 0 && psl->strand[0] == '-') { /* Query is -. We reverse-complemented in baseColorDrawSetup, * so test for polyT head: */ - int polyTSize = headPolyTSizeLoose(mrnaSeq->dna, mrnaSeq->size); + int polyTSize = headPolyTSizeLoose(qSeq->dna, qSeq->size); if (polyTSize > 0 && (polyTSize + 3) >= psl->qStarts[0]) { if (psl->strand[1] == '-') s = psl->tSize - psl->tStarts[0] - 1; else s = psl->tStarts[0]; drawVertLine(lf, hvg, s, xOff, y, heightPer-1, scale, cdsColor[CDS_POLY_A]); gotPolyAStart = TRUE; } } if ((psl->qStarts[lastBlk] + psl->blockSizes[lastBlk] != psl->qSize) && psl->strand[0] == '+') { if (psl->strand[1] == '-') { /* Query is + but target is -. We reverse-complemented in * baseColorDrawSetup, so test for polyT head: */ - int polyTSize = headPolyTSizeLoose(mrnaSeq->dna, mrnaSeq->size); + int polyTSize = headPolyTSizeLoose(qSeq->dna, qSeq->size); int rcQStart = (psl->qSize - (psl->qStarts[lastBlk] + psl->blockSizes[lastBlk])); if (polyTSize > 0 && (polyTSize + 3) >= rcQStart) { s = psl->tStart; drawVertLine(lf, hvg, s, xOff, y, heightPer-1, scale, cdsColor[CDS_POLY_A]); gotPolyAEnd = TRUE; } } else { /* Both are +. We didn't reverse-complement in * baseColorDrawSetup, so test for polyA tail: */ - int polyASize = tailPolyASizeLoose(mrnaSeq->dna, mrnaSeq->size); + int polyASize = tailPolyASizeLoose(qSeq->dna, qSeq->size); if (polyASize > 0 && ((polyASize + 3) >= (psl->qSize - (psl->qStarts[lastBlk] + psl->blockSizes[lastBlk])))) { s = psl->tStarts[lastBlk] + psl->blockSizes[lastBlk]; drawVertLine(lf, hvg, s, xOff, y, heightPer-1, scale, cdsColor[CDS_POLY_A]); gotPolyAEnd = TRUE; } } } } if (indelShowQInsert) @@ -1963,78 +1965,168 @@ makeCdsShades(hvg, cdsColor); cdsColorsMade = TRUE; } } static void checkTrackInited(struct track *tg, char *what) /* Die if baseColorInitTrack has not been called (most recently) for this track. */ { setGc(); if (initedTrack == NULL || differentString(tg->track, initedTrack)) errAbort("Error: Track %s should have been baseColorInitTrack'd before %s. " "(tg->drawItems may be unrecognized by baseColorCanDraw)", tg->track, what); } +struct psl *linkedFeatureToPsl(struct linkedFeatures *lf, char *qName, char *tName, int tSize) +/* Fake up a psl from linked features */ +{ +/* Go through link features counting up components and doing min/maxes */ +int tStart = BIGNUM, qStart = BIGNUM, tEnd = 0, qEnd = 0; +int blockCount = 0; +struct simpleFeature *sf; +for (sf = lf->components; sf != NULL; sf = sf->next) + { + if (tStart > sf->start) tStart = sf->start; + if (tEnd < sf->end) tEnd = sf->end; + if (qStart > sf->qStart) qStart = sf->qStart; + if (qEnd < sf->qEnd) qEnd = sf->qEnd; + blockCount += 1; + } + +struct psl *psl; +AllocVar(psl); +psl->strand[0] = (lf->orientation < 0 ? '-' : '+'); + +psl->qName = cloneString(qName); +psl->qSize = qEnd; // Might need fixup +psl->qStart = qStart; +psl->qEnd = qEnd; + +psl->tName = cloneString(tName); +psl->tSize = tSize; +psl->tStart = tStart; +psl->tEnd = tEnd; + +/* Set block count and allocate block-by-block arrays */ +psl->blockCount = blockCount; +unsigned *blockSizes = AllocArray(psl->blockSizes, blockCount); +unsigned *qStarts = AllocArray(psl->qStarts, blockCount); +unsigned *tStarts = AllocArray(psl->tStarts, blockCount); + +/* Go through link features filling in blockSizes, qStarts, qEnds */ +int i; +unsigned totalSize = 0; +for (i=0, sf = lf->components; sf != NULL; sf = sf->next, ++i) + { + int size = sf->end - sf->start; + totalSize += size; + blockSizes[i] = size; + qStarts[i] = sf->qStart; + tStarts[i] = sf->start; + } +psl->match = totalSize; /* May want to redo later */ + +/* Go through linked features one last time filling in gap info */ +struct simpleFeature *prev = lf->components; +for (sf = prev->next; sf != NULL; sf = sf->next) + { + int qInsertSize = sf->qStart - prev->qStart; + if (qInsertSize > 0) + { + psl->qNumInsert += 1; + psl->qBaseInsert += qInsertSize; + } + int tInsertSize = sf->start - prev->end; + if (tInsertSize > 0) + { + psl->tNumInsert += 1; + psl->tBaseInsert += tInsertSize; + } + prev = sf; + } + +return psl; +} + enum baseColorDrawOpt baseColorDrawSetup(struct hvGfx *hvg, struct track *tg, struct linkedFeatures *lf, - struct dnaSeq **retMrnaSeq, struct psl **retPsl) + struct dnaSeq **retMrnaSeq, int *retMrnaOffset, struct psl **retPsl) /* Returns the CDS coloring option, allocates colors if necessary, and * returns the sequence and psl record for the given item if applicable. * Note: even if base coloring is not enabled, this will return psl and * mrna seq if query insert/polyA coloring is enabled. * baseColorInitTrack must be called before this (in tg->drawItems) -- * this is meant to be called by tg->drawItemAt (i.e. linkedFeaturesDrawAt). */ { enum baseColorDrawOpt drawOpt = baseColorGetDrawOpt(tg); boolean indelShowDoubleInsert, indelShowQueryInsert, indelShowPolyA; indelEnabled(cart, (tg ? tg->tdb : NULL), basesPerPixel, &indelShowDoubleInsert, &indelShowQueryInsert, &indelShowPolyA); if (drawOpt <= baseColorDrawOff && !(indelShowQueryInsert || indelShowPolyA)) return drawOpt; checkTrackInited(tg, "calling baseColorDrawSetup"); /* If we are using item sequence, fetch alignment and sequence: */ -if ((drawOpt > baseColorDrawOff && (startsWith("psl", tg->tdb->type) || - sameString("bigPsl", tg->tdb->type) || - sameString("lrg", tg->tdb->track))) - || indelShowQueryInsert || indelShowPolyA) +struct psl *psl = NULL; +struct dnaSeq *mrnaSeq = NULL; +int mrnaOffset = 0; +if (indelShowQueryInsert || indelShowPolyA || drawOpt > baseColorDrawOff) { + char *type = tg->tdb->type; + boolean needPsl = FALSE; + char *qName = lf->name; if (sameString("lrg", tg->tdb->track)) - *retPsl = lrgToPsl(lf->original, hChromSize(database, chromName)); + { + psl = lrgToPsl(lf->original, hChromSize(database, chromName)); + needPsl = TRUE; + } + else if (startsWith("psl", type) || sameString("bigPsl", type)) + { + psl = (struct psl *)(lf->original); + needPsl = TRUE; + } + else if (startsWithWord("chain", type) || startsWithWord("bigChain", type)) + { + qName = cloneFirstWord(lf->name); + psl = linkedFeatureToPsl(lf, qName, chromName, hChromSize(database, chromName)); + needPsl = TRUE; + } + boolean doRc = FALSE; + if (needPsl) + { + if (psl == NULL) + drawOpt = baseColorDrawOff; else - *retPsl = (struct psl *)(lf->original); - if (*retPsl == NULL) - return baseColorDrawOff; + doRc = (psl->strand[0] == '-' || psl->strand[1] == '-'); } + /* Do we need the sequence for display, if so get it */ if (drawOpt == baseColorDrawItemBases || drawOpt == baseColorDrawDiffBases || drawOpt == baseColorDrawItemCodons || - drawOpt == baseColorDrawDiffCodons || - indelShowPolyA) + drawOpt == baseColorDrawDiffCodons || indelShowPolyA) { - struct psl *psl = *retPsl; - boolean doRc = (psl != NULL && (psl->strand[0] == '-' || psl->strand[1] == '-')); - *retMrnaSeq = maybeGetSeqUpper(lf, tg->table, tg, doRc); - // if no sequence, no base color drawing - // Note: we could have sequence but no PSL (eg, tagAlign format) - if (*retMrnaSeq == NULL) - return baseColorDrawOff; + mrnaSeq = maybeGetSeqUpper(lf, qName, tg->table, tg, doRc); + if (mrnaSeq == NULL) + drawOpt = baseColorDrawOff; } - + } +*retPsl = psl; +*retMrnaSeq = mrnaSeq; +*retMrnaOffset = mrnaOffset; return drawOpt; } void baseColorDrawRulerCodons(struct hvGfx *hvg, struct simpleFeature *sfList, double scale, int xOff, int y, int height, MgFont *font, int winStart, int maxPixels, bool zoomedToText) /* Draw amino acid translation of genomic sequence based on a list of codons. Used for browser ruler in full mode*/ { struct simpleFeature *sf; if (!cdsColorsMade) { makeCdsShades(hvg, cdsColor); cdsColorsMade = TRUE;