2b789d0cc2a1f7c5d287820c20d2039a77a16009 kent Wed Jun 22 20:45:03 2022 -0700 Removing baseColorDrawCleanup. Adding "2bit" type to baseColorUseSequence trackDb setting. Adding otherTwoBitUrl tag to trackDb. Enabling psl and bigPsl to use the "baseColorUseSequence 2bit" and "otherTwoBitUrl someTwoBitUrl.2bit" combination to get differing bases highlighted in red and to display bases when zoomed in to base level. diff --git src/hg/hgTracks/cds.c src/hg/hgTracks/cds.c index 606eb56..e4cada9 100644 --- src/hg/hgTracks/cds.c +++ src/hg/hgTracks/cds.c @@ -933,36 +933,78 @@ { /* seqSource is: extFile seqTbl extFileTbl */ static struct dyString *buf = NULL; if (buf == NULL) buf = dyStringNew(0); dyStringClear(buf); dyStringAppend(buf, seqSource); char *words[3]; int nwords = chopByWhite(buf->string, words, ArraySize(words)); if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile")) errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, seqSource); return hDnaSeqGet(database, name, words[1], words[2]); } + +static struct dnaSeq *fetchCachedTwoBitSeq(char *url, char *seqName, boolean doRc) +/* fetch a sequence from a 2bit. Caches open two bit files and sequence in + * both forward and reverse strand */ +{ +/* Init static url hash */ +static struct hash *urlHash = NULL; // hash of open files +if (urlHash == NULL) + urlHash = hashNew(0); + +/* Get cache for a particular two bit URL */ +struct twoBitCache +/* An open two bit file and a hash of sequences in it */ + { + struct twoBitFile *tbf; + struct hash *seqHash; + struct hash *rcSeqHash; + }; +struct twoBitCache *cache = hashFindVal(urlHash, url); +if (cache == NULL) + { + AllocVar(cache); + cache->tbf = twoBitOpen(url); + cache->seqHash = hashNew(0); + cache->rcSeqHash = hashNew(0); + hashAdd(urlHash, url, cache); + } +struct hash *seqHash = (doRc ? 
cache->rcSeqHash : cache->seqHash); +struct dnaSeq *seq = hashFindVal(seqHash, seqName); + +if (seq == NULL) + { + seq = twoBitReadSeqFrag(cache->tbf, seqName, 0, 0); + touppers(seq->dna); + if (doRc) + reverseComplement(seq->dna, seq->size); + hashAdd(seqHash, seqName, seq); + } +return seq; +} + static struct dnaSeq *maybeGetSeqUpper(struct linkedFeatures *lf, - char *tableName, struct track *tg) + char *tableName, struct track *tg, boolean doRc) /* Look up the sequence in genbank tables (hGenBankGetMrna also searches * seq if it can't find it in GB tables) or user's blat sequence, * uppercase and return it if we find it, return NULL if we don't find it. */ { +boolean doUpper = TRUE; struct dnaSeq *mrnaSeq = NULL; char *name = getItemDataName(tg, lf->name); if (sameString(tableName,"refGene") || sameString(tableName,"refSeqAli")) mrnaSeq = hGenBankGetMrna(database, name, "refMrna"); else { char *seqSource = trackDbSetting(tg->tdb, BASE_COLOR_USE_SEQUENCE); if (seqSource == NULL) errAbort("setting '%s' missing for track '%s'", BASE_COLOR_USE_SEQUENCE, tg->track); if (sameString(seqSource, "ss")) mrnaSeq = maybeGetUserSeq(name); #ifndef GBROWSE else if (sameString(seqSource, PCR_RESULT_TRACK_NAME)) mrnaSeq = maybeGetPcrResultSeq(lf); #endif /* GBROWSE */ @@ -983,49 +1025,60 @@ reverseComplement(mrnaSeq->dna, mrnaSeq->size); } else if (sameString("lfExtra", seqSource)) { if (lf->extra == NULL) errAbort("baseColorDrawSetup: sequence for track '%s' not loaded when sequence option is set in trackDb\n", tg->track); mrnaSeq = newDnaSeq(cloneString(lf->extra), strlen(lf->extra), lf->extra); if (lf->orientation == -1) reverseComplement(mrnaSeq->dna, mrnaSeq->size); } else if (sameString("lrg", seqSource)) { struct lrg *lrg = lf->original; mrnaSeq = lrgReconstructSequence(lrg, database); } + else if (sameString("2bit", seqSource)) + { + char *url = trackDbSetting(tg->tdb, "otherTwoBitUrl"); + if (url == NULL) + errAbort("missing otherTwoBitUrl in baseColorUseSequence 2bit 
trackDb setting"); + mrnaSeq = fetchCachedTwoBitSeq(url, name, doRc); + doRc = FALSE; // Handled it already + doUpper = FALSE; // Handled it already + } else if (startsWith("table ", seqSource)) { char *table = seqSource; nextWord(&table); mrnaSeq = hGenBankGetMrna(database, name, table); } else if (startsWithWord("db", seqSource)) { char *sourceDb = seqSource; nextWord(&sourceDb); if (isEmpty(sourceDb)) sourceDb = database; mrnaSeq = hChromSeq(sourceDb, name, 0, 0); } else mrnaSeq = hGenBankGetMrna(database, name, NULL); } -if (mrnaSeq != NULL) +if (mrnaSeq != NULL && doUpper) touppers(mrnaSeq->dna); +if (mrnaSeq != NULL && doRc) // mrnaSeq is NULL when no sequence was found + reverseComplement(mrnaSeq->dna, mrnaSeq->size); return mrnaSeq; } static void makeCdsShades(struct hvGfx *hvg, Color *cdsColor) /* setup CDS colors */ { cdsColor[CDS_ERROR] = hvGfxFindColorIx(hvg,0,0,0); cdsColor[CDS_ODD] = hvGfxFindColorIx(hvg,CDS_ODD_R,CDS_ODD_G,CDS_ODD_B); cdsColor[CDS_EVEN] = hvGfxFindColorIx(hvg,CDS_EVEN_R,CDS_EVEN_G,CDS_EVEN_B); cdsColor[CDS_START] = hvGfxFindColorIx(hvg,CDS_START_R,CDS_START_G,CDS_START_B); cdsColor[CDS_STOP] = hvGfxFindColorIx(hvg,CDS_STOP_R,CDS_STOP_G,CDS_STOP_B); cdsColor[CDS_SPLICE] = hvGfxFindColorIx(hvg,CDS_SPLICE_R,CDS_SPLICE_G, CDS_SPLICE_B); @@ -1949,42 +2002,36 @@ || indelShowQueryInsert || indelShowPolyA) { if (sameString("lrg", tg->tdb->track)) *retPsl = lrgToPsl(lf->original, hChromSize(database, chromName)); else *retPsl = (struct psl *)(lf->original); if (*retPsl == NULL) return baseColorDrawOff; } if (drawOpt == baseColorDrawItemBases || drawOpt == baseColorDrawDiffBases || drawOpt == baseColorDrawItemCodons || drawOpt == baseColorDrawDiffCodons || indelShowPolyA) { - *retMrnaSeq = maybeGetSeqUpper(lf, tg->table, tg); - if (*retMrnaSeq != NULL && *retPsl != NULL) // we have both sequence and PSL - { - if ((*retMrnaSeq)->size != (*retPsl)->qSize) - errAbort("baseColorDrawSetup: %s: mRNA size (%d) != psl qSize (%d)", - (*retPsl)->qName, (*retMrnaSeq)->size, (*retPsl)->qSize); - if 
((*retPsl)->strand[0] == '-' || (*retPsl)->strand[1] == '-') - reverseComplement((*retMrnaSeq)->dna, strlen((*retMrnaSeq)->dna)); - } + struct psl *psl = *retPsl; + boolean doRc = (psl != NULL && (psl->strand[0] == '-' || psl->strand[1] == '-')); + *retMrnaSeq = maybeGetSeqUpper(lf, tg->table, tg, doRc); // if no sequence, no base color drawing // Note: we could have sequence but no PSL (eg, tagAlign format) - else if (*retMrnaSeq == NULL) + if (*retMrnaSeq == NULL) return baseColorDrawOff; } return drawOpt; } void baseColorDrawRulerCodons(struct hvGfx *hvg, struct simpleFeature *sfList, double scale, int xOff, int y, int height, MgFont *font, int winStart, int maxPixels, bool zoomedToText) /* Draw amino acid translation of genomic sequence based on a list of codons. Used for browser ruler in full mode*/ { struct simpleFeature *sf; if (!cdsColorsMade) @@ -1996,41 +2043,30 @@ for (sf = sfList; sf != NULL; sf = sf->next) { char codon[4]; Color color = colorAndCodonFromGrayIx(hvg, codon, sf->grayIx, MG_GRAY); if (zoomedToText) drawScaledBoxWithText(hvg, sf->start, sf->end, scale, insideX, y, height, color, 1.0, font, codon, TRUE, winStart, maxPixels, TRUE, TRUE); else /* zoomed in just enough to see colored boxes */ drawScaledBox(hvg, sf->start, sf->end, scale, xOff, y, height, color); } } -void baseColorDrawCleanup(struct linkedFeatures *lf, struct dnaSeq **pMrnaSeq, - struct psl **pPsl) -/* Free structures allocated just for base/cds coloring. */ -{ -// We could free lf->original here (either genePredFree or pslFree, depending -// on the type -- but save time by skipping that. Maybe we should save time -// by skipping this free too: -if (pMrnaSeq != NULL) - dnaSeqFree(pMrnaSeq); -} - void baseColorSetCdsBounds(struct linkedFeatures *lf, struct psl *psl, struct track *tg) /* set CDS bounds in linked features for a PSL. 
Used when zoomed out too far * for codon or base coloring, but still want to render CDS bounds */ { struct genbankCds cds; getPslCds(psl, tg, lf, &cds); if (cds.start < cds.end) { struct genbankCds genomeCds = genbankCdsToGenome(&cds, psl); if (genomeCds.start < genomeCds.end) { lf->tallStart = genomeCds.start; lf->tallEnd = genomeCds.end; }