bc21bd3d27fe3d29971231955b3fc544fa1c3d1e angie Wed Oct 16 11:51:39 2013 -0700 Two new tracks for Locus Reference Genomic (LRG) (#11863) with customhandlers: LRG Regions and LRG Transcripts. LRGs are frozen reference sequences for a particular gene plus some upstream and downstream sequence. They are intended to provide a stable coordinate system for gene annotations that won't change with every new genome assembly, but can be mapped to each genome assembly. Since there is a lot of metadata associated with each region, I made LRG Regions a bigBed 12 + with fields describing mismatches and indels, so that PSL can be derived from the bigBed and the original LRG sequence can be reconstructed using genome assembly sequence and the mismatch/indel info. hgTracks shows differences and LRG insertions into the reference assembly using the cds.c baseColor code. (LRG deletions from the reference appear as gaps, which we get for free with bed12 info). For LRG Transcripts, I found the genePred codon-coloring code inadequate for showing an insertion into hg19 (or even mismatches), so instead of genePred I ended up using PSL + sequence, more like the mRNA track representation and display. diff --git src/hg/hgTracks/cds.c src/hg/hgTracks/cds.c index 378962d..e9ee1c7 100644 --- src/hg/hgTracks/cds.c +++ src/hg/hgTracks/cds.c @@ -7,30 +7,31 @@ #include "dystring.h" #include "memgfx.h" #include "hvGfx.h" #include "dnaseq.h" #include "dnautil.h" #include "hdb.h" #include "psl.h" #include "fa.h" #include "genePred.h" #include "cds.h" #include "genbank.h" #include "twoBit.h" #include "hgTracks.h" #include "cdsSpec.h" #include "axt.h" +#include "lrg.h" /* * WARNING: this code is incomprehensible: * - variables named codon often contain amino acids, not condons * - it does two passes, one undocumented function encodes both a * color and an amino acid into the struct simpleFeature grayIx * field and this is decoded in the second pass. * - baseColorDrawItem doesn't draw a item, it draws a single codon, * or a maybe even a single base. * - there are many assumptions and state shared between this module * and the simpleTracks.c. * may The Force be with you.. */ #ifndef GBROWSE @@ -335,38 +336,37 @@ for (i=0; i < (e - s); i++) { if (mrnaSeq->dna[mrnaS+i] != winDna[s-winStart+i]) drawVertLine(lf, hvg, s+i, xOff, y+1, heightPer-2, scale, c); } } } } } static void maskDiffString( char *retStr, char *s1, char *s2, char mask ) /*copies s1, masking off similar characters, and returns result into retStr. *if strings are of different size it stops after s1 is done.*/ { +int s1Len = strlen(s1); +memset(retStr, mask, s1Len); int i; -for (i=0; i<strlen(s1); i++) +for (i=0; i < s1Len; i++) { - if (s1[i] == s2[i]) - retStr[i] = mask; - else + if (s1[i] != s2[i]) retStr[i] = s1[i]; - } retStr[i] = '\0'; } Color lighterShade(struct hvGfx *hvg, Color color, double percentLess) /* Get lighter shade of a color, with a variable level */ { struct rgbColor rgbColor = hvGfxColorIxToRgb(hvg, color); rgbColor.r = (int)((rgbColor.r+127)/percentLess); rgbColor.g = (int)((rgbColor.g+127)/percentLess); rgbColor.b = (int)((rgbColor.b+127)/percentLess); return hvGfxFindColorIx(hvg, rgbColor.r, rgbColor.g, rgbColor.b); } @@ -894,30 +894,41 @@ if (lf->orientation == -1) reverseComplement(mrnaSeq->dna, mrnaSeq->size); } else if (sameString("seq1Seq2", seqSource)) { mrnaSeq = lf->extra; if (lf->orientation == -1) reverseComplement(mrnaSeq->dna, mrnaSeq->size); } else if (sameString("lfExtra", seqSource)) { mrnaSeq = newDnaSeq(cloneString(lf->extra), strlen(lf->extra), lf->extra); if (lf->orientation == -1) reverseComplement(mrnaSeq->dna, mrnaSeq->size); } +else if (sameString("lrg", seqSource)) + { + struct lrg *lrg = lf->original; + mrnaSeq = lrgReconstructSequence(lrg, database); + } +else if (startsWith("table ", seqSource)) + { + char *table = seqSource; + nextWord(&table); + mrnaSeq = hGenBankGetMrna(database, name, table); + } else mrnaSeq = hGenBankGetMrna(database, name, NULL); if (mrnaSeq != NULL) touppers(mrnaSeq->dna); return mrnaSeq; } static void makeCdsShades(struct hvGfx *hvg, Color *cdsColor) /* setup CDS colors */ { cdsColor[CDS_ERROR] = hvGfxFindColorIx(hvg,0,0,0); cdsColor[CDS_ODD] = hvGfxFindColorIx(hvg,CDS_ODD_R,CDS_ODD_G,CDS_ODD_B); @@ -1739,31 +1750,31 @@ { /* Insert at end of query -- draw vertical blue line unless it's * all polyA. */ s = (psl->strand[1] == '-') ? (psl->tSize - (psl->tStarts[lastBlk] + psl->blockSizes[lastBlk])) : (psl->tStarts[lastBlk] + psl->blockSizes[lastBlk]); drawVertLine(lf, hvg, s, xOff, y, heightPer-1, scale, cdsColor[CDS_QUERY_INSERTION_AT_END]); } } } void baseColorInitTrack(struct hvGfx *hvg, struct track *tg) /* Set up base coloring state (e.g. cache genomic sequence) for tg. * This must be called by tg->drawItems if baseColorDrawSetup is used - * in tg->drawItemAt. Uses tg->drawItems method to determine whether + * in tg->drawItemAt. Peeks at tg->drawItems method to determine whether * tg is linkedFeatures or linkedFeaturesSeries (currently the only * two supported track types -- bed, psl etc. are subclasses of these). */ { if (initedTrack == NULL || differentString(tg->track, initedTrack)) { int overallStart, overallEnd; boolean isSeries = FALSE; if ((tg->drawItems == linkedFeaturesSeriesDraw) #ifdef USE_BAM || (tg->drawItems == bamLinkedFeaturesSeriesDraw)) #else ) #endif isSeries = TRUE; else if (!baseColorCanDraw(tg)) @@ -1807,33 +1818,37 @@ * mrna seq if query insert/polyA coloring is enabled. * baseColorInitTrack must be called before this (in tg->drawItems) -- * this is meant to be called by tg->drawItemAt (i.e. linkedFeaturesDrawAt). */ { enum baseColorDrawOpt drawOpt = baseColorGetDrawOpt(tg); boolean indelShowDoubleInsert, indelShowQueryInsert, indelShowPolyA; indelEnabled(cart, (tg ? tg->tdb : NULL), basesPerPixel, &indelShowDoubleInsert, &indelShowQueryInsert, &indelShowPolyA); if (drawOpt <= baseColorDrawOff && !(indelShowQueryInsert || indelShowPolyA)) return drawOpt; checkTrackInited(tg, "calling baseColorDrawSetup"); /* If we are using item sequence, fetch alignment and sequence: */ -if ((drawOpt > baseColorDrawOff && startsWith("psl", tg->tdb->type)) || - indelShowQueryInsert || indelShowPolyA) +if ((drawOpt > baseColorDrawOff && (startsWith("psl", tg->tdb->type) || + sameString("lrg", tg->tdb->track))) + || indelShowQueryInsert || indelShowPolyA) { + if (sameString("lrg", tg->tdb->track)) + *retPsl = lrgToPsl(lf->original, hChromSize(database, chromName)); + else *retPsl = (struct psl *)(lf->original); if (*retPsl == NULL) return baseColorDrawOff; } if (drawOpt == baseColorDrawItemBases || drawOpt == baseColorDrawDiffBases || drawOpt == baseColorDrawItemCodons || drawOpt == baseColorDrawDiffCodons || indelShowPolyA) { *retMrnaSeq = maybeGetSeqUpper(lf, tg->table, tg); if (*retMrnaSeq != NULL && *retPsl != NULL) // we have both sequence and PSL { if ((*retMrnaSeq)->size != (*retPsl)->qSize) errAbort("baseColorDrawSetup: %s: mRNA size (%d) != psl qSize (%d)",