bc21bd3d27fe3d29971231955b3fc544fa1c3d1e
angie
Wed Oct 16 11:51:39 2013 -0700
Two new tracks for Locus Reference Genomic (LRG) (#11863) with customhandlers: LRG Regions and LRG Transcripts.
LRGs are frozen reference sequences for a particular gene plus some upstream and downstream sequence.
They are intended to provide a stable coordinate system for gene annotations that won't change with every new genome assembly, but can be mapped to each genome assembly.
Since there is a lot of metadata associated with each region, I made LRG Regions a bigBed 12 + with fields describing mismatches and indels, so that PSL can be derived from the bigBed and the original LRG sequence can be reconstructed using genome assembly sequence and the mismatch/indel info.
hgTracks shows differences and LRG insertions into the reference assembly using the cds.c baseColor code.
(LRG deletions from the reference appear as gaps, which we get for free with bed12 info).
For LRG Transcripts, I found the genePred codon-coloring code inadequate for showing an insertion into hg19 (or even mismatches), so instead of genePred I ended up using PSL + sequence, more like the mRNA track representation and display.

diff --git src/hg/hgTracks/lrgTrack.c src/hg/hgTracks/lrgTrack.c
new file mode 100644
index 0000000..564a680
--- /dev/null
+++ src/hg/hgTracks/lrgTrack.c
@@ -0,0 +1,75 @@
+/* lrgTrack.c - display Locus Reference Genomic (LRG) sequences mapped to genome assembly */
+#include "common.h"
+#include "hgTracks.h"
+#include "bigBed.h"
+#include "lrg.h"
+
+static struct linkedFeatures *lrgToLf(struct lrg *lrg)
+/* Translate LRG into a linkedFeatures item.  The lrg is handed to lfFromBed through a
+ * struct bed cast, and the full record is kept in lf->original so that other handlers
+ * in this file (lrgItemName) can read the LRG-specific fields. */
+{
+struct linkedFeatures *lf = lfFromBed((struct bed *)lrg);
+lf->original = lrg;
+return lf;
+}
+
+static void lrgLoadItems(struct track *tg)
+/* Load LRGs in range, translate to linkedFeatures and store as tg->items.
+ * Each bigBed interval returned for chromName:winStart-winEnd is split into
+ * text fields, parsed with lrgLoad and pushed onto the head of tg->items.
+ * errAborts if an interval does not yield exactly LRG_NUM_COLS fields. */
+{
+struct lm *lm = lmInit(0);
+struct bigBedInterval *bb, *bbList = bigBedSelectRange(tg, chromName, winStart, winEnd, lm);
+for (bb = bbList; bb != NULL; bb = bb->next)
+    {
+    char *lrgRow[LRG_NUM_COLS];
+    char startBuf[16], endBuf[16];
+    int bbFieldCount = bigBedIntervalToRow(bb, chromName, startBuf, endBuf, lrgRow,
+                                           ArraySize(lrgRow));
+    if (bbFieldCount != LRG_NUM_COLS)
+        errAbort("lrgLoadItems: expected %d columns for row has %d", LRG_NUM_COLS, bbFieldCount);
+    struct lrg *lrg = lrgLoad(lrgRow);
+    /* Build the item once and hand slAddHead a plain variable.
+     * NOTE(review): slAddHead is believed to be a macro that can evaluate its node
+     * argument more than once, in which case passing the lrgToLf() call directly
+     * would build two items and link the wrong one -- confirm against common.h.
+     * Using a local is correct either way. */
+    struct linkedFeatures *lf = lrgToLf(lrg);
+    slAddHead(&(tg->items), lf);
+    }
+lmCleanup(&lm);
+}
+
+static char *lrgItemName(struct track *tg, void *item)
+/* Return LRG ID and (if available) HUGO/HGNC gene symbol, formatted as "name (symbol)".
+ * When there is a symbol the label is newly allocated with needMem and is not freed
+ * in this function; otherwise lf->name is returned as is. */
+{
+struct linkedFeatures *lf = item;
+struct lrg *lrg = lf->original;
+if (isNotEmpty(lrg->hgncSymbol))
+    {
+    int nameLen = strlen(lrg->name);
+    int symLen = strlen(lrg->hgncSymbol);
+    int extraLen = 3; // " ()"
+    int labelSize = nameLen + symLen + extraLen + 1; // + 1 for the terminating '\0'
+    char *label = needMem(labelSize);
+    safef(label, labelSize, "%s (%s)", lrg->name, lrg->hgncSymbol);
+    return label;
+    }
+else
+    return lf->name;
+}
+
+void lrgMethods(struct track *tg)
+/* Locus Reference Genomic (bigBed 12 +) handlers.  Starts from linkedFeaturesMethods,
+ * then sets the pack/bigBed flags and installs this file's loadItems and itemName
+ * handlers plus simpleBedNextPrevEdge for nextPrevExon. */
+{
+linkedFeaturesMethods(tg);
+tg->canPack = TRUE;
+tg->isBigBed = TRUE;
+tg->loadItems = lrgLoadItems;
+tg->itemName = lrgItemName;
+tg->nextPrevExon = simpleBedNextPrevEdge;
+}