be8645fb43ba545dc342deb80cff297c5b677a5e braney Tue Sep 6 11:11:15 2016 -0700 allow bigGenePred to be used to optimize knownGene on hgTracks #15259 diff --git src/hg/hgTracks/simpleTracks.c src/hg/hgTracks/simpleTracks.c index 28ff134..66bb967 100644 --- src/hg/hgTracks/simpleTracks.c +++ src/hg/hgTracks/simpleTracks.c @@ -3477,31 +3477,31 @@ * a perfect world it would not be necessary, and * it's not necessary for ghostView. */ int x1 = x+1; int x2 = x + w - 1; if (x1 < 0) x1 = 0; if (x2 > hvg->width) x2 = hvg->width; if (x2-x1 > 0) hvGfxLine(hvg, x1, y, x2, y, color); } } static void lfColors(struct track *tg, struct linkedFeatures *lf, struct hvGfx *hvg, Color *retColor, Color *retBarbColor) /* Figure out color to draw linked feature in. */ { -if (!((lf->filterColor == 0) || (lf->filterColor == -1))) +if (!((lf->isBigGenePred) ||(lf->filterColor == 0)|| (lf->filterColor == -1))) { if (lf->extra == (void *)USE_ITEM_RGB) { struct rgbColor itemRgb; itemRgb.r = (lf->filterColor & 0xff0000) >> 16; itemRgb.g = (lf->filterColor & 0xff00) >> 8; itemRgb.b = lf->filterColor & 0xff; *retColor = *retBarbColor = hvGfxFindColorIx(hvg, itemRgb.r, itemRgb.g, itemRgb.b); } else *retColor = *retBarbColor = lf->filterColor; } else if (tg->itemColor) { @@ -3511,30 +3511,41 @@ else if (tg->colorShades) { boolean isXeno = (tg->subType == lfSubXeno) || (tg->subType == lfSubChain) || startsWith("mrnaBla", tg->table); *retColor = tg->colorShades[lf->grayIx+isXeno]; *retBarbColor = tg->colorShades[lf->grayIx]; } else { *retColor = tg->ixColor; *retBarbColor = tg->ixAltColor; } } +Color bigGenePredColor(struct track *tg, void *item, struct hvGfx *hvg) +/* Determine the color of the name for the bigGenePred linked feature. */ +{ +struct linkedFeatures *lf = item; +struct rgbColor itemRgb; +itemRgb.r = (lf->filterColor & 0xff0000) >> 16; +itemRgb.g = (lf->filterColor & 0xff00) >> 8; +itemRgb.b = lf->filterColor & 0xff; +return hvGfxFindColorIx(hvg, itemRgb.r, itemRgb.g, itemRgb.b); +} + Color linkedFeaturesNameColor(struct track *tg, void *item, struct hvGfx *hvg) /* Determine the color of the name for the linked feature. */ { Color col, barbCol; lfColors(tg, item, hvg, &col, &barbCol); return col; } struct simpleFeature *simpleFeatureCloneList(struct simpleFeature *list) /* Just copies the simpleFeature list. This is good for making a copy */ /* when the codon list is made. */ { struct simpleFeature *ret = NULL; struct simpleFeature *cur; for (cur = list; cur != NULL; cur = cur->next) @@ -5979,31 +5990,31 @@ } else return lf->name; } char *knownGeneMapName(struct track *tg, void *item) /* Return un-abbreviated gene name. */ { char str2[255]; struct linkedFeatures *lf = item; /* piggy back the protein ID (hgg_prot variable) on hgg_gene variable */ safef(str2, sizeof(str2), "%s&hgg_prot=%s", lf->name, ((struct knownGenesExtra *)(lf->extra))->hgg_prot); return(cloneString(str2)); } -void lookupKnownGeneNames(struct linkedFeatures *lfList) +void lookupKnownGeneNames(struct linkedFeatures *lfList, boolean isBigGenePred) /* This converts the known gene ID to a gene symbol */ { struct linkedFeatures *lf; struct sqlConnection *conn = hAllocConn(database); char *geneSymbol; char *protDisplayId; char *gencodeId; char *mimId; char cond_str[256]; boolean useGeneSymbol= FALSE; boolean useKgId = FALSE; boolean useProtDisplayId = FALSE; boolean useMimId = FALSE; boolean useGencodeId = FALSE; @@ -6039,71 +6050,93 @@ else if (!endsWith(label->name, "gene") && !endsWith(label->name, "gencodeId") && !endsWith(label->name, "kgId") && !endsWith(label->name, "prot") && !endsWith(label->name, omimLabel) ) { useGeneSymbol = TRUE; cartRemove(cart, label->name); } } for (lf = lfList; lf != NULL; lf = lf->next) { struct dyString *name = dyStringNew(SMALLDYBUF); struct knownGenesExtra *kgE; + struct genePredExt *gp = lf->original; AllocVar(kgE); labelStarted = FALSE; /* reset between items */ if (useGeneSymbol) { + if ( isBigGenePred ) + { + geneSymbol = gp->geneName; + } + else + { sqlSafefFrag(cond_str, sizeof cond_str,"kgID='%s'", lf->name); geneSymbol = sqlGetField(database, "kgXref", "geneSymbol", cond_str); + } if (geneSymbol != NULL) { dyStringAppend(name, geneSymbol); } labelStarted = TRUE; } if (useGencodeId) { if (labelStarted) dyStringAppendC(name, '/'); else labelStarted = TRUE; + if ( isBigGenePred ) + { + gencodeId = gp->name2; + } + else + { sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", lf->name); gencodeId = sqlGetField(database, "knownGene", "alignID", cond_str); + } dyStringAppend(name, gencodeId); } if (useKgId) { if (labelStarted) dyStringAppendC(name, '/'); else labelStarted = TRUE; dyStringAppend(name, lf->name); } if (useProtDisplayId) { if (labelStarted) dyStringAppendC(name, '/'); else labelStarted = TRUE; + if ( isBigGenePred ) + { + dyStringAppend(name, gp->geneName2); + } + else + { if (lf->extra != NULL) { dyStringAppend(name, (char *)lf->extra); } else { sqlSafefFrag(cond_str, sizeof(cond_str), "kgID='%s'", lf->name); protDisplayId = sqlGetField(database, "kgXref", "spDisplayID", cond_str); dyStringAppend(name, protDisplayId); } } + } if (useMimId && sqlTableExists(conn, refLinkTable)) { if (labelStarted) dyStringAppendC(name, '/'); else labelStarted = TRUE; sqlSafef(cond_str, sizeof(cond_str), "select cast(r.omimId as char) from kgXref,%s r where kgID = '%s' and kgXref.refseq = r.mrnaAcc and r.omimId != 0",refLinkTable, lf->name); mimId = sqlQuickString(conn, cond_str); if (mimId) dyStringAppend(name, mimId); } /* should this be a hash instead? */ kgE->name = dyStringCannibalize(&name); kgE->hgg_prot = lf->extra; lf->extra = kgE; } } @@ -6112,87 +6145,149 @@ struct linkedFeatures *stripShortLinkedFeatures(struct linkedFeatures *list) /* Remove linked features with no tall component from list. */ { struct linkedFeatures *newList = NULL, *el, *next; for (el = list; el != NULL; el = next) { next = el->next; if (el->tallStart < el->tallEnd) slAddHead(&newList, el); } slReverse(&newList); return newList; } +#define BIT_BASIC (1 << 0) // transcript is in basic set +#define BIT_CANON (1 << 1) // transcript is in canonical set + +struct linkedFeatures *stripLinkedFeaturesWithoutBitInScore (struct linkedFeatures *list, unsigned bit) +/* Remove features that don't have this bit set in the score. */ +{ +struct linkedFeatures *newList = NULL, *el, *next; +for (el = list; el != NULL; el = next) + { + next = el->next; + el->next = NULL; + if ((unsigned)el->score & bit) + { + slAddHead(&newList, el); + } + } +slReverse(&newList); +return newList; +} + struct linkedFeatures *stripLinkedFeaturesNotInHash(struct linkedFeatures *list, struct hash *hash) /* Remove linked features not in hash from list. */ { struct linkedFeatures *newList = NULL, *el, *next; for (el = list; el != NULL; el = next) { next = el->next; if (hashLookup(hash, el->name)) slAddHead(&newList, el); } slReverse(&newList); return newList; } +static void loadKnownBigGenePred(struct track *tg, boolean isGencode) +/* Load knownGene features from a bigGenePred. */ +{ +int scoreMin = atoi(trackDbSettingClosestToHomeOrDefault(tg->tdb, "scoreMin", "0")); +int scoreMax = atoi(trackDbSettingClosestToHomeOrDefault(tg->tdb, "scoreMax", "1000")); +struct linkedFeatures *lfList = NULL; +tg->parallelLoading = TRUE; // set so bigBed code will look at bigDataUrl +bigBedAddLinkedFeaturesFrom(tg, chromName, winStart, winEnd, + scoreMin, scoreMax, TRUE, 12, &lfList); +struct linkedFeatures *newList = lfList; + +if (isGencode) + { + char varName[SMALLBUF]; + safef(varName, sizeof(varName), "%s.show.comprehensive", tg->tdb->track); + boolean showComprehensive = cartUsualBoolean(cart, varName, FALSE); + if (!showComprehensive) + newList = stripLinkedFeaturesWithoutBitInScore(lfList, BIT_BASIC); + } + +slSort(&newList, linkedFeaturesCmp); +tg->items = newList; +tg->itemColor = bigGenePredColor; +tg->itemNameColor = bigGenePredColor; +} + void loadKnownGene(struct track *tg) /* Load up known genes. */ { struct trackDb *tdb = tg->tdb; char *isGencode = trackDbSetting(tdb, "isGencode"); +char *bigGenePred = trackDbSetting(tdb, "bigGeneDataUrl"); +struct udcFile *file; +boolean isBigGenePred = FALSE; -if (isGencode == NULL) +if ((bigGenePred != NULL) && ((file = udcFileMayOpen(bigGenePred, udcDefaultDir())) != NULL)) + { + isBigGenePred = TRUE; + udcFileClose(&file); + loadKnownBigGenePred(tg, isGencode != NULL); + } +else if (isGencode == NULL) loadGenePredWithName2(tg); else loadKnownGencode(tg); char varName[SMALLBUF]; safef(varName, sizeof(varName), "%s.show.noncoding", tdb->track); boolean showNoncoding = cartUsualBoolean(cart, varName, TRUE); safef(varName, sizeof(varName), "%s.show.spliceVariants", tdb->track); boolean showSpliceVariants = cartUsualBoolean(cart, varName, TRUE); if (!showNoncoding) tg->items = stripShortLinkedFeatures(tg->items); if (!showSpliceVariants) { + if (isBigGenePred) + { + tg->items = stripLinkedFeaturesWithoutBitInScore(tg->items, BIT_CANON); + } + else + { char *canonicalTable = trackDbSettingOrDefault(tdb, "canonicalTable", "knownCanonical"); if (hTableExists(database, canonicalTable)) { /* Create hash of items in canonical table in region. */ struct sqlConnection *conn = hAllocConn(database); struct hash *hash = hashNew(0); char query[512]; sqlSafef(query, sizeof(query), "select transcript from %s where chrom=\"%s\" and chromStart < %d && chromEnd > %d", canonicalTable, chromName, winEnd, winStart); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) hashAdd(hash, row[0], NULL); sqlFreeResult(&sr); hFreeConn(&conn); /* Get rid of non-canonical items. */ tg->items = stripLinkedFeaturesNotInHash(tg->items, hash); hashFree(&hash); } } -lookupKnownGeneNames(tg->items); + } +lookupKnownGeneNames(tg->items, isBigGenePred); limitVisibility(tg); } Color knownGeneColorCalc(struct track *tg, void *item, struct hvGfx *hvg) /* Return color to draw known gene in. */ { struct linkedFeatures *lf = item; int col = tg->ixColor; struct rgbColor *normal = &(tg->color); struct rgbColor lighter; struct rgbColor lightest; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; char query[256];