be8645fb43ba545dc342deb80cff297c5b677a5e
braney
  Tue Sep 6 11:11:15 2016 -0700
allow bigGenePred to be used to optimize knownGene on hgTracks  #15259

diff --git src/hg/hgTracks/simpleTracks.c src/hg/hgTracks/simpleTracks.c
index 28ff134..66bb967 100644
--- src/hg/hgTracks/simpleTracks.c
+++ src/hg/hgTracks/simpleTracks.c
@@ -3477,31 +3477,31 @@
     * a perfect world it would not be necessary, and
     * it's not necessary for ghostView. */
    int x1 = x+1;
    int x2 = x + w - 1;
    if (x1 < 0) x1 = 0;
    if (x2 > hvg->width) x2 = hvg->width;
    if (x2-x1 > 0)
        hvGfxLine(hvg, x1, y, x2, y, color);
    }
 }
 
 static void lfColors(struct track *tg, struct linkedFeatures *lf,
         struct hvGfx *hvg, Color *retColor, Color *retBarbColor)
 /* Figure out color to draw linked feature in. */
 {
-if (!((lf->filterColor == 0) || (lf->filterColor == -1)))
+if (!((lf->isBigGenePred) ||(lf->filterColor == 0)|| (lf->filterColor == -1)))
     {
     if (lf->extra == (void *)USE_ITEM_RGB)
 	{
 	struct rgbColor itemRgb;
 	itemRgb.r = (lf->filterColor & 0xff0000) >> 16;
 	itemRgb.g = (lf->filterColor & 0xff00) >> 8;
 	itemRgb.b = lf->filterColor & 0xff;
 	*retColor = *retBarbColor =
 		hvGfxFindColorIx(hvg, itemRgb.r, itemRgb.g, itemRgb.b);
 	}
     else
 	*retColor = *retBarbColor = lf->filterColor;
     }
 else if (tg->itemColor)
     {
@@ -3511,30 +3511,41 @@
 else if (tg->colorShades)
     {
     boolean isXeno = (tg->subType == lfSubXeno)
                                 || (tg->subType == lfSubChain)
                                 || startsWith("mrnaBla", tg->table);
     *retColor =  tg->colorShades[lf->grayIx+isXeno];
     *retBarbColor =  tg->colorShades[lf->grayIx];
     }
 else
     {
     *retColor = tg->ixColor;
     *retBarbColor = tg->ixAltColor;
     }
 }
 
+Color bigGenePredColor(struct track *tg, void *item,  struct hvGfx *hvg)
+/* Return the color index hvg has for a bigGenePred item.  filterColor is
+ * unpacked as red = bits 16-23, green = bits 8-15, blue = bits 0-7. */
+{
+struct linkedFeatures *feature = item;
+int red = (feature->filterColor & 0xff0000) >> 16;
+int green = (feature->filterColor & 0xff00) >> 8;
+int blue = feature->filterColor & 0xff;
+return hvGfxFindColorIx(hvg, red, green, blue);
+}
+
 Color linkedFeaturesNameColor(struct track *tg, void *item, struct hvGfx *hvg)
 /* Determine the color of the name for the linked feature. */
 {
 Color col, barbCol;
 lfColors(tg, item, hvg, &col, &barbCol);
 return col;
 }
 
 struct simpleFeature *simpleFeatureCloneList(struct simpleFeature *list)
 /* Just copies the simpleFeature list. This is good for making a copy */
 /* when the codon list is made. */
 {
 struct simpleFeature *ret = NULL;
 struct simpleFeature *cur;
 for (cur = list; cur != NULL; cur = cur->next)
@@ -5979,31 +5990,31 @@
     }
 else
     return lf->name;
 }
 
 char *knownGeneMapName(struct track *tg, void *item)
 /* Return un-abbreviated gene name. */
 {
 char str2[255];
 struct linkedFeatures *lf = item;
 /* piggy back the protein ID (hgg_prot variable) on hgg_gene variable */
 safef(str2, sizeof(str2), "%s&hgg_prot=%s", lf->name, ((struct knownGenesExtra *)(lf->extra))->hgg_prot);
 return(cloneString(str2));
 }
 
-void lookupKnownGeneNames(struct linkedFeatures *lfList)
+void lookupKnownGeneNames(struct linkedFeatures *lfList, boolean isBigGenePred)
 /* This converts the known gene ID to a gene symbol */
 {
 struct linkedFeatures *lf;
 struct sqlConnection *conn = hAllocConn(database);
 char *geneSymbol;
 char *protDisplayId;
 char *gencodeId;
 char *mimId;
 char cond_str[256];
 
 boolean useGeneSymbol= FALSE;
 boolean useKgId      = FALSE;
 boolean useProtDisplayId = FALSE;
 boolean useMimId = FALSE;
 boolean useGencodeId = FALSE;
@@ -6039,71 +6050,93 @@
         else if (!endsWith(label->name, "gene") &&
                  !endsWith(label->name, "gencodeId") &&
                  !endsWith(label->name, "kgId") &&
                  !endsWith(label->name, "prot") &&
                  !endsWith(label->name, omimLabel) )
             {
             useGeneSymbol = TRUE;
             cartRemove(cart, label->name);
             }
         }
 
     for (lf = lfList; lf != NULL; lf = lf->next)
 	{
         struct dyString *name = dyStringNew(SMALLDYBUF);
         struct knownGenesExtra *kgE;
+        struct genePredExt *gp = lf->original;
         AllocVar(kgE);
         labelStarted = FALSE; /* reset between items */
         if (useGeneSymbol)
             {
+            if ( isBigGenePred )
+                {
+                geneSymbol = gp->geneName;
+                }
+            else
+                {
                 sqlSafefFrag(cond_str, sizeof cond_str,"kgID='%s'", lf->name);
                 geneSymbol = sqlGetField(database, "kgXref", "geneSymbol", cond_str);
+                }
             if (geneSymbol != NULL)
                 {
                 dyStringAppend(name, geneSymbol);
                 }
             labelStarted = TRUE;
             }
         if (useGencodeId)
             {
             if (labelStarted) dyStringAppendC(name, '/');
             else labelStarted = TRUE;
+            if ( isBigGenePred )
+                {
+                gencodeId = gp->name2;
+                }
+            else
+                {
                 sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", lf->name);
                 gencodeId = sqlGetField(database, "knownGene", "alignID", cond_str);
+                }
 	    dyStringAppend(name, gencodeId);
 	    }
         if (useKgId)
             {
             if (labelStarted) dyStringAppendC(name, '/');
             else labelStarted = TRUE;
             dyStringAppend(name, lf->name);
 	    }
         if (useProtDisplayId)
             {
             if (labelStarted) dyStringAppendC(name, '/');
             else labelStarted = TRUE;
+            if ( isBigGenePred )
+                {
+                dyStringAppend(name, gp->geneName2);
+                }
+            else
+                {
                 if (lf->extra != NULL)
                     {
                     dyStringAppend(name, (char *)lf->extra);
                     }
                 else
                     {
                     sqlSafefFrag(cond_str, sizeof(cond_str), "kgID='%s'", lf->name);
                     protDisplayId = sqlGetField(database, "kgXref", "spDisplayID", cond_str);
                     dyStringAppend(name, protDisplayId);
                     }
                 }
+	    }
         if (useMimId && sqlTableExists(conn, refLinkTable))
             {
             if (labelStarted) dyStringAppendC(name, '/');
             else labelStarted = TRUE;
             sqlSafef(cond_str, sizeof(cond_str), "select cast(r.omimId as char) from kgXref,%s r where kgID = '%s' and kgXref.refseq = r.mrnaAcc and r.omimId != 0",refLinkTable, lf->name);
             mimId = sqlQuickString(conn, cond_str);
             if (mimId)
                 dyStringAppend(name, mimId);
             }
         /* should this be a hash instead? */
         kgE->name = dyStringCannibalize(&name);
         kgE->hgg_prot = lf->extra;
         lf->extra = kgE;
 	}
     }
@@ -6112,87 +6145,149 @@
 
 struct linkedFeatures *stripShortLinkedFeatures(struct linkedFeatures *list)
 /* Remove linked features with no tall component from list. */
 {
 struct linkedFeatures *newList = NULL, *el, *next;
 for (el = list; el != NULL; el = next)
     {
     next = el->next;
     if (el->tallStart < el->tallEnd)
         slAddHead(&newList, el);
     }
 slReverse(&newList);
 return newList;
 }
 
+#define BIT_BASIC       (1 << 0)        // transcript is in basic set
+#define BIT_CANON       (1 << 1)        // transcript is in canonical set
+
+struct linkedFeatures *stripLinkedFeaturesWithoutBitInScore (struct linkedFeatures *list, unsigned bit)
+/* Keep, in original order, only elements whose score (cast to unsigned)
+ * shares a set bit with bit; dropped elements are unlinked, not freed. */
+{
+struct linkedFeatures *kept = NULL, *lf = list;
+while (lf != NULL)
+    {
+    struct linkedFeatures *rest = lf->next;
+    lf->next = NULL;
+    if (((unsigned)lf->score & bit) != 0)
+        slAddHead(&kept, lf);
+    lf = rest;
+    }
+slReverse(&kept);
+return kept;
+}
+
 struct linkedFeatures *stripLinkedFeaturesNotInHash(struct linkedFeatures *list, struct hash *hash)
 /* Remove linked features not in hash from list. */
 {
 struct linkedFeatures *newList = NULL, *el, *next;
 for (el = list; el != NULL; el = next)
     {
     next = el->next;
     if (hashLookup(hash, el->name))
         slAddHead(&newList, el);
     }
 slReverse(&newList);
 return newList;
 }
 
+static void loadKnownBigGenePred(struct track *tg, boolean isGencode)
+/* Load knownGene items for chromName:winStart-winEnd from the track's
+ * bigGenePred into tg->items, sorted by linkedFeaturesCmp.  With isGencode
+ * set, items lacking BIT_BASIC are dropped unless the cart variable
+ * <track>.show.comprehensive is on.  Colors come from bigGenePredColor. */
+{
+struct trackDb *tdb = tg->tdb;
+int minScore = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMin", "0"));
+int maxScore = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMax", "1000"));
+struct linkedFeatures *items = NULL;
+tg->parallelLoading = TRUE;  // set so bigBed code will look at bigDataUrl
+bigBedAddLinkedFeaturesFrom(tg, chromName, winStart, winEnd,
+      minScore, maxScore, TRUE, 12, &items);
+if (isGencode)
+    {
+    char cartVar[SMALLBUF];
+    safef(cartVar, sizeof(cartVar), "%s.show.comprehensive", tdb->track);
+    if (!cartUsualBoolean(cart, cartVar, FALSE))
+        items = stripLinkedFeaturesWithoutBitInScore(items, BIT_BASIC);
+    }
+slSort(&items, linkedFeaturesCmp);
+tg->items = items;
+tg->itemColor = bigGenePredColor;
+tg->itemNameColor = bigGenePredColor;
+}
+
 void loadKnownGene(struct track *tg)
 /* Load up known genes. */
 {
 struct trackDb *tdb = tg->tdb;
 char *isGencode = trackDbSetting(tdb, "isGencode");
+char *bigGenePred = trackDbSetting(tdb, "bigGeneDataUrl");
+struct udcFile *file;
+boolean isBigGenePred = FALSE;
 
-if (isGencode == NULL)
+if ((bigGenePred != NULL) && ((file = udcFileMayOpen(bigGenePred, udcDefaultDir())) != NULL))
+    {
+    isBigGenePred = TRUE;
+    udcFileClose(&file);
+    loadKnownBigGenePred(tg, isGencode != NULL);
+    }
+else if (isGencode == NULL)
     loadGenePredWithName2(tg);
 else
     loadKnownGencode(tg);
 
 char varName[SMALLBUF];
 safef(varName, sizeof(varName), "%s.show.noncoding", tdb->track);
 boolean showNoncoding = cartUsualBoolean(cart, varName, TRUE);
 safef(varName, sizeof(varName), "%s.show.spliceVariants", tdb->track);
 boolean showSpliceVariants = cartUsualBoolean(cart, varName, TRUE);
 if (!showNoncoding)
     tg->items = stripShortLinkedFeatures(tg->items);
 if (!showSpliceVariants)
     {
+    if (isBigGenePred)
+        {
+        tg->items = stripLinkedFeaturesWithoutBitInScore(tg->items,  BIT_CANON);
+        }
+    else
+        {
         char *canonicalTable = trackDbSettingOrDefault(tdb, "canonicalTable", "knownCanonical");
         if (hTableExists(database, canonicalTable))
             {
             /* Create hash of items in canonical table in region. */
             struct sqlConnection *conn = hAllocConn(database);
             struct hash *hash = hashNew(0);
             char query[512];
             sqlSafef(query, sizeof(query),
                     "select transcript from %s where chrom=\"%s\" and chromStart < %d && chromEnd > %d",
                     canonicalTable, chromName, winEnd, winStart);
             struct sqlResult *sr = sqlGetResult(conn, query);
             char **row;
             while ((row = sqlNextRow(sr)) != NULL)
                 hashAdd(hash, row[0], NULL);
             sqlFreeResult(&sr);
             hFreeConn(&conn);
 
             /* Get rid of non-canonical items. */
             tg->items = stripLinkedFeaturesNotInHash(tg->items, hash);
             hashFree(&hash);
             }
         }
-lookupKnownGeneNames(tg->items);
+    }
+lookupKnownGeneNames(tg->items, isBigGenePred);
 limitVisibility(tg);
 }
 
 Color knownGeneColorCalc(struct track *tg, void *item, struct hvGfx *hvg)
 /* Return color to draw known gene in. */
 {
 struct linkedFeatures *lf = item;
 int col = tg->ixColor;
 struct rgbColor *normal = &(tg->color);
 struct rgbColor lighter;
 struct rgbColor lightest;
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr;
 char **row;
 char query[256];