e2467a639cc1e98174ffbd9d0da399b3b75bc9ae
markd
  Thu Jul 26 21:33:38 2012 -0700
highlighting by attribute functionality for GENCODE
diff --git src/hg/hgTracks/simpleTracks.c src/hg/hgTracks/simpleTracks.c
index 9da47b2..33310cf 100644
--- src/hg/hgTracks/simpleTracks.c
+++ src/hg/hgTracks/simpleTracks.c
@@ -37,30 +37,31 @@
 #ifndef GBROWSE
 #include "encode.h"
 #include "expRatioTracks.h"
 #include "hapmapTrack.h"
 #include "retroGene.h"
 #include "switchGear.h"
 #include "variation.h"
 #include "wiki.h"
 #include "wormdna.h"
 #include "aliType.h"
 #include "agpGap.h"
 #include "cgh.h"
 #include "bactigPos.h"
 #include "genePred.h"
 #include "genePredReader.h"
+#include "gencodeTracks.h"
 #include "isochores.h"
 #include "spDb.h"
 #include "simpleRepeat.h"
 #include "cpgIsland.h"
 #include "gcPercent.h"
 #include "genomicDups.h"
 #include "mapSts.h"
 #include "est3.h"
 #include "exoFish.h"
 #include "roughAli.h"
 #include "snp.h"
 #include "rnaGene.h"
 #include "fishClones.h"
 #include "stsMarker.h"
 #include "stsMap.h"
@@ -86,31 +87,30 @@
 #include "altGraph.h"
 #include "altGraphX.h"
 #include "geneGraph.h"
 #include "genMapDb.h"
 #include "genomicSuperDups.h"
 #include "celeraDupPositive.h"
 #include "celeraCoverage.h"
 #include "simpleNucDiff.h"
 #include "tfbsCons.h"
 #include "tfbsConsSites.h"
 #include "itemAttr.h"
 #include "encode.h"
 #include "variation.h"
 #include "estOrientInfo.h"
 #include "versionInfo.h"
-#include "gencodeIntron.h"
 #include "retroGene.h"
 #include "switchGear.h"
 #include "dless.h"
 #include "liftOver.h"
 #include "hgConfig.h"
 #include "gv.h"
 #include "gvUi.h"
 #include "protVar.h"
 #include "oreganno.h"
 #include "oregannoUi.h"
 #include "pgSnp.h"
 #include "bedDetail.h"
 #include "bed12Source.h"
 #include "dbRIP.h"
 #include "wikiLink.h"
@@ -4137,95 +4137,103 @@
 unsigned *ends = gp->exonEnds;
 int i, blockCount = gp->exonCount;
 
 for (i=0; i<blockCount; ++i)
     {
     AllocVar(sf);
     sf->start = starts[i];
     sf->end = ends[i];
     sf->grayIx = grayIx;
     slAddHead(&sfList, sf);
     }
 slReverse(&sfList);
 return sfList;
 }
 
+struct linkedFeatures *linkedFeaturesFromGenePred(struct track *tg, struct genePred *gp, boolean extra)
+/* Build a newly allocated linkedFeatures from genePred gp: name, strand, one component per exon, item attributes (when tg has an itemAttrTbl) and the tall (CDS) range.  If extra is TRUE and gp->name2 is set, lf->extra gets a copy of name2.  gp is stored in lf->original and is not freed here. */
+{
+int grayIx = maxShade;
+struct linkedFeatures *lf;
+AllocVar(lf);
+lf->grayIx = grayIx;
+lf->name = cloneString(gp->name);
+if (extra && gp->name2)
+    lf->extra = cloneString(gp->name2);
+lf->orientation = orientFromChar(gp->strand[0]);
+
+lf->components = sfFromGenePred(gp, grayIx);
+
+if (tg->itemAttrTbl != NULL)
+    lf->itemAttr = itemAttrTblGet(tg->itemAttrTbl, gp->name,
+                                  gp->chrom, gp->txStart, gp->txEnd);
+
+linkedFeaturesBoundsAndGrays(lf);
+
+if (gp->cdsStart >= gp->cdsEnd)  // empty CDS: collapse the tall range to a zero-width span at txEnd
+    {
+    lf->tallStart = gp->txEnd;
+    lf->tallEnd = gp->txEnd;
+    }
+else
+    {
+    lf->tallStart = gp->cdsStart;
+    lf->tallEnd = gp->cdsEnd;
+    }
+// Don't free gp: lf->original keeps the pointer, and it might be used in the drawing phase by baseColor code.
+lf->original = gp;
+return lf;
+}
 
 static struct linkedFeatures *connectedLfFromGenePredInRangeExtra(
                                         struct track *tg, struct sqlConnection *conn, char *table,
                                         char *chrom, int start, int end, boolean extra)
 /* Return linked features from range of a gene prediction table after
  * we have already connected to database. Optinally Set lf extra to
  * gene pred name2, to display gene name instead of transcript ID.*/
 {
-struct linkedFeatures *lfList = NULL, *lf;
-int grayIx = maxShade;
+struct linkedFeatures *lfList = NULL;
 struct genePredReader *gpr = NULL;
 struct genePred *gp = NULL;
 boolean nmdTrackFilter = sameString(trackDbSettingOrDefault(tg->tdb, "nmdFilter", "off"), "on");
 char varName[SMALLBUF];
 safef(varName, sizeof(varName), "%s.%s", table, HIDE_NONCODING_SUFFIX);
 boolean hideNoncoding = cartUsualBoolean(cart, varName, HIDE_NONCODING_DEFAULT);  // TODO: Use cartUsualBooleanClosestToHome if tableName == tg->tdb->track
 boolean doNmd = FALSE;
 char buff[256];
 safef(buff, sizeof(buff), "hgt.%s.nmdFilter",  tg->track);
 
 /* Should we remove items that appear to be targets for nonsense
  * mediated decay? */
 if(nmdTrackFilter)
     doNmd = cartUsualBoolean(cart, buff, FALSE);
 
 if (tg->itemAttrTbl != NULL)
     itemAttrTblLoad(tg->itemAttrTbl, conn, chrom, start, end);
 
 char *noncodingClause = (hideNoncoding ? "cdsStart != cdsEnd" : NULL);
 gpr = genePredReaderRangeQuery(conn, table, chrom, start, end, noncodingClause);
 while ((gp = genePredReaderNext(gpr)) != NULL)
     {
     if(doNmd && genePredNmdTarget(gp))
 	{
 	genePredFree(&gp);
-	continue;
-	}
-    AllocVar(lf);
-    lf->grayIx = grayIx;
-    lf->name = cloneString(gp->name);
-    if (extra && gp->name2)
-        lf->extra = cloneString(gp->name2);
-    lf->orientation = orientFromChar(gp->strand[0]);
-
-    lf->components = sfFromGenePred(gp, grayIx);
-
-    if (tg->itemAttrTbl != NULL)
-        lf->itemAttr = itemAttrTblGet(tg->itemAttrTbl, gp->name,
-                                      gp->chrom, gp->txStart, gp->txEnd);
-
-    linkedFeaturesBoundsAndGrays(lf);
-
-    if (gp->cdsStart >= gp->cdsEnd)
-        {
-        lf->tallStart = gp->txEnd;
-        lf->tallEnd = gp->txEnd;
         }
     else
         {
-        lf->tallStart = gp->cdsStart;
-        lf->tallEnd = gp->cdsEnd;
+        slAddHead(&lfList, linkedFeaturesFromGenePred(tg, gp, extra));
         }
-    // Don't free gp; it might be used in the drawing phase by baseColor code.
-    lf->original = gp;
-    slAddHead(&lfList, lf);
     }
 slReverse(&lfList);
 genePredReaderFree(&gpr);
 
 if (tg->visibility != tvDense)
     slSort(&lfList, linkedFeaturesCmpStart);
 
 return lfList;
 }
 
 struct linkedFeatures *connectedLfFromGenePredInRange(struct track *tg, struct sqlConnection *conn,
                                                       char *table, char *chrom, int start, int end)
 /* Return linked features from range of a gene prediction table after
  * we have already connected to database. */
 {
@@ -4282,122 +4290,37 @@
                                                       filterBy_t *filterBySet, struct linkedFeatures *lf)
 /* construct the query for a standard genePred class filterBtSet */
 {
 char *clause = filterBySetClause(filterBySet);
 if (clause == NULL)
     return NULL;
 
 // don't care about a column value here, just if it exists, so get a constant
 char *nameCol = trackDbSettingOrDefault(tg->tdb, GENEPRED_CLASS_NAME_COLUMN, GENEPRED_CLASS_NAME_COLUMN_DEFAULT);
 struct dyString *dyQuery = dyStringCreate("select 1 from %s where %s = \"%s\" and ", classTable, nameCol, lf->name);
 dyStringAppend(dyQuery, clause);
 freeMem(clause);
 return dyQuery;
 }
 
-static void gencodeFilterByMethodChoice(struct dyString *dyClause, char *choice)
-/* add compared for a choice for special case of GENCODE transcript method. */
-{
-if (sameString(choice, "manual"))
-    dyStringAppend(dyClause, "(transSrc.source like \"%havana%\")");
-else if (sameString(choice, "automatic"))
-    dyStringAppend(dyClause, "(transSrc.source like \"%ensembl%\")");
-else if (sameString(choice, "manual_only"))
-    dyStringAppend(dyClause, "(transSrc.source like \"%havana%\") and (transSrc.source not like \"%ensembl%\")");
-else if (sameString(choice, "automatic_only"))
-    dyStringAppend(dyClause, "(transSrc.source like \"%ensembl%\") and (transSrc.source not like \"%havana%\")");
-else
-    errAbort("BUG: filterByMethodChoice missing choice: \"%s\"", choice);
-}
-
-static char *gencodeFilterByMethod(filterBy_t *filterBy)
-{
-if ((filterBy->slChoices == NULL) || (slNameInList(filterBy->slChoices,"All")))
-    return NULL;
-
-struct dyString *clause = newDyString(256);
-struct slName *slChoice = NULL;
-dyStringAppend(clause, "(transSrc.transcriptId = attrs.transcriptId) and ");
-boolean first = TRUE;
-for(slChoice = filterBy->slChoices; slChoice != NULL; slChoice = slChoice->next)
-    {
-    if(!first)
-        dyStringPrintf(clause, " or ");
-    first = FALSE;
-    gencodeFilterByMethodChoice(clause, slChoice->name);
-    }
-return dyStringCannibalize(&clause);
-}
-
-static void gencodeFilterBy(filterBy_t *filterBy, struct dyString *where)
-/* handle adding on filterBy clause for gencode */
-{
-char *clause;
-if (sameString(filterBy->column, "transcriptMethod"))
-    clause = gencodeFilterByMethod(filterBy);
-else
-    clause = filterByClause(filterBy);
-if (clause != NULL)
-    {
-    dyStringPrintf(where, " and (%s)", clause);
-    freeMem(clause);
-    }
-}
-
-static struct dyString *gencodeFilterBySetQueryWhere(struct track *tg, filterBy_t *filterBySet)
-/* build where clause based on filters, or NULL if none */
-{
-struct dyString *where = dyStringNew(0);
-filterBy_t *filterBy;
-for (filterBy = filterBySet;filterBy != NULL; filterBy = filterBy->next)
-    gencodeFilterBy(filterBy, where);
-if (dyStringLen(where) == 0)
-    dyStringFree(&where);
-return where;
-}
-
-static struct dyString *gencodeFilterBySetQuery(struct track *tg, filterBy_t *filterBySet, struct linkedFeatures *lf)
-/* construct the query for GENCODE filterBySet */
-{
-struct dyString *where = gencodeFilterBySetQueryWhere(tg, filterBySet);
-if (where == NULL)
-    return NULL;
-char *attrsTbl = trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeAttrs");
-char *srcTbl = trackDbRequiredSetting(tg->tdb, "wgEncodeGencodeTranscriptSource");
-struct dyString *dyQuery = dyStringCreate("select 1 from %s attrs, %s transSrc where (attrs.transcriptId = \"%s\") %s", attrsTbl, srcTbl, lf->name, where->string);
-dyStringFree(&where);
-return dyQuery;
-}
-
 static boolean genePredClassFilterBySet(struct track *tg, char *classTable,
                                         filterBy_t *filterBySet, struct linkedFeatures *lf)
 /* Check if an item passes a filterBySet filter  */
 {
-struct dyString *dyQuery = NULL;
-if (trackDbSetting(tg->tdb, "wgEncodeGencodeVersion") != NULL)
-    {
-    if (startsWith("wgEncodeGencodeBasic", tg->tdb->track)
-        || startsWith("wgEncodeGencodeComp", tg->tdb->track)
-        || startsWith("wgEncodeGencode2wayConsPseudo", tg->tdb->track)
-        || startsWith("wgEncodeGencodePseudoGene", tg->tdb->track))
-        dyQuery = gencodeFilterBySetQuery(tg, filterBySet, lf);
-    }
-else
-    dyQuery = genePredClassFilterBySetQuery(tg, classTable, filterBySet, lf);
+struct dyString *dyQuery = genePredClassFilterBySetQuery(tg, classTable, filterBySet, lf);
 if (dyQuery == NULL)
     return TRUE;
-
 struct sqlConnection *conn = hAllocConn(database);
 boolean passesThroughFilter = sqlQuickNum(conn, dyQuery->string);
 dyStringFree(&dyQuery);
 hFreeConn(&conn);
 return passesThroughFilter;
 }
 
 static boolean genePredClassFilterAcembly(struct track *tg, char *classTable,
                                           struct linkedFeatures *lf)
 /* Check if an item passes a filterBySet filter  */
 {
 char *classString = addSuffix(tg->track, ".type");
 char *classType = cartUsualString(cart, classString, acemblyEnumToString(0));
 freeMem(classString);
 enum acemblyOptEnum ct = acemblyStringToEnum(classType);
@@ -5354,40 +5277,30 @@
 /* get geneName from refLink or NULL if not found.
  * WARNING: static return */
 {
 static char nameBuf[256];
 char query[256], *name = NULL;
 if (hTableExists(database,  "refLink"))
     {
     sprintf(query, "select name from refLink where mrnaAcc = '%s'", acc);
     name = sqlQuickQuery(conn, query, nameBuf, sizeof(nameBuf));
     if ((name != NULL) && (name[0] == '\0'))
         name = NULL;
     }
 return name;
 }
 
-char *gencodeGeneName(struct track *tg, void *item)
-/* Get name to use for Gencode gene item. */
-{
-struct linkedFeatures *lf = item;
-if (lf->extra != NULL)
-    return lf->extra;
-else
-    return lf->name;
-}
-
 char *getRgdGene2Symbol(struct sqlConnection *conn, char *acc)
 /* get gene symbol from rgdGene2ToSymbol or NULL if not found.
  * WARNING: static return */
 {
 static char symbolBuf[256];
 char query[256], *symbol = NULL;
 if (hTableExists(database,  "rgdGene2ToSymbol"))
     {
     sprintf(query, "select geneSymbol from rgdGene2ToSymbol where rgdId = '%s'", acc);
     symbol = sqlQuickQuery(conn, query, symbolBuf, sizeof(symbolBuf));
     if ((symbol != NULL) && (symbol[0] == '\0'))
         symbol = NULL;
     }
 return symbol;
 }
@@ -10256,77 +10169,30 @@
 
 void drawColorMethods(struct track *tg)
 /* Fill in color track items based on chrom  */
 {
 char *optionStr ;
 optionStr = cartUsualStringClosestToHome(cart, tg->tdb,FALSE,"color", "off");
 tg->mapItemName = lfMapNameFromExtra;
 if( sameString( optionStr, "on" )) /*use chromosome coloring*/
     tg->itemColor = lfChromColor;
 else
     tg->itemColor = NULL;
 linkedFeaturesMethods(tg);
 tg->loadItems = loadGenePred;
 }
 
-Color gencodeIntronColorItem(struct track *tg, void *item, struct hvGfx *hvg)
-/* Return color of ENCODE gencode intron track item.
- * Use recommended color palette pantone colors (level 4) for red, green, blue*/
-{
-struct gencodeIntron *intron = (struct gencodeIntron *)item;
-
-if (sameString(intron->status, "not_tested"))
-    return hvGfxFindColorIx(hvg, 214,214,216);       /* light grey */
-if (sameString(intron->status, "RT_negative"))
-    return hvGfxFindColorIx(hvg, 145,51,56);       /* red */
-if (sameString(intron->status, "RT_positive") ||
-        sameString(intron->status, "RACE_validated"))
-    return hvGfxFindColorIx(hvg, 61,142,51);       /* green */
-if (sameString(intron->status, "RT_wrong_junction"))
-    return getOrangeColor(hvg);                 /* orange */
-if (sameString(intron->status, "RT_submitted"))
-    return hvGfxFindColorIx(hvg, 102,109,112);       /* grey */
-return hvGfxFindColorIx(hvg, 214,214,216);       /* light grey */
-}
-
-static void gencodeIntronLoadItems(struct track *tg)
-/* Load up track items. */
-{
-bedLoadItem(tg, tg->table, (ItemLoader)gencodeIntronLoad);
-}
-
-static void gencodeIntronMethods(struct track *tg)
-/* Load up custom methods for ENCODE Gencode intron validation track */
-{
-tg->loadItems = gencodeIntronLoadItems;
-tg->itemColor = gencodeIntronColorItem;
-}
-
-static void gencodeGeneMethods(struct track *tg)
-/* Load up custom methods for ENCODE Gencode gene track */
-{
-tg->loadItems = loadGenePredWithConfiguredName;
-tg->itemName = gencodeGeneName;
-}
-
-static void gencodeRaceFragsMethods(struct track *tg)
-/* Load up custom methods for ENCODE Gencode RACEfrags track */
-{
-tg->loadItems = loadGenePred;
-tg->subType = lfNoIntronLines;
-}
-
 void loadDless(struct track *tg)
 /* Load dless items */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct dless *dless, *list = NULL;
 struct sqlResult *sr;
 char **row;
 int rowOffset;
 
 sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd,
                  NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     dless = dlessLoad(row+rowOffset);
     slAddHead(&list, dless);
@@ -13244,31 +13110,30 @@
 registerTrackHandler("rmskCensor", repeatMethods);
 registerTrackHandler("simpleRepeat", simpleRepeatMethods);
 registerTrackHandler("chesSimpleRepeat", simpleRepeatMethods);
 registerTrackHandler("uniGene",uniGeneMethods);
 registerTrackHandler("perlegen",perlegenMethods);
 registerTrackHandler("haplotype",haplotypeMethods);
 registerTrackHandler("encodeErge5race",encodeErgeMethods);
 registerTrackHandler("encodeErgeBinding",encodeErgeMethods);
 registerTrackHandler("encodeErgeExpProm",encodeErgeMethods);
 registerTrackHandler("encodeErgeHssCellLines",encodeErgeMethods);
 registerTrackHandler("encodeErgeInVitroFoot",encodeErgeMethods);
 registerTrackHandler("encodeErgeMethProm",encodeErgeMethods);
 registerTrackHandler("encodeErgeStableTransf",encodeErgeMethods);
 registerTrackHandler("encodeErgeSummary",encodeErgeMethods);
 registerTrackHandler("encodeErgeTransTransf",encodeErgeMethods);
-registerTrackHandler("encodeGencodeGenePolyAMar07",bed9Methods);
 registerTrackHandlerOnFamily("encodeStanfordNRSF",encodeStanfordNRSFMethods);
 registerTrackHandler("cghNci60", cghNci60Methods);
 registerTrackHandler("rosetta", rosettaMethods);
 registerTrackHandler("affy", affyMethods);
 registerTrackHandler("ancientR", ancientRMethods );
 registerTrackHandler("altGraphX", altGraphXMethods );
 registerTrackHandler("triangle", triangleMethods );
 registerTrackHandler("triangleSelf", triangleMethods );
 registerTrackHandler("transfacHit", triangleMethods );
 registerTrackHandler("esRegGeneToMotif", eranModuleMethods );
 registerTrackHandler("leptin", mafMethods );
 registerTrackHandler("igtc", igtcMethods );
 registerTrackHandler("cactusBed", cactusBedMethods );
 registerTrackHandler("variome", variomeMethods);
 /* Lowe lab related */
@@ -13327,56 +13192,31 @@
 registerTrackHandler("encodeEvoFold", rnaSecStrMethods);
 registerTrackHandler("rnafold", rnaSecStrMethods);
 registerTrackHandler("mcFolds", rnaSecStrMethods);
 registerTrackHandler("rnaEditFolds", rnaSecStrMethods);
 registerTrackHandler("altSpliceFolds", rnaSecStrMethods);
 registerTrackHandler("chimpSimpleDiff", chimpSimpleDiffMethods);
 registerTrackHandler("tfbsCons", tfbsConsMethods);
 registerTrackHandler("tfbsConsSites", tfbsConsSitesMethods);
 registerTrackHandler("pscreen", simpleBedTriangleMethods);
 registerTrackHandler("dless", dlessMethods);
 registerTrackHandler("jaxAllele", jaxAlleleMethods);
 registerTrackHandler("jaxPhenotype", jaxPhenotypeMethods);
 registerTrackHandler("jaxAlleleLift", jaxAlleleMethods);
 registerTrackHandler("jaxPhenotypeLift", jaxPhenotypeMethods);
 /* ENCODE related */
-registerTrackHandlerOnFamily("wgEncodeGencode", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeSangerGencode", gencodeGeneMethods);
-// one per gencode version, after V7 when it was substantially changed
-// FIXME: this is hacky, need a way to register based on pattern
-registerTrackHandlerOnFamily("wgEncodeGencodeV3", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV4", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV7", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV8", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV9", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV10", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV11", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV12", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV13", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV14", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeGencodeV15", gencodeGeneMethods);
-
-registerTrackHandlerOnFamily("wgEncodeSangerGencodeGencodeManual20081001", gencodeGeneMethods);
-registerTrackHandlerOnFamily("wgEncodeSangerGencodeGencodeAuto20081001", gencodeGeneMethods);
-registerTrackHandlerOnFamily("encodeGencodeGene", gencodeGeneMethods);
-registerTrackHandlerOnFamily("encodeGencodeGeneJun05", gencodeGeneMethods);
-registerTrackHandlerOnFamily("encodeGencodeGeneOct05", gencodeGeneMethods);
-registerTrackHandlerOnFamily("encodeGencodeGeneMar07", gencodeGeneMethods);
-registerTrackHandler("encodeGencodeIntron", gencodeIntronMethods);
-registerTrackHandler("encodeGencodeIntronJun05", gencodeIntronMethods);
-registerTrackHandler("encodeGencodeIntronOct05", gencodeIntronMethods);
-registerTrackHandlerOnFamily("encodeGencodeRaceFrags", gencodeRaceFragsMethods);
+gencodeRegisterTrackHandlers();
 registerTrackHandler("affyTxnPhase2", affyTxnPhase2Methods);
 registerTrackHandler("gvPos", gvMethods);
 registerTrackHandlerOnFamily("pgSnp", pgSnpMethods);
 registerTrackHandlerOnFamily("pgSnpHgwdev", pgSnpMethods);
 registerTrackHandlerOnFamily("pgPop", pgSnpMethods);
 registerTrackHandler("pgTest", pgSnpMethods);
 registerTrackHandler("protVarPos", protVarMethods);
 registerTrackHandler("oreganno", oregannoMethods);
 registerTrackHandler("encodeDless", dlessMethods);
 transMapRegisterTrackHandlers();
 retroRegisterTrackHandlers();
 registerTrackHandler("retroposons", dbRIPMethods);
 registerTrackHandlerOnFamily("kiddEichlerDisc", kiddEichlerMethods);
 registerTrackHandlerOnFamily("kiddEichlerValid", kiddEichlerMethods);
 registerTrackHandler("dgv", dgvMethods);