25590f99da7cd6b2b029aafe6de0712f83ba1c48
tdreszer
  Fri Oct 29 13:55:58 2010 -0700
Moved mdb/cv terms white list to mdb library code.  Now it can be used both by trackSearch and by metadata '...' table which will have links for select terms to hgEncodeVocab.
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 922f384..31a3329 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -2135,15 +2135,106 @@
         if (label != NULL)
             {
             freeMem(pair->name); // Allocated when pair was created
             pair->name = strSwapChar(cloneString(label),'_',' ');  // vestigial _ meaning space
             if (limit > 0 && strlen(pair->name) > limit)
                 pair->name[limit] = '\0';
             }
         }
     slAddHead(&pairs, pair);
     }
 sqlFreeResult(&sr);
 slPairSortCase(&pairs);
 return pairs;
 }
 
+struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvLinks)
+// Returns the whitelisted mdb/controlled vocabulary terms as a list of term/title pairs.
+// searchTracks: keep only terms flagged searchable.  cvLinks: keep only terms flagged cvTerm.
+// With both flags FALSE every term is returned.  Returns NULL if no term passes the filters.
+{
+struct cvTerms
+    {
+    char *term;          // The actual term as it appears in the mdb or cv.ra
+    char *title;         // The title that should appear in Track Search
+    boolean searchable;  // Can this term be searched for in Track Search?
+    boolean cvTerm;      // Can this term be linked to cv.ra content?
+    };
+
+// TODO: move this list into cv.ra as type=term
+// TODO: If hgEncodeVocab was changed to give a description of a term not found in cv.ra, then many more of these could be linked.
+static const struct cvTerms whiteList[] = {
+   // term                  Title                              search cvTerm
+    {"accession"           ,"Lab provided accession"           ,FALSE,FALSE}, // Superseded by geoSample?
+    {"age"                 ,"Age of experimental organism"     ,TRUE ,TRUE },
+    {"annotation"          ,"Gencode annotation"               ,FALSE,FALSE},
+    {"antibody"            ,"Antibody or target protein"       ,TRUE ,TRUE },
+    {"bioRep"              ,"UNKNOWN"                          ,FALSE,FALSE},
+    {"cell"                ,"Cell, tissue or DNA sample"       ,TRUE ,TRUE },
+    {"composite"           ,"UCSC Composite Track"             ,FALSE,FALSE}, // Could be in cv.ra
+    {"control"             ,"Control or Input for ChIPseq"     ,TRUE ,TRUE },
+    {"controlId"           ,"ControlId - explicit relationship",FALSE,FALSE},
+    {"dataType"            ,"Experiment type"                  ,TRUE ,TRUE },
+    {"dataVersion"         ,"ENCODE release"                   ,TRUE ,FALSE},
+    {"dateResubmitted"     ,"Date resubmitted to UCSC"         ,FALSE,FALSE},
+    {"dateSubmitted"       ,"Date submitted to UCSC"           ,FALSE,FALSE},
+    {"dateUnrestricted"    ,"Date restrictions are lifted"     ,FALSE,FALSE},
+    {"expVars"             ,"Experimental variables"           ,FALSE,FALSE}, // Only defined for one obj: wgEncodeCaltechRnaSeq
+    {"fileIndex"           ,"BAM Index file"                   ,FALSE,FALSE},
+    {"fileName"            ,"File Name for downloading"        ,FALSE,FALSE},
+    {"fragLength"          ,"Mean Length of DNA fragments"     ,FALSE,FALSE},
+    {"fragSize"            ,"Length of GIS PET fragments"      ,FALSE,FALSE}, // WHAT IS THIS?
+    {"freezeDate"          ,"Gencode freeze date"              ,FALSE,FALSE},
+    {"geoSample"           ,"GEO accession"                    ,TRUE ,FALSE},
+  //{"geoSampleAccession"  ,"GEO sample accession"             ,TRUE ,FALSE}, // Should be replaced by geoSample
+    {"geoSeries"           ,"GEO series accession"             ,TRUE ,FALSE},
+  //{"geoSeriesAccession"  ,"GEO series"                       ,TRUE ,FALSE}, // Should be replaced by geoSeries
+    {"grant"               ,"Principal Investigator"           ,TRUE ,TRUE },
+    {"insertLength"        ,"Insertion length"                 ,FALSE,FALSE}, // WHAT IS THIS?
+    {"lab"                 ,"Lab producing data"               ,TRUE ,TRUE },
+    {"labExpId"            ,"Lab specific identifier"          ,TRUE ,FALSE},
+    {"labProtocolId"       ,"Lab specific protocol ID"         ,FALSE,FALSE},
+    {"labVersion"          ,"Lab specific details"             ,TRUE ,FALSE},
+    {"level"               ,"Gencode level"                    ,FALSE,FALSE},
+    {"localization"        ,"Cell compartment"                 ,TRUE ,TRUE },
+    {"mapAlgorithm"        ,"Mapping algorithm"                ,TRUE ,TRUE },
+    {"origAssembly"        ,"Assembly originally mapped to"    ,TRUE ,FALSE}, // Could be cv.ra term
+  //{"parentTable"         ,"Closest related table"            ,FALSE,FALSE}, // On its way out
+    {"phase"               ,"Cell phase"                       ,FALSE,FALSE}, // Only one exp in hg18: wgEncodeUwDnaseSeqPeaksRep1JurkatG1
+  //{"project"             ,"Project funded by"                ,FALSE,FALSE}, // Only one: all encode is under single project='wgEncode'
+    {"protocol"            ,"Library Protocol"                 ,TRUE ,TRUE },
+    {"rank"                ,"Rank of replicate"                ,FALSE,FALSE}, // UW used rank in hg18 only
+    {"readType"            ,"Paired/Single reads lengths"      ,TRUE ,TRUE },
+    {"replicate"           ,"Replicate number"                 ,TRUE ,FALSE},
+    {"restrictionEnzyme"   ,"Restriction Enzyme used"          ,FALSE,TRUE },
+    {"rnaExtract"          ,"RNA Extract"                      ,TRUE ,TRUE },
+    {"seqPlatform"         ,"Sequencing Platform"              ,TRUE ,TRUE },
+    {"setType"             ,"Experiment or Input"              ,TRUE ,FALSE},
+    {"sex"                 ,"Sex of organism"                  ,TRUE ,TRUE }, // really want to link to this?
+  //{"size"                ,"Mapability windowing size"        ,FALSE,FALSE}, // Used in hg19 mapability only
+    {"softwareVersion"     ,"Lab specific informatics"         ,TRUE ,FALSE},
+    {"strain"              ,"Strain of organism"               ,TRUE ,TRUE },
+    {"subId"               ,"Submission Id"                    ,TRUE ,FALSE},
+    {"submittedDataVersion","Version of data if resubmitted"   ,FALSE,FALSE},
+    {"tableName"           ,"Name of msql table at UCSC"       ,FALSE,FALSE},
+    {"treatment"           ,"Treatment"                        ,TRUE ,TRUE },
+  //{"type"                ,"Data Format type"                 ,FALSE,FALSE}, // Used rarely in hg18 to distinguish wig and bedGraph
+  //{"uniqueness"          ,"Number of mismatches tolerated"   ,FALSE,FALSE}, // Used in hg19 mapability only
+    {"view"                ,"View - Peaks or Signals"          ,TRUE ,FALSE},
+};
+
+int ix, count = sizeof(whiteList) / sizeof(whiteList[0]); // number of entries, NOT the byte size
+struct slPair *whitePairs = NULL;
+for (ix = 0; ix < count; ix++)
+    {
+    if (searchTracks && !whiteList[ix].searchable)
+        continue;
+    if (cvLinks && !whiteList[ix].cvTerm)
+        continue;
+    // The title is cloned so the pair's val never points into the static table.
+    slPairAdd(&whitePairs, whiteList[ix].term, cloneString(whiteList[ix].title));
+    }
+if (whitePairs != NULL)
+    slPairValSortCase(&whitePairs);
+
+return whitePairs;
+}