25590f99da7cd6b2b029aafe6de0712f83ba1c48 tdreszer Fri Oct 29 13:55:58 2010 -0700 Moved mdb/cv terms white list to mdb library code. Now it can be used both by trackSearch and by metadata '...' table which will have links for select terms to hgEncodeVocab. diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 922f384..31a3329 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -2135,15 +2135,106 @@ if (label != NULL) { freeMem(pair->name); // Allocated when pair was created pair->name = strSwapChar(cloneString(label),'_',' '); // vestigial _ meaning space if (limit > 0 && strlen(pair->name) > limit) pair->name[limit] = '\0'; } } slAddHead(&pairs, pair); } sqlFreeResult(&sr); slPairSortCase(&pairs); return pairs; } +struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvLinks) +// returns the official mdb/controlled vocabulary terms that have been whitelisted for certain uses. +{ +#define WHITE_LIST_COUNT 35 + +struct cvTerms + { + char *term; // The actual term as it appears in the mdb or cv.ra + void *title; // The title that should appear in Track Search + boolean searchable; // Can this term be searched for in Track Search? + boolean cvTerm; // Can this term be linked to cv.ra content? + }; + +// TODO: move this list into cv.ra as type=term +// TODO: If hgEncodeVocab was changed to give a description of a term not found in cv.ra, then many more of these could be linked. +struct cvTerms whiteList[] = { + // term Title search cvTerm + {"accession" ,"Lab provided accession" ,FALSE,FALSE}, // Superceded by geoSample? + {"age" ,"Age of experimental organism" ,TRUE ,TRUE }, + {"annotation" ,"Gencode annotation" ,FALSE,FALSE}, + {"antibody" ,"Antibody or target protein" ,TRUE ,TRUE }, + {"bioRep" ,"UNKNOWN" ,FALSE,FALSE}, + {"cell" ,"Cell, tissue or DNA sample" ,TRUE ,TRUE }, + {"composite" ,"UCSC Composite Track" ,FALSE,FALSE}, // Could be in cv.ra + {"control" ,"Control or Input for ChIPseq" ,TRUE ,TRUE }, + {"controlId" ,"ControlId - explicit relationship",FALSE,FALSE}, + {"dataType" ,"Experiment type" ,TRUE ,TRUE }, + {"dataVersion" ,"ENCODE release" ,TRUE ,FALSE}, + {"dateResubmitted" ,"Date resubmitted to UCSC" ,FALSE,FALSE}, + {"dateSubmitted" ,"Date submitted to UCSC" ,FALSE,FALSE}, + {"dateUnrestricted" ,"Date restrictions are lifted" ,FALSE,FALSE}, + {"expVars" ,"Experimental variables" ,FALSE,FALSE}, // Only defined for one obj: wgEncodeCaltechRnaSeq + {"fileIndex" ,"BAM Index file" ,FALSE,FALSE}, + {"fileName" ,"File Name for downloading" ,FALSE,FALSE}, + {"fragLength" ,"Mean Length of DNA fragments" ,FALSE,FALSE}, + {"fragSize" ,"Length of GIS PET fragments" ,FALSE,FALSE}, // WHAT IS THIS? + {"freezeDate" ,"Gencode freeze date" ,FALSE,FALSE}, + {"geoSample" ,"GEO accession" ,TRUE ,FALSE}, + //{"geoSampleAccession" ,"GEO sample accession" ,TRUE ,FALSE}, // Should be replaced by geoSample + {"geoSeries" ,"GEO series accession" ,TRUE ,FALSE}, + //{"geoSeriesAccession" ,"GEO series" ,TRUE ,FALSE}, // Should be replaced by geoSeries + {"grant" ,"Principal Investigator" ,TRUE ,TRUE }, + {"insertLength" ,"Insertion length" ,FALSE,FALSE}, // WHAT IS THIS? + {"lab" ,"Lab producing data" ,TRUE ,TRUE }, + {"labExpId" ,"Lab specific identifier" ,TRUE ,FALSE}, + {"labProtocolId" ,"Lab specific protocol ID" ,FALSE,FALSE}, + {"labVersion" ,"Lab specific details" ,TRUE ,FALSE}, + {"level" ,"Gencode level" ,FALSE,FALSE}, + {"localization" ,"Cell compartment" ,TRUE ,TRUE }, + {"mapAlgorithm" ,"Mapping algorithm" ,TRUE ,TRUE }, + {"origAssembly" ,"Assembly originally mapped to" ,TRUE ,FALSE}, // Could be cv.ra term + //{"parentTable" ,"Closest related table" ,FALSE,FALSE}, // On its way out + {"phase" ,"Cell phase" ,FALSE,FALSE}, // Only one exp in hg18: wgEncodeUwDnaseSeqPeaksRep1JurkatG1 + //{"project" ,"Project funded by" ,FALSE,FALSE}, // Only one: all encode is under single project='wgEncode' + {"protocol" ,"Library Protocol" ,TRUE ,TRUE }, + {"rank" ,"Rank of replicate" ,FALSE,FALSE}, // UW used rank in hg18 only + {"readType" ,"Paired/Single reads lengths" ,TRUE ,TRUE }, + {"replicate" ,"Replicate number" ,TRUE ,FALSE}, + {"restrictionEnzyme" ,"Restriction Enzyme used" ,FALSE,TRUE }, + {"rnaExtract" ,"RNA Extract" ,TRUE ,TRUE }, + {"seqPlatform" ,"Sequencing Platform" ,TRUE ,TRUE }, + {"setType" ,"Experiment or Input" ,TRUE ,FALSE}, + {"sex" ,"Sex of organism" ,TRUE ,TRUE }, // really want to link to this? + //{"size" ,"Mapability windowing size" ,FALSE,FALSE}, // Used in hg19 mapability only + {"softwareVersion" ,"Lab specific informatics" ,TRUE ,FALSE}, + {"strain" ,"Strain of organism" ,TRUE ,TRUE }, + {"subId" ,"Submission Id" ,TRUE ,FALSE}, + {"submittedDataVersion","Version of data if resubmitted" ,FALSE,FALSE}, + {"tableName" ,"Name of msql table at UCSC" ,FALSE,FALSE}, + {"treatment" ,"Treatment" ,TRUE ,TRUE }, + //{"type" ,"Data Format type" ,FALSE,FALSE}, // Used rarely in hg18 to distinguish wig and bedGraph + //{"uniqueness" ,"Number of mismatches tolerated" ,FALSE,FALSE}, // Used in hg19 mapability only + {"view" ,"View - Peaks or Signals" ,TRUE ,FALSE}, +}; + +int ix,size = sizeof(whiteList); +struct slPair *whitePairs = NULL; +for(ix=0;ix