ce20d3f78f14e756e3a76e57160249fba33d1819 tdreszer Thu Mar 24 13:29:13 2011 -0700 Split out the cv routines into their own lib file, but have yet to rename any of the APIs diff --git src/hg/lib/cv.c src/hg/lib/cv.c new file mode 100644 index 0000000..0749f81 --- /dev/null +++ src/hg/lib/cv.c @@ -0,0 +1,238 @@ + + +// cv.c stands for Controlled Vocabullary and this file contains the +// library APIs for reading and making sense of the contents of cv.ra. + +#include "common.h" +#include "linefile.h" +#include "dystring.h" +#include "ra.h" +#include "hui.h" +#include "mdb.h" + +// CV Defines that should not necessarily be public + +// CV UGLY TERMS (NOTE: These should be hiddne inside cv.c APIS and callers should use non-UGLY terms) +#define CV_UGLY_TOT_CELLTYPE "cellType" +#define CV_UGLY_TERM_CELL_LINE "Cell Line" +#define CV_UGLY_TERM_ANTIBODY "Antibody" + +// Type of Terms searchable defines +#define CV_SEARCHABLE "searchable" +#define CV_SEARCHABLE_SINGLE_SELECT "select" +#define CV_SEARCHABLE_MULTI_SELECT "multiSelect" +#define CV_SEARCHABLE_FREE_TEXT "freeText" + +// TODO: decide to make this public or hide it away inside the one function so far that uses it. +static char *cv_file() +// return default location of cv.ra +{ +static char filePath[PATH_LEN]; +char *root = hCgiRoot(); +if (root == NULL || *root == 0) + root = "/usr/local/apache/cgi-bin/"; // Make this check out sandboxes? +// root = "/cluster/home/tdreszer/kent/src/hg/makeDb/trackDb/cv/alpha/"; // Make this check out sandboxes? +safef(filePath, sizeof(filePath), "%s/encode/%s", root,CV_FILE_NAME); +if(!fileExists(filePath)) + errAbort("Error: can't locate %s; %s doesn't exist\n", CV_FILE_NAME, filePath); +return filePath; +} + +const struct hash *mdbCvTermHash(char *term) +// returns a hash of hashes of a term which should be defined in cv.ra +// NOTE: in static memory: DO NOT FREE +{ +static struct hash *cvHashOfHashOfHashes = NULL; +if (sameString(term,MDB_VAR_CELL)) + term = CV_UGLY_TERM_CELL_LINE; +else if (sameString(term,MDB_VAR_ANTIBODY)) + term = CV_UGLY_TERM_ANTIBODY; + +if (cvHashOfHashOfHashes == NULL) + cvHashOfHashOfHashes = hashNew(0); + +struct hash *cvTermHash = hashFindVal(cvHashOfHashOfHashes,term); +// Establish cv hash of Term Types if it doesn't already exist +if (cvTermHash == NULL) + { + cvTermHash = raReadWithFilter(cv_file(), CV_TERM,CV_TYPE,term); + if (cvTermHash != NULL) + hashAdd(cvHashOfHashOfHashes,term,cvTermHash); + } + +return cvTermHash; +} + +struct slPair *mdbValLabelSearch(struct sqlConnection *conn, char *var, int limit, boolean tags, boolean tables, boolean files) +// Search the metaDb table for vals by var and returns val (as pair->name) and controlled vocabulary (cv) label +// (if it exists) (as pair->val). Can impose (non-zero) limit on returned string size of name. +// if requested, return cv tag instead of mdb val. If requested, limit to table objs or file objs +// Return is case insensitive sorted on label (cv label or else val). +{ // TODO: Change this to use normal mdb struct routines? +if (!tables && !files) + errAbort("mdbValSearch requests values for neither table nor file objects.\n"); + +char *tableName = mdbTableName(conn,TRUE); // Look for sandBox name first + +struct dyString *dyQuery = dyStringNew(512); +if (limit > 0) + dyStringPrintf(dyQuery,"select distinct LEFT(val,%d)",limit); +else + dyStringPrintf(dyQuery,"select distinct val"); + +dyStringPrintf(dyQuery," from %s l1 where l1.var='%s' ",tableName,var); + +if (!tables || !files) + dyStringPrintf(dyQuery,"and exists (select l2.obj from %s l2 where l2.obj = l1.obj and l2.var='objType' and l2.val='%s')", + tableName,tables?MDB_OBJ_TYPE_TABLE:MDB_OBJ_TYPE_FILE); + +struct hash *varHash = (struct hash *)mdbCvTermHash(var); + +struct slPair *pairs = NULL; +struct sqlResult *sr = sqlGetResult(conn, dyStringContents(dyQuery)); +dyStringFree(&dyQuery); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + char *val = row[0]; + char *label = NULL; + if (varHash != NULL) + { + struct hash *valHash = hashFindVal(varHash,val); + if (valHash != NULL) + { + label = cloneString(hashOptionalVal(valHash,CV_LABEL,row[0])); + if (tags) + { + char *tag = hashFindVal(valHash,CV_TAG); + if (tag != NULL) + val = tag; + } + } + } + if (label == NULL); + label = cloneString(row[0]); + label = strSwapChar(label,'_',' '); // vestigial _ meaning space + slPairAdd(&pairs,val,label); + } +sqlFreeResult(&sr); +slPairValSortCase(&pairs); +return pairs; +} + +const struct hash *mdbCvTermTypeHash() +// returns a hash of hashes of mdb and controlled vocabulary (cv) term types +// Those terms should contain label,description,searchable,cvDefined,hidden +// NOTE: in static memory: DO NOT FREE +{ // NOTE: "typeOfTerm" is specialized, so don't use mdbCvTermHash +static struct hash *cvHashOfTermTypes = NULL; + +// Establish cv hash of Term Types if it doesn't already exist +if (cvHashOfTermTypes == NULL) + { + cvHashOfTermTypes = raReadWithFilter(cv_file(), CV_TERM,CV_TYPE,CV_TOT); + // Patch up an ugly inconsistency with 'cell' + struct hash *cellHash = hashRemove(cvHashOfTermTypes,CV_UGLY_TOT_CELLTYPE); + if (cellHash) + { + hashAdd(cvHashOfTermTypes,CV_TERM_CELL,cellHash); + hashReplace(cellHash, CV_TERM, cloneString(CV_TERM_CELL)); // spilling memory of 'cellType' val + } + struct hash *abHash = hashRemove(cvHashOfTermTypes,CV_UGLY_TERM_ANTIBODY); + if (abHash) + { + hashAdd(cvHashOfTermTypes,CV_TERM_ANTIBODY,abHash); + hashReplace(abHash, CV_TERM, cloneString(CV_TERM_ANTIBODY)); // spilling memory of 'Antibody' val + } + } + +return cvHashOfTermTypes; +} + +struct slPair *mdbCvWhiteList(boolean searchTracks, boolean cvDefined) +// returns the official mdb/controlled vocabulary terms that have been whitelisted for certain uses. +// TODO: change to return struct that includes searchable! +{ +struct slPair *whitePairs = NULL; + +// Get the list of term types from thew cv +struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); +struct hashCookie hc = hashFirst(termTypeHash); +struct hashEl *hEl; +while ((hEl = hashNext(&hc)) != NULL) + { + char *setting = NULL; + struct hash *typeHash = (struct hash *)hEl->val; + //if (!includeHidden) + { + setting = hashFindVal(typeHash,CV_TOT_HIDDEN); + if(SETTING_IS_ON(setting)) + continue; + } + if (searchTracks) + { + setting = hashFindVal(typeHash,CV_SEARCHABLE); + if (setting == NULL + || ( differentWord(setting,CV_SEARCHABLE_SINGLE_SELECT) + && differentWord(setting,CV_SEARCHABLE_MULTI_SELECT) + && differentWord(setting,CV_SEARCHABLE_FREE_TEXT))) + continue; + } + if (cvDefined) + { + setting = hashFindVal(typeHash,CV_TOT_CV_DEFINED); + if(SETTING_NOT_ON(setting)) + continue; + } + char *term = hEl->name; + char *label = hashFindVal(typeHash,CV_LABEL); + if (label == NULL) + label = term; + slPairAdd(&whitePairs, term, cloneString(label)); // Term gets cloned in slPairAdd + } +if (whitePairs != NULL) + slPairValSortCase(&whitePairs); + +return whitePairs; +} + +enum mdbCvSearchable mdbCvSearchMethod(char *term) +// returns whether the term is searchable // TODO: replace with mdbCvWhiteList() returning struct +{ +// Get the list of term types from thew cv +struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); +struct hash *termHash = hashFindVal(termTypeHash,term); +if (termHash != NULL) + { + char *searchable = hashFindVal(termHash,CV_SEARCHABLE); + if (searchable != NULL) + { + if (sameWord(searchable,CV_SEARCHABLE_SINGLE_SELECT)) + return cvsSearchBySingleSelect; + if (sameWord(searchable,CV_SEARCHABLE_MULTI_SELECT)) + return cvsSearchByMultiSelect; + if (sameWord(searchable,CV_SEARCHABLE_FREE_TEXT)) + return cvsSearchByFreeText; + //if (sameWord(searchable,"date")) + // return cvsSearchByDateRange; + //if (sameWord(searchable,"numeric")) + // return cvsSearchByIntegerRange; + } + } +return cvsNotSearchable; +} + +const char *cvLabel(char *term) +// returns cv label if term found or else just term +{ +// Get the list of term types from thew cv +struct hash *termTypeHash = (struct hash *)mdbCvTermTypeHash(); +struct hash *termHash = hashFindVal(termTypeHash,term); +if (termHash != NULL) + { + char *label = hashFindVal(termHash,CV_LABEL); + if (label != NULL) + return label; + } +return term; +}