e70152e44cc66cc599ff6b699eb8adc07f3e656a kent Sat May 24 21:09:34 2014 -0700 Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment. diff --git src/hg/lib/hgFindSpecCustom.c src/hg/lib/hgFindSpecCustom.c index 1ba1091..6c65a1f 100644 --- src/hg/lib/hgFindSpecCustom.c +++ src/hg/lib/hgFindSpecCustom.c @@ -1,536 +1,539 @@ /* hgFindSpecCustom - custom (not autoSQL generated) code for working * with hgFindSpec. This code is concerned with making the hgFindSpec * MySQL table out of the trackDb.ra files. */ +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + #include "common.h" #include "linefile.h" #include "jksql.h" #include "hgFindSpec.h" #include "hdb.h" #include "hui.h" #include "ra.h" #include "hash.h" #include "obscure.h" #include "regexHelper.h" #include "trackDb.h" /* ----------- End of AutoSQL generated code --------------------- */ static void anchorTermRegex(struct hgFindSpec *hfs) /* termRegex must match the whole term. If it doesn't already start with * ^ and end in $, add those (no need to make the trackDb.ra file even * harder to read with those extra magic chars :). */ { if (isNotEmpty(hfs->termRegex)) { char *orig = hfs->termRegex; char first = orig[0]; char last = orig[strlen(orig)-1]; char buf[512]; safef(buf, sizeof(buf), "%s%s%s", (first == '^') ? "" : "^", orig, (last == '$') ? "" : "$"); freeMem(hfs->termRegex); hfs->termRegex = cloneString(buf); } else if (hfs->termRegex == NULL) hfs->termRegex = ""; } static void checkTermRegex(struct hgFindSpec *hfs) /* Make sure termRegex compiles OK. */ { if (isNotEmpty(hfs->termRegex)) { char buf[256]; safef(buf, sizeof(buf), "hfsPolish: search %s: termRegex", hfs->searchName); const regex_t *compiledExp = regexCompile(hfs->termRegex, buf, (REG_EXTENDED | REG_ICASE | REG_NOSUB)); compiledExp = NULL; // Avoid compiler warning about unused variable / return value } } static void escapeTermRegex(struct hgFindSpec *hfs) /* Escape any '\' characters in termRegex for sql storage. */ { if (isNotEmpty(hfs->termRegex)) { char *orig = hfs->termRegex; hfs->termRegex = makeEscapedString(orig, '\\'); freeMem(orig); } } static char *genePredDefaultFormat = "select chrom,txStart,txEnd,name from %s where name like "; static char *bedDefaultFormat = "select chrom,chromStart,chromEnd,name from %s where name like "; static char *pslDefaultFormat = "select tName,tStart,tEnd,qName from %s where qName like "; static char *exactTermFormat = "'%s'"; static char *prefixTermFormat = "'%s%%'"; static char *fuzzyTermFormat = "'%%%s%%'"; static char *getQueryFormat(struct hgFindSpec *hfs) /* Fill in query format from searchType if necessary. */ { char *queryFormat = hfs->query; char buf[256]; if (isEmpty(queryFormat)) { char *baseFmt = ""; char *termFmt = ""; buf[0] = 0; if (sameString(hfs->searchType, "genePred")) baseFmt = genePredDefaultFormat; else if (sameString(hfs->searchType, "bed")) baseFmt = bedDefaultFormat; else if (sameString(hfs->searchType, "psl")) baseFmt = pslDefaultFormat; if (isNotEmpty(baseFmt)) { if (isNotEmpty(hfs->xrefQuery)) termFmt = exactTermFormat; else if (sameString(hfs->searchMethod, "fuzzy")) termFmt = fuzzyTermFormat; else if (sameString(hfs->searchMethod, "prefix")) termFmt = prefixTermFormat; else termFmt = exactTermFormat; } safef(buf, sizeof(buf), "%s%s", baseFmt, termFmt); queryFormat = cloneString(buf); } return(queryFormat); } static char *queryFormatRegex = "^select [[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+ " "from %s where [[:alnum:]]+ (r?like|=) ['\"]?.*%s.*['\"]?$"; static char *exactTermFormatRegex = "['\"]?.*%s.*['\"]?$"; static char *prefixTermFormatRegex = "['\"]?%s.*%%['\"]?$"; static void checkQueryFormat(struct hgFindSpec *hfs) /* Make sure query looks right and jives with searchMethod. */ { if (isNotEmpty(hfs->query) && !hgFindSpecSetting(hfs, "dontCheckQueryFormat")) { if (! regexMatchNoCase(hfs->query, queryFormatRegex)) errAbort("hfsPolish: search %s: query needs to be of the format " "\"select field1,field2,field3,field4 from %%s " "where field4 like '%%s'\" " "(for prefix, '%%s%%%%'; for fuzzy, '%%%%%%s%%%%'), " "but instead is this:\n%s", hfs->searchName, hfs->query); if (isNotEmpty(hfs->xrefQuery)) { if (!regexMatchNoCase(hfs->query, exactTermFormatRegex)) errAbort("hfsPolish: search %s: there is an xrefQuery so query " "needs to end with %s (exact match to xref results).", hfs->searchName, exactTermFormat); } else { if (sameString(hfs->searchMethod, "fuzzy") && !endsWith(hfs->query, fuzzyTermFormat)) errAbort("hfsPolish: search %s: searchMethod is fuzzy so query " "needs to end with %s.", hfs->searchName, fuzzyTermFormat); else if (sameString(hfs->searchMethod, "prefix") && !regexMatchNoCase(hfs->query, prefixTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is prefix so query " "needs to end with %s.", hfs->searchName, prefixTermFormat); else if (sameString(hfs->searchMethod, "exact") && !regexMatchNoCase(hfs->query, exactTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is exact so query " "needs to end with %s.", hfs->searchName, exactTermFormat); } } } static char *xrefQueryFormatRegex = "select [[:alnum:]]+, ?[[:alnum:]]+(\\([^)]+\\))? from %s where [[:alnum:]]+ (like|=) ['\"]?[%s]+['\"]?"; static void checkXrefQueryFormat(struct hgFindSpec *hfs) /* Make sure xrefQuery looks right and jives with searchMethod. */ { if (isNotEmpty(hfs->xrefQuery) && !hgFindSpecSetting(hfs, "dontCheckXrefQueryFormat")) { if (! regexMatchNoCase(hfs->xrefQuery, xrefQueryFormatRegex)) errAbort("hfsPolish: search %s: xrefQuery needs to be of the format " "\"select field1,field2 from %%s where field2 like '%%s'\" " "(for prefix, '%%s%%%%'; for exact, '%%%%%%s%%%%'), " "but instead is this:\n%s", hfs->searchName, hfs->xrefQuery); if (sameString(hfs->searchMethod, "fuzzy") && !endsWith(hfs->xrefQuery, fuzzyTermFormat)) errAbort("hfsPolish: search %s: searchMethod is fuzzy so xrefQuery " "needs to end with %s.", hfs->searchName, fuzzyTermFormat); else if (sameString(hfs->searchMethod, "prefix") && !regexMatchNoCase(hfs->xrefQuery, prefixTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is prefix so xrefQuery " "needs to end with %s.", hfs->searchName, prefixTermFormat); else if (sameString(hfs->searchMethod, "exact") && !regexMatchNoCase(hfs->xrefQuery, exactTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is exact so xrefQuery " " needs to end with %s.", hfs->searchName, exactTermFormat); } } static void hgFindSpecPolish(char *db, struct hgFindSpec *hfs) /* Fill in missing values with defaults, check for consistency. */ { /* At least one of {searchName, searchTable} must be defined. */ if ((hfs->searchName == NULL) && (hfs->searchTable == NULL)) errAbort("hfsPolish: searchName or searchTable must be defined.\n"); if (hfs->searchName == NULL) hfs->searchName = cloneString(hfs->searchTable); if (hfs->searchTable == NULL) hfs->searchTable = cloneString(hfs->searchName); /* If searchType is not defined, query must be defined. */ if (hfs->searchType == NULL && hfs->query == NULL) errAbort("hfsPolish: search %s: if searchType is not defined, " "then query must be defined.\n", hfs->searchName); /* If one of {xrefTable,xrefQuery} is defined, both must be. */ if ((hfs->xrefTable == NULL) ^ (hfs->xrefQuery == NULL)) errAbort("hfsPolish: search %s: can't define xrefTable without xrefQuery " "or vice versa.\n", hfs->searchName); if (hfs->searchMethod == NULL) hfs->searchMethod = cloneString("exact"); if (hfs->searchType == NULL) hfs->searchType = ""; anchorTermRegex(hfs); checkTermRegex(hfs); escapeTermRegex(hfs); if (hfs->query == NULL) hfs->query = getQueryFormat(hfs); checkQueryFormat(hfs); checkXrefQueryFormat(hfs); if (hfs->xrefTable == NULL) hfs->xrefTable = ""; if (hfs->xrefQuery == NULL) hfs->xrefQuery = ""; if (hfs->searchPriority == 0.0) hfs->searchPriority = 1000.0; if (hfs->searchDescription == NULL) { char buf[512]; struct sqlConnection *conn = hAllocConn(db); struct trackDb *tdb = hMaybeTrackInfo(conn, hfs->searchTable); hFreeConn(&conn); if (tdb != NULL) safecpy(buf, sizeof(buf), tdb->longLabel); else safef(buf, sizeof(buf), "%s", hfs->searchTable); hfs->searchDescription = cloneString(buf); } if (hfs->searchSettings == NULL) hfs->searchSettings = cloneString(""); } int hgFindSpecCmp(const void *va, const void *vb) /* Compare to sort based on searchPriority. */ { const struct hgFindSpec *a = *((struct hgFindSpec **)va); const struct hgFindSpec *b = *((struct hgFindSpec **)vb); float dif = a->searchPriority - b->searchPriority; if (dif < 0) return -1; else if (dif == 0.0) return 0; else return 1; } static void hgFindSpecAddInfo(struct hgFindSpec *hfs, char *var, char *value) /* Add info from a variable/value pair to hgFindSpec. */ { if (sameString(var, "searchName")) hfs->searchName = cloneString(value); else if (sameString(var, "searchTable")) hfs->searchTable = cloneString(value); else if (sameString(var, "searchMethod")) hfs->searchMethod = cloneString(value); else if (sameString(var, "searchType")) hfs->searchType = cloneString(value); else if (sameWord(var, "shortCircuit")) hfs->shortCircuit = TRUE; else if (sameString(var, "termRegex")) hfs->termRegex = cloneString(value); else if (sameString(var, "query")) hfs->query = cloneString(value); else if (sameString(var, "xrefTable")) hfs->xrefTable = cloneString(value); else if (sameString(var, "xrefQuery")) hfs->xrefQuery = cloneString(value); else if (sameString(var, "searchPriority")) hfs->searchPriority = atof(value); else if (sameString(var, "searchDescription")) hfs->searchDescription = cloneString(value); else /* Add to settings. */ { if (hfs->settingsHash == NULL) hfs->settingsHash = hashNew(7); hashAdd(hfs->settingsHash, var, cloneString(value)); if (sameWord(var, "semiShortCircuit")) hfs->shortCircuit = TRUE; } } static void hgFindSpecAddRelease(struct hgFindSpec *hfs, char *releaseTag) /* Add release tag */ { hgFindSpecAddInfo(hfs, "release", cloneString(releaseTag)); } struct hgFindSpec *hgFindSpecFromRa(char *db, char *raFile, char *releaseTag) /* Load track info from ra file into list. */ { static boolean reEntered = FALSE; struct lineFile *lf = lineFileOpen(raFile, TRUE); char *line, *word; struct hgFindSpec *hfsList = NULL, *hfs; boolean done = FALSE; char *incFile; for (;;) { /* Seek to next line that starts with 'searchName' or 'searchTable' */ for (;;) { char *subRelease; if (!lineFileNext(lf, &line, NULL)) { done = TRUE; break; } if (startsWith("searchName", line) || startsWith("searchTable", line)) { lineFileReuse(lf); break; } else if ((incFile = trackDbInclude(raFile, line, &subRelease)) != NULL) { if (subRelease) trackDbCheckValidRelease(subRelease); if (releaseTag && subRelease && !sameString(subRelease, releaseTag)) errAbort("Include with release %s inside include with release %s line %d of %s", subRelease, releaseTag, lf->lineIx, lf->fileName); /* Set reEntered=TRUE whenever we recurse, so we don't polish * multiple times and get too many backslash-escapes. */ boolean reBak = reEntered; reEntered = TRUE; struct hgFindSpec *incHfs = hgFindSpecFromRa(db, incFile, subRelease); reEntered = reBak; hfsList = slCat(hfsList, incHfs); } } if (done) break; /* Allocate track structure and fill it in until next blank line. */ AllocVar(hfs); slAddHead(&hfsList, hfs); for (;;) { /* Break at blank line or EOF. */ if (!lineFileNext(lf, &line, NULL)) break; line = skipLeadingSpaces(line); if (line == NULL || line[0] == 0) break; /* Skip comments. */ if (line[0] == '#') continue; /* Parse out first word and decide what to do. */ word = nextWord(&line); if (line == NULL) errAbort("No value for %s line %d of %s", word, lf->lineIx, lf->fileName); line = trimSpaces(line); hgFindSpecAddInfo(hfs, word, line); } if (releaseTag) hgFindSpecAddRelease(hfs, releaseTag); } lineFileClose(&lf); if (! reEntered) { for (hfs = hfsList; hfs != NULL; hfs = hfs->next) { hgFindSpecPolish(db, hfs); } } slReverse(&hfsList); return hfsList; } char *hgFindSpecSetting(struct hgFindSpec *hfs, char *name) /* Return setting string or NULL if none exists. */ { if (hfs == NULL) errAbort("Program error: null hfs passed to hgFindSpecSetting."); if (hfs->settingsHash == NULL) hfs->settingsHash = raFromString(hfs->searchSettings); return hashFindVal(hfs->settingsHash, name); } char *hgFindSpecRequiredSetting(struct hgFindSpec *hfs, char *name) /* Return setting string or squawk and die. */ { char *ret = hgFindSpecSetting(hfs, name); if (ret == NULL) errAbort("Missing required %s setting in %s (%s) search spec", name, hfs->searchTable, hfs->searchName); return ret; } char *hgFindSpecSettingOrDefault(struct hgFindSpec *hfs, char *name, char *defaultVal) /* Return setting string, or defaultVal if none exists */ { char *val = hgFindSpecSetting(hfs, name); return (val == NULL ? defaultVal : val); } static struct slName *hgFindSpecNameList(char *db) /* Return the hgFindSpec table name(s) to use (based on trackDb name). */ { struct slName *trackDbList = hTrackDbList(); struct slName *specNameList = NULL; struct slName *tdbName; for (tdbName = trackDbList; tdbName != NULL; tdbName = tdbName->next) { char *subbed = replaceChars(tdbName->name, "trackDb", "hgFindSpec"); if (hTableExists(db, subbed)) slNameAddHead(&specNameList, subbed); freez(&subbed); } if (!specNameList) specNameList = slNameNew("hgFindSpec"); else slReverse(&specNameList); return specNameList; } static boolean haveSpecAlready(struct hgFindSpec *list, struct hgFindSpec *spec) /* Simply check to see if we have this search in our list already. */ { struct hgFindSpec *cur = list; while ((cur != NULL) && (!sameString(cur->searchName, spec->searchName))) cur = cur->next; return (cur) ? TRUE : FALSE; } static int hgFindSpecPriCmp(const void *va, const void *vb) /* Compare to sort by assending searchPriority. */ { const struct hgFindSpec *a = *((struct hgFindSpec **)va); const struct hgFindSpec *b = *((struct hgFindSpec **)vb); float diff = a->searchPriority - b->searchPriority; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } static struct hgFindSpec *loadFindSpecsTbl(char *db, char *tblSpec, char *where) /* Load find specs for the given where and a given tblSpec. where can be * NULL. */ { struct hgFindSpec *hfsList = NULL; char *tbl; struct sqlConnection *conn = hAllocConnProfileTbl(db, tblSpec, &tbl); char query[512]; if (where != NULL) sqlSafef(query, sizeof(query), "select * from %s where %s", tbl, where); else sqlSafef(query, sizeof(query), "select * from %s", tbl); struct sqlResult *sr = sqlGetResult(conn, query); char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct hgFindSpec *hfs = hgFindSpecLoad(row); if (!haveSpecAlready(hfsList, hfs)) slAddHead(&hfsList, hfs); } sqlFreeResult(&sr); hFreeConn(&conn); return(hfsList); } static struct hgFindSpec *loadFindSpecs(char *db, char *where) /* Load find specs for the given where. */ { struct hgFindSpec *hfsList = NULL; struct slName *hgFindSpecList = hgFindSpecNameList(db); struct slName *oneSpec; for (oneSpec = hgFindSpecList; oneSpec != NULL; oneSpec = oneSpec->next) hfsList = slCat(hfsList, loadFindSpecsTbl(db, oneSpec->name, where)); slSort(&hfsList, hgFindSpecPriCmp); return(hfsList); } struct hgFindSpec *hgFindSpecGetSpecs(char *db, boolean shortCircuit) /* Load all short-circuit (or not) search specs from the current db, sorted by * searchPriority. */ { char where[64]; sqlSafefFrag(where, sizeof(where), "shortCircuit = %d", shortCircuit); struct hgFindSpec *hfsList = loadFindSpecs(db, where); slSort(&hfsList, hgFindSpecPriCmp); return(hfsList); } void hgFindSpecGetAllSpecs(char *db, struct hgFindSpec **retShortCircuitList, struct hgFindSpec **retAdditiveList) /* Load all search specs from the current db, separated according to * shortCircuit and sorted by searchPriority. */ { struct hgFindSpec *hfs, *hfsList = loadFindSpecs(db, NULL); struct hgFindSpec *shortList = NULL, *longList = NULL; while ((hfs = slPopHead(&hfsList)) != NULL) { if (hfs->shortCircuit) slAddHead(&shortList, hfs); else slAddHead(&longList, hfs); } if (retShortCircuitList != NULL) { slSort(&shortList, hgFindSpecPriCmp); *retShortCircuitList = shortList; } else hgFindSpecFreeList(&shortList); if (retAdditiveList != NULL) { slSort(&longList, hgFindSpecPriCmp); *retAdditiveList = longList; } else hgFindSpecFreeList(&longList); }