08a042ae53f8a7ea0b6e15bee0b12fcf48a90a76 hiram Thu Aug 29 14:07:38 2024 -0700 now with proper interface to assembly status, refSeq category, assembly level refs #32596 diff --git src/hg/hubApi/findGenome.c src/hg/hubApi/findGenome.c index b925041..2197680 100644 --- src/hg/hubApi/findGenome.c +++ src/hg/hubApi/findGenome.c @@ -14,30 +14,37 @@ { struct combinedSummary *next; /* Next in singly linked list */ struct asmSummary *summary; /* from asmSummary table */ struct genark *genArk; /* from genark table */ struct dbDb *dbDb; /* from dbDb table */ }; /* will be initialized as this function begins */ static char *genarkTable = NULL; static char *asmListTable = NULL; static boolean statsOnly = FALSE; /* these three are radio button states, only one of these three can be TRUE */ static boolean browserMustExist = TRUE; /* default: browser must exist */ static boolean browserMayExist = FALSE; static boolean browserNotExist = FALSE; +static unsigned specificYear = 0; /* from year=1234 argument */ +/* from category= reference or representative */ +static char *refSeqCategory = NULL; +/* from status= one of: latest, replaced or suppressed */ +static char *versionStatus = NULL; +/* from level= one of complete, chromosome, scaffold or contig */ +static char *assemblyLevel = NULL; /* hgsql -e 'desc assemblyList;' hgcentraltest +----------------+---------------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +----------------+---------------------+------+-----+---------+-------+ | name | varchar(255) | NO | PRI | NULL | | | priority | int(10) unsigned | YES | | NULL | | | commonName | varchar(511) | YES | | NULL | | | scientificName | varchar(511) | YES | | NULL | | | taxId | int(10) unsigned | YES | | NULL | | | clade | varchar(255) | YES | | NULL | | | description | varchar(1023) | YES | | NULL | | | browserExists | tinyint(3) unsigned | YES | | NULL | | | hubUrl | varchar(511) | YES | | NULL | | @@ -83,130 +90,151 @@ jsonWriteString(jw, 
"refSeqCategory", el->refSeqCategory); if (isEmpty(el->versionStatus)) jsonWriteString(jw, "versionStatus", NULL); else jsonWriteString(jw, "versionStatus", el->versionStatus); if (isEmpty(el->assemblyLevel)) jsonWriteString(jw, "assemblyLevel", NULL); else jsonWriteString(jw, "assemblyLevel", el->assemblyLevel); jsonWriteObjectEnd(jw); ++itemCount; } return (itemCount); } +static void addBrowserExists(struct dyString *query) +/* add the AND clauses for browserExist depending upon option */ +{ +if (browserMustExist) + sqlDyStringPrintf(query, " AND browserExists=1"); +else if (browserNotExist) + sqlDyStringPrintf(query, " AND browserExists=0"); +} + +static void addCategory(struct dyString *query) +/* refSeqCategory = reference or representative */ +{ +if (isNotEmpty(refSeqCategory)) + sqlDyStringPrintf(query, " AND refSeqCategory='%s'", refSeqCategory); +} + +static void addStatus(struct dyString *query) +/* versionStatus = latest, replaced or suppressed */ +{ +if (isNotEmpty(versionStatus)) + sqlDyStringPrintf(query, " AND versionStatus='%s'", versionStatus); +} + +static void addLevel(struct dyString *query) +/* assemblyLevel = complete, chromosome, scaffold or contig */ +{ +if (isNotEmpty(assemblyLevel)) + sqlDyStringPrintf(query, " AND assemblyLevel='%s'", assemblyLevel); +} + +static void addConditions(struct dyString *query) +/* add any of the optional conditions */ +{ +addBrowserExists(query); +addCategory(query); +addStatus(query); +addLevel(query); +} + static long long multipleWordSearch(struct sqlConnection *conn, char **words, int wordCount, struct jsonWrite *jw, long long *totalMatchCount) /* perform search on multiple words, prepare json and return number of matches */ { long long itemCount = 0; *totalMatchCount = 0; if (wordCount < 0) return itemCount; /* get the words[] into a single string */ struct dyString *queryDy = dyStringNew(128); dyStringPrintf(queryDy, "%s", words[0]); for (int i = 1; i < wordCount; ++i) dyStringPrintf(queryDy, " %s", 
words[i]); /* initial SELECT allows any browser exist status, existing or not */ struct dyString *query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description) AGAINST ('%s' IN BOOLEAN MODE)", asmListTable, queryDy->string); -/* add specific browserExists depending upon options */ -if (browserMustExist) - sqlDyStringPrintf(query, " AND browserExists=1"); -else if (browserNotExist) - sqlDyStringPrintf(query, " AND browserExists=0"); +addConditions(query); /* add optional SELECT options */ + long long matchCount = sqlQuickLongLong(conn, query->string); if (matchCount > 0) { *totalMatchCount = matchCount; if (statsOnly) // only counting, nothing returned { // the LIMIT would limit results to maxItemsOutput itemCount = min(maxItemsOutput, matchCount); } // when less than totalMatchCount else { dyStringFree(&query); query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE)", asmListTable, queryDy->string); - /* add specific browserExists depending upon options */ - if (browserMustExist) - sqlDyStringPrintf(query, " AND browserExists=1"); - else if (browserNotExist) - sqlDyStringPrintf(query, " AND browserExists=0"); + addConditions(query); /* add optional SELECT options */ sqlDyStringPrintf(query, " ORDER BY priority LIMIT %d;", maxItemsOutput); struct sqlResult *sr = sqlGetResult(conn, query->string); itemCount = sqlJsonOut(jw, sr); sqlFreeResult(&sr); dyStringFree(&query); } } return itemCount; } static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw, long long *totalMatchCount, boolean *prefixSearch) /* perform search on a single word, prepare json and return number of matches * and number of potential matches totalMatchCount */ { long long itemCount = 0; *totalMatchCount = 0; struct dyString 
*query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE)", asmListTable, searchWord); -if (browserMustExist) - sqlDyStringPrintf(query, " AND browserExists=1"); -else if (browserNotExist) - sqlDyStringPrintf(query, " AND browserExists=0"); +addConditions(query); /* add optional SELECT options */ long long matchCount = sqlQuickLongLong(conn, query->string); *prefixSearch = FALSE; /* assume not */ if (matchCount < 1) /* no match, add the * wild card match to make a prefix match */ { dyStringFree(&query); query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE)", asmListTable, searchWord); - /* add specific browserExists depending upon options */ - if (browserMustExist) - sqlDyStringPrintf(query, " AND browserExists=1"); - else if (browserNotExist) - sqlDyStringPrintf(query, " AND browserExists=0"); + addConditions(query); /* add optional SELECT options */ matchCount = sqlQuickLongLong(conn, query->string); if (matchCount > 0) *prefixSearch = TRUE; } if (matchCount < 1) // nothing found, returning zero return itemCount; *totalMatchCount = matchCount; if (statsOnly) // only counting, nothing returned { // the LIMIT would limit results to maxItemsOutput itemCount = min(maxItemsOutput, matchCount); } // when less than totalMatchCount else { dyStringFree(&query); query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s%s' IN BOOLEAN MODE)", asmListTable, searchWord, *prefixSearch ? 
"*" : ""); - /* add specific browserExists depending upon options */ - if (browserMustExist) - sqlDyStringPrintf(query, " AND browserExists=1"); - else if (browserNotExist) - sqlDyStringPrintf(query, " AND browserExists=0"); + addConditions(query); /* add optional SELECT options */ sqlDyStringPrintf(query, " ORDER BY priority LIMIT %d;", maxItemsOutput); struct sqlResult *sr = sqlGetResult(conn, query->string); itemCount = sqlJsonOut(jw, sr); sqlFreeResult(&sr); dyStringFree(&query); } return itemCount; } /* static long long oneWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw, boolean *prefixSearch) */ static long elapsedTime(struct jsonWrite *jw) { long nowTime = clock1000(); long elapsedTimeMs = nowTime - enteredMainTime; jsonWriteNumber(jw, "elapsedTimeMs", elapsedTimeMs); @@ -227,31 +255,89 @@ if (extraArgs) apiErrAbort(err400, err400Msg, "extraneous arguments found for function /findGenome'%s'", extraArgs); boolean asmListExists = hTableExists("hgcentraltest", asmListTable); if (!asmListExists) apiErrAbort(err400, err400Msg, "table hgcentraltest.assemblyList does not exist for /findGenome"); boolean asmSummaryExists = hTableExists("hgcentraltest", "asmSummary"); if (!asmSummaryExists) apiErrAbort(err400, err400Msg, "table hgcentraltest.asmSummary does not exist for /findGenome"); boolean genArkExists = hTableExists("hgcentraltest", genarkTable); if (!genArkExists) apiErrAbort(err400, err400Msg, "table hgcentraltest.%s does not exist for /findGenome", genarkTable); +char *yearString = cgiOptionalString(argYear); +char *categoryString = cgiOptionalString(argCategory); +char *statusString = cgiOptionalString(argStatus); +char *levelString = cgiOptionalString(argLevel); +/* protect sqlUnsigned from errors */ +if (isNotEmpty(yearString)) + { + struct errCatch *errCatch = errCatchNew(); + if (errCatchStart(errCatch)) + { + specificYear = sqlUnsigned(yearString); + if ((specificYear < 1800) || (specificYear > 2100)) + apiErrAbort(err400, 
err400Msg, "year specified '%s' must be >= 1800 and <= 2100", yearString); + } + errCatchEnd(errCatch); + if (errCatch->gotError) + apiErrAbort(err400, err400Msg, "can not recognize year '%s' as a number", yearString); + } +/* validate the optional category=, status= and level= arguments; it would + * probably be better to do this arg checking in a function operating from a list + */ +if (isNotEmpty(categoryString)) + { + refSeqCategory = cloneString(categoryString); + toLowerN(refSeqCategory, strlen(refSeqCategory)); + if (differentWord(refSeqCategory, "reference")) + { + if (differentWord(refSeqCategory, "representative")) + apiErrAbort(err400, err400Msg, "values for argument %s=%s must be 'reference' or 'representative'", argCategory, categoryString); + } + } +if (isNotEmpty(statusString)) + { + versionStatus = cloneString(statusString); + toLowerN(versionStatus, strlen(versionStatus)); + if (differentWord(versionStatus, "latest")) + { + if (differentWord(versionStatus, "replaced")) + if (differentWord(versionStatus, "suppressed")) + apiErrAbort(err400, err400Msg, "values for argument %s=%s must be one of: 'latest', 'replaced' or 'suppressed'", argStatus, statusString); + } + } +if (isNotEmpty(levelString)) + { + assemblyLevel = cloneString(levelString); + toLowerN(assemblyLevel, strlen(assemblyLevel)); + if (differentWord(assemblyLevel, "complete")) + { + if (differentWord(assemblyLevel, "chromosome")) + if (differentWord(assemblyLevel, "scaffold")) + if (differentWord(assemblyLevel, "contig")) + apiErrAbort(err400, err400Msg, "values for argument %s=%s must be one of: 'complete', 'chromosome', 'scaffold' or 'contig'", argLevel, levelString); + } + } + char *browserExistString = cgiOptionalString(argBrowser); +if (NULL == browserExistString) /* set default if none given */ + browserExistString = cloneString("mustExist"); + if (isNotEmpty(browserExistString)) { /* from radio buttons, only one can be on */ if (sameWord(browserExistString, "mustExist")) { browserMustExist = TRUE; /* default: browser must exist */ 
browserMayExist = FALSE; browserNotExist = FALSE; } else if (sameWord(browserExistString, "mayExist")) { browserMustExist = FALSE; browserMayExist = TRUE; browserNotExist = FALSE; } else if (sameWord(browserExistString, "notExist")) @@ -280,31 +366,41 @@ if (!sqlTableExists(conn, asmListTable)) apiErrAbort(err500, err500Msg, "missing central.assemblyList table in function /findGenome'%s'", extraArgs); int wordCount = 0; /* verify number of words in search string is legal */ wordCount = chopByWhite(searchString, NULL, 0); if (wordCount < 1) apiErrAbort(err400, err400Msg, "search term '%s' does not contain a word ? for function /findGenome", argQ); if (wordCount > 5) apiErrAbort(err400, err400Msg, "search term '%s=%s' should not have more than 5 words for function /findGenome", argQ, searchString); struct jsonWrite *jw = apiStartOutput(); +/* show options in effect in JSON return */ + jsonWriteString(jw, argBrowser, browserExistString); +if (specificYear > 0) + jsonWriteNumber(jw, argYear, specificYear); +if (isNotEmpty(refSeqCategory)) + jsonWriteString(jw, argCategory, refSeqCategory); +if (isNotEmpty(versionStatus)) + jsonWriteString(jw, argStatus, versionStatus); +if (isNotEmpty(assemblyLevel)) + jsonWriteString(jw, argLevel, assemblyLevel); long long itemCount = 0; long long totalMatchCount = 0; char **words; AllocArray(words, wordCount); (void) chopByWhite(searchString, words, wordCount); if (1 == wordCount) itemCount = oneWordSearch(conn, words[0], jw, &totalMatchCount, &prefixSearch); else /* multiple word search */ itemCount = multipleWordSearch(conn, words, wordCount, jw, &totalMatchCount); if (prefixSearch) { struct dyString *addedStar = dyStringNew(64); dyStringPrintf(addedStar, "%s*", inputSearchString);