ba87d8471c9c78f15412ccad2aaba9b46d1af38e hiram Tue Jul 2 16:14:47 2024 -0700 eliminte the dependency upon specific genArk genome names, use the genark table for questions about existence, refs #32596 diff --git src/hg/lib/genark.c src/hg/lib/genark.c index 0a660b7..9d31bd1 100644 --- src/hg/lib/genark.c +++ src/hg/lib/genark.c @@ -1,367 +1,385 @@ /* genark.c was originally generated by the autoSql program, which also * generated genark.h and genark.sql. This module links the database and * the RAM representation of objects. */ #include <limits.h> #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "genark.h" #include "hgConfig.h" #include "hdb.h" char *genarkCommaSepFieldNames = "gcAccession,hubUrl,asmName,scientificName,commonName,taxId,priority,clade"; void genarkStaticLoad(char **row, struct genark *ret) /* Load a row from genark table into ret. The contents of ret will * be replaced at the next call to this function. */ { int colCount = genArkColumnCount(); ret->gcAccession = row[0]; ret->hubUrl = row[1]; ret->asmName = row[2]; ret->scientificName = row[3]; ret->commonName = row[4]; ret->taxId = sqlSigned(row[5]); ret->priority = 0; if (colCount > 6) { ret->priority = sqlSigned(row[6]); } if (colCount > 7) ret->clade = row[7]; else ret->clade = cloneString("n/a"); } struct genark *genarkLoadByQuery(struct sqlConnection *conn, char *query) /* Load all genark from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with genarkFreeList(). */ { struct genark *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = genarkLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void genarkSaveToDb(struct sqlConnection *conn, struct genark *el, char *tableName, int updateSize) /* Save genark as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. This function automatically escapes quoted strings for mysql. */ { struct dyString *update = dyStringNew(updateSize); sqlDyStringPrintf(update, "insert into %s values ( '%s','%s','%s','%s','%s',%d,%d,'%s')", tableName, el->gcAccession, el->hubUrl, el->asmName, el->scientificName, el->commonName, el->taxId, el->priority, el->clade); sqlUpdate(conn, update->string); dyStringFree(&update); } struct genark *genarkLoad(char **row) /* Load a genark from row fetched with select * from genark * from database. Dispose of this with genarkFree(). */ { int colCount = genArkColumnCount(); struct genark *ret; AllocVar(ret); ret->gcAccession = cloneString(row[0]); ret->hubUrl = cloneString(row[1]); ret->asmName = cloneString(row[2]); ret->scientificName = cloneString(row[3]); ret->commonName = cloneString(row[4]); ret->taxId = sqlSigned(row[5]); ret->priority = 0; if (colCount > 6) { ret->priority = sqlSigned(row[6]); } if (colCount > 7) ret->clade = row[7]; else ret->clade = cloneString("n/a"); return ret; } struct genark *genarkLoadAll(char *fileName) /* Load all genark from a whitespace-separated file. * Dispose of this with genarkFreeList(). */ { struct genark *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[8]; while (lineFileRow(lf, row)) { el = genarkLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct genark *genarkLoadAllByChar(char *fileName, char chopper) /* Load all genark from a chopper separated file. * Dispose of this with genarkFreeList(). */ { struct genark *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[8]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = genarkLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct genark *genarkCommaIn(char **pS, struct genark *ret) /* Create a genark out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new genark */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->gcAccession = sqlStringComma(&s); ret->hubUrl = sqlStringComma(&s); ret->asmName = sqlStringComma(&s); ret->scientificName = sqlStringComma(&s); ret->commonName = sqlStringComma(&s); ret->taxId = sqlSignedComma(&s); ret->priority = sqlSignedComma(&s); ret->clade = sqlStringComma(&s); *pS = s; return ret; } void genarkFree(struct genark **pEl) /* Free a single dynamically allocated genark such as created * with genarkLoad(). */ { struct genark *el; if ((el = *pEl) == NULL) return; freeMem(el->gcAccession); freeMem(el->hubUrl); freeMem(el->asmName); freeMem(el->scientificName); freeMem(el->commonName); freeMem(el->clade); freez(pEl); } void genarkFreeList(struct genark **pList) /* Free a list of dynamically allocated genark's */ { struct genark *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; genarkFree(&el); } *pList = NULL; } void genarkOutput(struct genark *el, FILE *f, char sep, char lastSep) /* Print out genark. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->gcAccession); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->hubUrl); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->asmName); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->scientificName); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->commonName); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->taxId); fputc(sep,f); fprintf(f, "%d", el->priority); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->clade); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void genarkJsonOutput(struct genark *el, FILE *f) /* Print out genark in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"gcAccession"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->gcAccession); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"hubUrl"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->hubUrl); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"asmName"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->asmName); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"scientificName"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->scientificName); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"commonName"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->commonName); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"taxId"); fputc('"',f); fputc(':',f); fprintf(f, "%d", el->taxId); fputc(',',f); fputc('"',f); fprintf(f,"priority"); fputc('"',f); fputc(':',f); fprintf(f, "%d", el->priority); fputc(',',f); fputc('"',f); fprintf(f,"clade"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->clade); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ char *genarkUrl(char *accession) /* Return the URL to the genark assembly with this accession if present, * otherwise return NULL * */ { char *genarkPrefix = cfgOption("genarkHubPrefix"); if (genarkPrefix == NULL) return NULL; struct sqlConnection *conn = hConnectCentral(); if (!sqlTableExists(conn, genarkTableName())) return NULL; char *url = NULL; char query[4096]; char buffer[4096]; sqlSafef(query, sizeof query, "select hubUrl from %s where gcAccession='%s'", genarkTableName(), accession); if (sqlQuickQuery(conn, query, buffer, sizeof buffer)) { char buffer2[4096]; safef(buffer2, sizeof buffer2, "%s/%s", genarkPrefix, buffer); url = cloneString(buffer2); } hDisconnectCentral(&conn); return url; } -char *genArkHubTxt(char *gcX) -/* given a GC[AF]_012345678.9 name, return hub.txt URL */ +char *genArkPath(char *genome) +/* given a GenArk hub genome name, e.g. GCA_021951015.1 return the path: + * GCA/021/951/015 + * prefix that with desired server URL: https://hgdownload.soe.ucsc.edu/hubs/ + * if desired. Or suffix add /hub.txt to get the hub.txt URL + * The path returned does not depend upon this GCx_ naming scheme, + * it simply uses the hub URL as returned from genarkUrl(genome) and + * returns the middle part without the https://... prefix + */ { -char hubTxt[PATH_MAX + 1024]; -/* temporary construction of the path */ -char tPath[PATH_MAX + 1024]; -safencpy(tPath, 4, gcX, 3); -safencpy(tPath+3, 2, "/", 1); -safencpy(tPath+4, 4, gcX+4, 3); -safencpy(tPath+7, 2, "/", 1); -safencpy(tPath+8, 4, gcX+7, 3); -safencpy(tPath+11, 2, "/", 1); -safencpy(tPath+12, 4, gcX+10, 3); -safencpy(tPath+15, 2, "/", 1); -safecpy(tPath+16, PATH_MAX-16, gcX); -/* start the result with the genArkHubPrefix, add in tPath and /hub.txt */ -safef(hubTxt, sizeof(hubTxt), "%s/%s/hub.txt", cfgOption("genarkHubPrefix"), - tPath); -return cloneString(hubTxt); // no need to free this +if (isEmpty(genome)) + return NULL; + +char *url = genarkUrl(genome); +if (isEmpty(url)) + return NULL; +char *genarkPrefix = cfgOption("genarkHubPrefix"); +stripString(url, genarkPrefix); +stripString(url, "/hub.txt"); +stripString(url, genome); +/* remove the trailing / */ +trimLastChar(url); +/* the ++url skips the leading / character*/ +return cloneString(++url); } static char *_genarkTableName = NULL; char *genarkTableName() /* return the genark table name from the environment, * or hg.conf, or use the default. Cache the result */ { if (_genarkTableName == NULL) _genarkTableName = cfgOptionEnvDefault("HGDB_GENARK_STATUS_TABLE", genarkTableConfVariable, defaultGenarkTableName); return _genarkTableName; } /* temporary function while the genark table is in transistion with * new coluns being added, July 2024. Allows compatibility with existing * genark table. */ int genArkColumnCount() /* return number of columns in genark table */ { static int colCount = 0; if (colCount > 0) return colCount; char *centralProfile = "central"; char *centralDb = cfgOption2(centralProfile, "db"); struct sqlConnection *conn = hConnectCentral(); if (!sqlTableExists(conn, genarkTableName())) return colCount; char query[4096]; sqlSafef(query, sizeof query, "SELECT count(*) FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s'", centralDb, genarkTableName()); colCount = sqlQuickNum(conn, query); hDisconnectCentral(&conn); return colCount; } + +boolean isGenArk(char *genome) +/* given a genome name, see if it is in the genark table to determine + * yes/no this is a genark genome assembly + */ +{ +if (isEmpty(genome)) + return FALSE; +char *url = genarkUrl(genome); +if (isEmpty(url)) + return FALSE; +return TRUE; +}