ba87d8471c9c78f15412ccad2aaba9b46d1af38e hiram Tue Jul 2 16:14:47 2024 -0700 eliminate the dependency upon specific genArk genome names, use the genark table for questions about existence, refs #32596 diff --git src/hg/lib/genark.c src/hg/lib/genark.c index 0a660b7..9d31bd1 100644 --- src/hg/lib/genark.c +++ src/hg/lib/genark.c @@ -298,49 +298,54 @@ char buffer[4096]; sqlSafef(query, sizeof query, "select hubUrl from %s where gcAccession='%s'", genarkTableName(), accession); if (sqlQuickQuery(conn, query, buffer, sizeof buffer)) { char buffer2[4096]; safef(buffer2, sizeof buffer2, "%s/%s", genarkPrefix, buffer); url = cloneString(buffer2); } hDisconnectCentral(&conn); return url; } -char *genArkHubTxt(char *gcX) -/* given a GC[AF]_012345678.9 name, return hub.txt URL */ +char *genArkPath(char *genome) +/* given a GenArk hub genome name, e.g. GCA_021951015.1 return the path: + * GCA/021/951/015 + * prefix that with desired server URL: https://hgdownload.soe.ucsc.edu/hubs/ + * if desired. Or suffix add /hub.txt to get the hub.txt URL + * The path returned does not depend upon this GCx_ naming scheme, + * it simply uses the hub URL as returned from genarkUrl(genome) and + * returns the middle part without the https://... 
prefix + */ { -char hubTxt[PATH_MAX + 1024]; -/* temporary construction of the path */ -char tPath[PATH_MAX + 1024]; -safencpy(tPath, 4, gcX, 3); -safencpy(tPath+3, 2, "/", 1); -safencpy(tPath+4, 4, gcX+4, 3); -safencpy(tPath+7, 2, "/", 1); -safencpy(tPath+8, 4, gcX+7, 3); -safencpy(tPath+11, 2, "/", 1); -safencpy(tPath+12, 4, gcX+10, 3); -safencpy(tPath+15, 2, "/", 1); -safecpy(tPath+16, PATH_MAX-16, gcX); -/* start the result with the genArkHubPrefix, add in tPath and /hub.txt */ -safef(hubTxt, sizeof(hubTxt), "%s/%s/hub.txt", cfgOption("genarkHubPrefix"), - tPath); -return cloneString(hubTxt); // no need to free this +if (isEmpty(genome)) + return NULL; + +char *url = genarkUrl(genome); +if (isEmpty(url)) + return NULL; +char *genarkPrefix = cfgOption("genarkHubPrefix"); +stripString(url, genarkPrefix); +stripString(url, "/hub.txt"); +stripString(url, genome); +/* remove the trailing / */ +trimLastChar(url); +/* the ++url skips the leading / character*/ +return cloneString(++url); } static char *_genarkTableName = NULL; char *genarkTableName() /* return the genark table name from the environment, * or hg.conf, or use the default. 
Cache the result */ { if (_genarkTableName == NULL) _genarkTableName = cfgOptionEnvDefault("HGDB_GENARK_STATUS_TABLE", genarkTableConfVariable, defaultGenarkTableName); return _genarkTableName; } @@ -353,15 +358,28 @@ { static int colCount = 0; if (colCount > 0) return colCount; char *centralProfile = "central"; char *centralDb = cfgOption2(centralProfile, "db"); struct sqlConnection *conn = hConnectCentral(); if (!sqlTableExists(conn, genarkTableName())) return colCount; char query[4096]; sqlSafef(query, sizeof query, "SELECT count(*) FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s'", centralDb, genarkTableName()); colCount = sqlQuickNum(conn, query); hDisconnectCentral(&conn); return colCount; } + +boolean isGenArk(char *genome) +/* given a genome name, see if it is in the genark table to determine + * yes/no this is a genark genome assembly + */ +{ +if (isEmpty(genome)) + return FALSE; +char *url = genarkUrl(genome); +if (isEmpty(url)) + return FALSE; +return TRUE; +}