d947dcc75b04fd0dc921c0eb02ed988d7b4819f0
braney
  Mon Jun 30 16:00:56 2025 -0700
let genark hubs appear in hgConvert if we can figure out their UCSC org

diff --git src/hg/lib/genark.c src/hg/lib/genark.c
index 9d31bd1d39b..df599a2ae91 100644
--- src/hg/lib/genark.c
+++ src/hg/lib/genark.c
@@ -1,385 +1,467 @@
 /* genark.c was originally generated by the autoSql program, which also
  * generated genark.h and genark.sql.  This module links the database and
  * the RAM representation of objects. */
 
 #include <limits.h>
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "genark.h"
 #include "hgConfig.h"
 #include "hdb.h"
+#include "dbDb.h"
 
 char *genarkCommaSepFieldNames = "gcAccession,hubUrl,asmName,scientificName,commonName,taxId,priority,clade";
 
 void genarkStaticLoad(char **row, struct genark *ret)
 /* Load a row from genark table into ret.  The contents of ret will
  * be replaced at the next call to this function. */
 {
 int colCount = genArkColumnCount();
 
 ret->gcAccession = row[0];
 ret->hubUrl = row[1];
 ret->asmName = row[2];
 ret->scientificName = row[3];
 ret->commonName = row[4];
 ret->taxId = sqlSigned(row[5]);
 ret->priority = 0;
 if (colCount > 6) { ret->priority = sqlSigned(row[6]); }
 if (colCount > 7)
     ret->clade = row[7];
 else
     ret->clade = cloneString("n/a");
 }
 
 struct genark *genarkLoadByQuery(struct sqlConnection *conn, char *query)
 /* Load all genark from table that satisfy the query given.
  * Where query is of the form 'select * from example where something=something'
  * or 'select example.* from example, anotherTable where example.something =
  * anotherTable.something'.
  * Dispose of this with genarkFreeList(). */
 {
 struct genark *list = NULL, *el;
 struct sqlResult *sr;
 char **row;
 
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     el = genarkLoad(row);
     slAddHead(&list, el);
     }
 slReverse(&list);
 sqlFreeResult(&sr);
 return list;
 }
 
 void genarkSaveToDb(struct sqlConnection *conn, struct genark *el, char *tableName, int updateSize)
 /* Save genark as a row to the table specified by tableName.
  * As blob fields may be arbitrary size updateSize specifies the approx size
  * of a string that would contain the entire query. Arrays of native types are
  * converted to comma separated strings and loaded as such, User defined types are
  * inserted as NULL. This function automatically escapes quoted strings for mysql. */
 {
 struct dyString *update = dyStringNew(updateSize);
 sqlDyStringPrintf(update, "insert into %s values ( '%s','%s','%s','%s','%s',%d,%d,'%s')",
 	tableName,  el->gcAccession,  el->hubUrl,  el->asmName,  el->scientificName,  el->commonName,  el->taxId,  el->priority,  el->clade);
 sqlUpdate(conn, update->string);
 dyStringFree(&update);
 }
 
 struct genark *genarkLoad(char **row)
 /* Load a genark from row fetched with select * from genark
  * from database.  Dispose of this with genarkFree(). */
 {
 int colCount = genArkColumnCount();
 struct genark *ret;
 
 AllocVar(ret);
 ret->gcAccession = cloneString(row[0]);
 ret->hubUrl = cloneString(row[1]);
 ret->asmName = cloneString(row[2]);
 ret->scientificName = cloneString(row[3]);
 ret->commonName = cloneString(row[4]);
 ret->taxId = sqlSigned(row[5]);
 ret->priority = 0;
 if (colCount > 6) { ret->priority = sqlSigned(row[6]); }
 if (colCount > 7)
     ret->clade = row[7];
 else
     ret->clade = cloneString("n/a");
 return ret;
 }
 
 struct genark *genarkLoadAll(char *fileName)
 /* Load all genark from a whitespace-separated file.
  * Dispose of this with genarkFreeList(). */
 {
 struct genark *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[8];
 
 while (lineFileRow(lf, row))
     {
     el = genarkLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct genark *genarkLoadAllByChar(char *fileName, char chopper)
 /* Load all genark from a chopper separated file.
  * Dispose of this with genarkFreeList(). */
 {
 struct genark *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[8];
 
 while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
     {
     el = genarkLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct genark *genarkCommaIn(char **pS, struct genark *ret)
 /* Create a genark out of a comma separated string.
  * This will fill in ret if non-null, otherwise will
  * return a new genark */
 {
 char *s = *pS;
 
 if (ret == NULL)
     AllocVar(ret);
 ret->gcAccession = sqlStringComma(&s);
 ret->hubUrl = sqlStringComma(&s);
 ret->asmName = sqlStringComma(&s);
 ret->scientificName = sqlStringComma(&s);
 ret->commonName = sqlStringComma(&s);
 ret->taxId = sqlSignedComma(&s);
 ret->priority = sqlSignedComma(&s);
 ret->clade = sqlStringComma(&s);
 *pS = s;
 return ret;
 }
 
 void genarkFree(struct genark **pEl)
 /* Free a single dynamically allocated genark such as created
  * with genarkLoad(). */
 {
 struct genark *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->gcAccession);
 freeMem(el->hubUrl);
 freeMem(el->asmName);
 freeMem(el->scientificName);
 freeMem(el->commonName);
 freeMem(el->clade);
 freez(pEl);
 }
 
 void genarkFreeList(struct genark **pList)
 /* Free a list of dynamically allocated genark's */
 {
 struct genark *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     genarkFree(&el);
     }
 *pList = NULL;
 }
 
 void genarkOutput(struct genark *el, FILE *f, char sep, char lastSep)
 /* Print out genark.  Separate fields with sep. Follow last field with lastSep. */
 {
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->gcAccession);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->hubUrl);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->asmName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->scientificName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->commonName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%d", el->taxId);
 fputc(sep,f);
 fprintf(f, "%d", el->priority);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->clade);
 if (sep == ',') fputc('"',f);
 fputc(lastSep,f);
 }
 
 void genarkJsonOutput(struct genark *el, FILE *f)
 /* Print out genark in JSON format. */
 {
 fputc('{',f);
 fputc('"',f);
 fprintf(f,"gcAccession");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->gcAccession);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"hubUrl");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->hubUrl);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"asmName");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->asmName);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"scientificName");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->scientificName);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"commonName");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->commonName);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"taxId");
 fputc('"',f);
 fputc(':',f);
 fprintf(f, "%d", el->taxId);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"priority");
 fputc('"',f);
 fputc(':',f);
 fprintf(f, "%d", el->priority);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"clade");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->clade);
 fputc('"',f);
 fputc('}',f);
 }
 
 /* -------------------------------- End autoSql Generated Code -------------------------------- */
 
 char *genarkUrl(char *accession)
 /* Return the URL to the genark assembly with this accession if present,
  * otherwise return NULL
  * */
 {
 char *genarkPrefix = cfgOption("genarkHubPrefix");
 
 if (genarkPrefix == NULL)
     return NULL;
 
 struct sqlConnection *conn = hConnectCentral();
 if (!sqlTableExists(conn, genarkTableName()))
     return NULL;
 
 char *url = NULL;
 char query[4096];
 char buffer[4096];
 sqlSafef(query, sizeof query, "select hubUrl from %s where gcAccession='%s'", genarkTableName(), accession);
 if (sqlQuickQuery(conn, query, buffer, sizeof buffer))
     {
     char buffer2[4096];
     safef(buffer2, sizeof buffer2, "%s/%s", genarkPrefix, buffer);
 
     url = cloneString(buffer2);
     }
 
 hDisconnectCentral(&conn);
 
 return url;
 }
 
 char *genArkPath(char *genome)
 /* given a GenArk hub genome name, e.g. GCA_021951015.1 return the path:
  *               GCA/021/951/015
  * prefix that with desired server URL: https://hgdownload.soe.ucsc.edu/hubs/
  *   if desired.  Or suffix add /hub.txt to get the hub.txt URL
  *   The path returned does not depend upon this GCx_ naming scheme,
  *   it simply uses the hub URL as returned from genarkUrl(genome) and
  *   returns the middle part without the https://... prefix
  */
 {
 if (isEmpty(genome))
     return NULL;
 
 char *url = genarkUrl(genome);
 if (isEmpty(url))
     return NULL;
 char *genarkPrefix = cfgOption("genarkHubPrefix");
 stripString(url, genarkPrefix);
 stripString(url, "/hub.txt");
 stripString(url, genome);
 /* remove the trailing / */
 trimLastChar(url);
 /* the ++url skips the leading / character*/
 return cloneString(++url);
 }
 
 static char *_genarkTableName = NULL;
 
 char *genarkTableName()
 /* return the genark table name from the environment,
  * or hg.conf, or use the default.  Cache the result */
 {
 if (_genarkTableName == NULL)
     _genarkTableName = cfgOptionEnvDefault("HGDB_GENARK_STATUS_TABLE",
 	    genarkTableConfVariable, defaultGenarkTableName);
 
 return _genarkTableName;
 }
 
 /* temporary function while the genark table is in transistion with
  * new coluns being added, July 2024.  Allows compatibility with existing
  * genark table.
  */
 int genArkColumnCount()
 /* return number of columns in genark table */
 {
 static int colCount = 0;
 if (colCount > 0)
    return colCount;
 char *centralProfile = "central";
 char *centralDb = cfgOption2(centralProfile, "db");
 struct sqlConnection *conn = hConnectCentral();
 if (!sqlTableExists(conn, genarkTableName()))
     return colCount;
 char query[4096];
 sqlSafef(query, sizeof query, "SELECT count(*) FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s'", centralDb, genarkTableName());
 colCount = sqlQuickNum(conn, query);
 hDisconnectCentral(&conn);
 return colCount;
 }
 
 boolean isGenArk(char *genome)
 /* given a genome name, see if it is in the genark table to determine
  *  yes/no this is a genark genome assembly
  */
 {
 if (isEmpty(genome))
     return FALSE;
 char *url = genarkUrl(genome);
 if (isEmpty(url))
     return FALSE;
 return TRUE;
 }
+
+struct dbDb * genarkMakeDbDb(char **row)
+/* Fake a dbDb structure for a Genark hub from a genark table row
+ * (row[0] gcAccession, row[3] scientificName, row[4] commonName, row[5] taxId).
+ * Every string field that is set is heap-allocated, so the result does not
+ * alias the cached org hash or string literals. */
+{
+struct dbDb *dbDb;
+char *org = hashFindVal(genarkGetOrgHash(), row[0]);
+
+AllocVar(dbDb);
+dbDb->name = cloneString(row[0]);
+dbDb->nibPath = cloneString("genark");
+dbDb->description = cloneString(row[4]); // commonName
+dbDb->scientificName = cloneString(row[3]);
+dbDb->taxId = atoi(row[5]);
+/* clone so dbDbFree() never frees a hash value or a string literal */
+dbDb->genome = cloneString(org != NULL ? org : "Other");
+dbDb->orderKey = 99999;
+dbDb->defaultPos = cloneString("default");
+return dbDb;
+}
+
+struct dbDb *genarkLiftOverDbs(char *listOfAccs)
+/* Return list of dbDb structures for the genark genomes that match listOfAccs,
+ * the contents of a SQL "in (...)" list of quoted accessions.  It is pasted
+ * into the query as-is, so callers must have escaped it.  Returns NULL when
+ * genarkLiftOver is not configured or listOfAccs is empty. */
+{
+if (!cfgOption("genarkLiftOver") || isEmpty(listOfAccs))
+    return NULL;   // an empty "in ()" list is a SQL syntax error
+struct dbDb *list = NULL;
+char query[64 * 1024];
+safef(query, sizeof query, "NOSQLINJ select * from %s where gcAccession in (%s)", genarkTableName(), listOfAccs);
+struct sqlConnection *conn = hConnectCentral();
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct dbDb *dbDb = genarkMakeDbDb(row);
+    slAddHead(&list, dbDb);
+    }
+slReverse(&list);
+sqlFreeResult(&sr);
+hDisconnectCentral(&conn);
+return list;
+}
+
+struct dbDb *genarkLiftOverDb(char *acc)
+/* return dbDb structure for GC* acc; NULL if acc is empty or not SQL-safe */
+{
+char query[4096];
+if (isEmpty(acc) || strpbrk(acc, "'\\") != NULL) return NULL; // no quote breakout
+safef(query, sizeof query, "'%s'", acc);
+return genarkLiftOverDbs(query);
+}
+
+struct hash *genarkGetOrgHash()
+/* Return hash mapping gcAccession to UCSC org name, read from the central
+ * genarkOrg table on first call and cached afterwards.  The hash is empty if
+ * that table does not exist.  Result is shared: do not free it. */
+{
+static struct hash *orgHash = NULL;
+if (orgHash != NULL)
+    return orgHash;
+char *orgTable = "genarkOrg";
+char query[1024];
+struct sqlConnection *conn = hConnectCentral();
+orgHash = newHash(0);
+if (sqlTableExists(conn, orgTable))  // missing table just means no org info
+    {
+    sqlSafef(query, sizeof query, "select * from %s", orgTable);
+    struct sqlResult *sr = sqlGetResult(conn, query);
+    char **row;
+    while ((row = sqlNextRow(sr)) != NULL)
+        hashAdd(orgHash, row[0], cloneString(row[1])); // hash copies its key
+    sqlFreeResult(&sr);
+    }
+hDisconnectCentral(&conn);
+return orgHash;
+}