8d9253332f172e52c7ecb7fbc6ecde2bad30561f
hiram
  Mon Feb 5 15:36:52 2024 -0800
with browserUrl and finding equivalent asmSummary for UCSC dbDb browser refs #32897

diff --git src/hg/hubApi/findGenome.c src/hg/hubApi/findGenome.c
index 6152923..5bb88a8 100644
--- src/hg/hubApi/findGenome.c
+++ src/hg/hubApi/findGenome.c
@@ -79,106 +79,130 @@
 jsonWriteNumber(jw, "genomeSize", (long long)el->genomeSize);
 jsonWriteNumber(jw, "genomeSizeUngapped", (long long)el->genomeSizeUngapped);
 jsonWriteDouble(jw, "gcPercent", (double)el->gcPercent);
 jsonWriteNumber(jw, "repliconCount", (long long)el->repliconCount);
 jsonWriteNumber(jw, "scaffoldCount", (long long)el->scaffoldCount);
 jsonWriteNumber(jw, "contigCount", (long long)el->contigCount);
 jsonWriteString(jw, "annotationProvider", el->annotationProvider);
 jsonWriteString(jw, "annotationName", el->annotationName);
 jsonWriteString(jw, "annotationDate", el->annotationDate);
 jsonWriteNumber(jw, "totalGeneCount", (long long)el->totalGeneCount);
 jsonWriteNumber(jw, "proteinCodingGeneCount", (long long)el->proteinCodingGeneCount);
 jsonWriteNumber(jw, "nonCodingGeneCount", (long long)el->nonCodingGeneCount);
 jsonWriteString(jw, "pubmedId", el->pubmedId);
 }
 
+static void setBrowserUrl(struct jsonWrite *jw, char *browserId)
+{
+char* host = getenv("HTTP_HOST");
+char browserUrl[1024];
+if (startsWith("GC", browserId))
+    {
+    if (host)
+        safef(browserUrl, sizeof(browserUrl), "https://%s/h/%s", host, browserId);
+    else
+        safef(browserUrl, sizeof(browserUrl), "https://genome.ucsc.edu/h/%s", browserId);
+    }
+else
+    {
+    if (host)
+        safef(browserUrl, sizeof(browserUrl), "https://%s/cgi-bin/hgTracks?db=%s", host, browserId);
+    else
+        safef(browserUrl, sizeof(browserUrl), "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s", browserId);
+    }
+jsonWriteString(jw, "browserUrl", browserUrl);
+}
+
 static int outputCombo(struct jsonWrite *jw, struct combinedSummary *summaryList)
 /* may be information from any of the three tables */
 {
 int itemCount = 0;
 struct combinedSummary *el = NULL;
 startGenomes(jw);
 for (el=summaryList; el != NULL; el=el->next)
     {
     if (el->summary)
 	{
 	if (jsonOutputArrays)
 	    {
 	    jsonWriteObjectStart(jw, NULL);
 	    jsonWriteString(jw, "accession", el->summary->assemblyAccession);
 	    }
 	else
 	    jsonWriteObjectStart(jw, el->summary->assemblyAccession);
 
         finiSummary(jw, el->summary);
 	boolean sciNameDone = FALSE;
 	boolean comNameDone = FALSE;
         if (el->genArk)
 	    {
 //            jsonWriteString(jw, "gcAccession", el->genArk->gcAccession);
 	    /* some of these are going to be dups of the asmSummary */
+            setBrowserUrl(jw, el->genArk->gcAccession);
 	    finiGenArk(jw, el->genArk);
 	    sciNameDone = TRUE;
 	    comNameDone = TRUE;
 //            jsonWriteNumber(jw, "taxId", (long long)genome->taxId);
 	    }
 	if (el->dbDb)
 	    {
+            setBrowserUrl(jw, el->dbDb->name);
             jsonWriteString(jw, "database", el->dbDb->name);
             jsonWriteString(jw, "description", el->dbDb->description);
             jsonWriteString(jw, "sourceName", el->dbDb->sourceName);
 	    if (!sciNameDone)
 		jsonWriteString(jw, "scientificName", el->dbDb->scientificName);
 	    if (!comNameDone)
 		jsonWriteString(jw, "commonName", el->dbDb->organism);
 //            jsonWriteNumber(jw, "taxId", (long long)dbDbEntry->taxId);
 	    }
 	}
     else if (el->genArk)	/* ONLY genArk ?? - should have asmSummary TBD */
 	{
 	if (jsonOutputArrays)
 	    {
 	    jsonWriteObjectStart(jw, NULL);
 	    jsonWriteString(jw, "gcAccession", el->genArk->gcAccession);
 	    }
 	else
 	    jsonWriteObjectStart(jw, el->genArk->gcAccession);
-
+        setBrowserUrl(jw, el->genArk->gcAccession);
 	finiGenArk(jw, el->genArk);
 	if (el->dbDb)
 	    {
 	    jsonWriteBoolean(jw, "isUcscDatabase", TRUE);
             jsonWriteString(jw, "database", el->dbDb->name);
             jsonWriteString(jw, "description", el->dbDb->description);
             jsonWriteString(jw, "sourceName", el->dbDb->sourceName);
 //	    if (!sciNameDone)
 //		jsonWriteString(jw, "scientificName", el->dbDb->scientificName);
 //	    if (!comNameDone)
 //		jsonWriteString(jw, "commonName", el->dbDb->organism);
 //            jsonWriteNumber(jw, "taxId", (long long)dbDbEntry->taxId);
 	    }
 	}
     else if (el->dbDb)	/* ONLY dbDb ?  - should have the other two TBD */
 	{
 	if (jsonOutputArrays)
 	    {
 	    jsonWriteObjectStart(jw, NULL);
 	    jsonWriteString(jw, "database", el->dbDb->name);
 	    }
 	else
 	    jsonWriteObjectStart(jw, el->dbDb->name);
 
+        setBrowserUrl(jw, el->dbDb->name);
 	jsonWriteBoolean(jw, "isUcscDatabase", TRUE);
 	jsonWriteString(jw, "description", el->dbDb->description);
 	jsonWriteString(jw, "sourceName", el->dbDb->sourceName);
 	jsonWriteString(jw, "scientificName", el->dbDb->scientificName);
 	jsonWriteString(jw, "commonName", el->dbDb->organism);
 	jsonWriteNumber(jw, "taxId", (long long)el->dbDb->taxId);
 	}
 
     jsonWriteObjectEnd(jw);
     ++itemCount;
     }
 endGenomes(jw);
 return (itemCount);
 }	/*	static int outputCombo(struct jsonWrite *jw, struct combinedSummary *summaryList) */
 
@@ -198,57 +222,110 @@
         cs->genArk = NULL;
         cs->dbDb = NULL;
         char query[4096];
         sqlSafef(query, sizeof(query), "SELECT * FROM %s WHERE gcAccession = \"%s\"", genarkTable, el->assemblyAccession);
         struct genark *gA = genarkLoadByQuery(conn, query);
         if (gA)
             cs->genArk = gA;
         slAddHead(&comboSumary, cs);
         }
     }
 if (comboSumary)
   slReverse(&comboSumary);
 return comboSumary;
 }
 
-static struct asmSummary *checkAsmSummary(struct sqlConnection *conn, char *oneAccession)
-/* check the asmSummary table for the single accession LIKE */
+static struct asmSummary *checkAsmSummary(struct sqlConnection *conn, char *oneAccession, boolean exactMatch, long long limit)
+/* query asmSummary for oneAccession: exact match when exactMatch, else prefix (LIKE) match; LIMIT is applied only when limit > 0 */
 {
 char query[4096];
-sqlSafef(query, sizeof(query), "SELECT * FROM asmSummary WHERE assemblyAccession LIKE \"%s%%\"", oneAccession);
+if (limit > 0)
+    {
+    if (exactMatch)
+        sqlSafef(query, sizeof(query), "SELECT * FROM asmSummary WHERE assemblyAccession = '%s' LIMIT %lld", oneAccession, limit);
+    else
+        sqlSafef(query, sizeof(query), "SELECT * FROM asmSummary WHERE assemblyAccession LIKE '%s%%' LIMIT %lld", oneAccession, limit);
+    }
+else
+    {
+    if (exactMatch)
+        sqlSafef(query, sizeof(query), "SELECT * FROM asmSummary WHERE assemblyAccession = '%s'", oneAccession);
+    else
+        sqlSafef(query, sizeof(query), "SELECT * FROM asmSummary WHERE assemblyAccession LIKE '%s%%'", oneAccession);
+    }
+
 struct asmSummary *list = asmSummaryLoadByQuery(conn, query);
 return list;
 }
 
+// hgFixed.asmEquivalent table schema (queried below): Field   Type    Null    Key     Default Extra
+// source  varchar(255)    NO      MUL     NULL
+// destination     varchar(255)    NO      MUL     NULL
+// sourceAuthority enum('ensembl','ucsc','genbank','refseq') NO  NULL
+// destinationAuthority    enum('ensembl','ucsc','genbank','refseq')   NO  NULL
+// matchCount      bigint(20)      NO              NULL
+// sourceCount     bigint(20)      NO              NULL
+// destinationCount        bigint(20)      NO              NULL
+
+struct asmSummary *dbDbAsmEquivalent(struct sqlConnection *centConn, char *dbDbName)
+{
+struct asmSummary *itemReturn = NULL;
+struct sqlConnection *conn = hAllocConn("hgFixed");
+char query[4096];
+sqlSafef(query, sizeof(query), "SELECT destination from asmEquivalent WHERE source='%s' AND sourceAuthority='ucsc' AND destinationAuthority='refseq' LIMIT 1", dbDbName);
+char *asmIdFound = NULL;
+char *genBank = NULL;
+char *refSeq = sqlQuickString(conn, query);
+if (refSeq)
+    asmIdFound = refSeq;
+else
+    {
+    sqlSafef(query, sizeof(query), "SELECT destination from asmEquivalent WHERE source='%s' AND sourceAuthority='ucsc' AND destinationAuthority='genbank' LIMIT 1", dbDbName);
+    genBank = sqlQuickString(conn, query);
+    if (genBank)
+	asmIdFound = genBank;
+    }
+if (asmIdFound)
+    {
+    char *words[3];
+    int wordCount = chopString(asmIdFound, "_", words, ArraySize(words));
+    if (wordCount > 2)
+	{
+	safef(query, sizeof(query), "%s_%s", words[0], words[1]);
+        itemReturn = checkAsmSummary(centConn, query, TRUE, 1);
+	}
+    }
+return itemReturn;
+}
+
 static int singleWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw)
 /* conn is a connection to hgcentral, searching for one word,
  *   might be a UCSC database or a GenArk hub accession, or just some word.
  */
 {
 /* the input word might be a database alias
  * asmAliasFind returns the searchWord if no alias found
  */
 char *perhapsAlias = asmAliasFind(searchWord);
 
 int itemCount = 0;
 
 if (startsWith("GC", perhapsAlias))
     {
     struct asmSummary *asmSumFound = NULL;
     struct hash *asmSummaryHash = NULL;
-    asmSumFound = checkAsmSummary(conn, perhapsAlias);
-    verbose(0, "# found %d items\n", slCount(asmSumFound));
+    asmSumFound = checkAsmSummary(conn, perhapsAlias, FALSE, 0);
     if (asmSumFound)
         {
         struct asmSummary *el;
         asmSummaryHash = newHash(0);
         for (el = asmSumFound; el != NULL; el = el->next)
             hashAdd(asmSummaryHash, el->assemblyAccession, (void *)el);
         }
     char query[4096];
     sqlSafef(query, sizeof(query), "SELECT * FROM %s WHERE gcAccession LIKE \"%s%%\"", genarkTable, perhapsAlias);
     struct sqlResult *sr = sqlGetResult(conn, query);
     char **row;
     struct combinedSummary *comboOutput = NULL;
     while ((row = sqlNextRow(sr)) != NULL)
         {
         struct genark *genome = genarkLoad(row);
@@ -282,33 +359,35 @@
 	}
     if (comboOutput)
 	{
 	slReverse(&comboOutput);
 	itemCount = outputCombo(jw, comboOutput);
 	}
     }	/*	if (startsWith("GC", perhapsAlias))	*/
 else
     {	/* not a GC genArk name, perhaps a UCSC database */
     if (hDbIsActive(perhapsAlias))
         {
 	struct combinedSummary *comboOutput = NULL;
         AllocVar(comboOutput);
         comboOutput->summary = NULL;
         comboOutput->genArk = NULL;
-	/* TBD need to get an equivalent GC accession name for UCSC database */
         struct dbDb *dbDbEntry = hDbDb(perhapsAlias);
         comboOutput->dbDb = dbDbEntry;
+        struct asmSummary *sumList = dbDbAsmEquivalent(conn, perhapsAlias);
+        if (sumList)
+	   comboOutput->summary = sumList;
 	itemCount = outputCombo(jw, comboOutput);
         }
     }	/*	checked genArk and UCSC database */
 if (0 == itemCount)	/* not found in genark or ucsc db, check asmSummary */
     {
     long long totalMatch = 0;
     /* no more than 100 items result please */
     struct asmSummary *summaryList = asmSummaryFullText(conn, perhapsAlias, (long long) 100, &totalMatch);
     /* now check the genark table to see if there are any of those */
     struct combinedSummary *comboOutput = checkForGenArk(conn, summaryList);
     if (comboOutput)
 	itemCount = outputCombo(jw, comboOutput);
     }
 return itemCount;
 }	/*	static int singleWordSearch(struct sqlConnection *conn, char *searchWord, struct jsonWrite *jw) */
@@ -323,52 +402,52 @@
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (strlen(row[0]) < 1)
 	jsonWriteNumber(jw, "na", sqlLongLong(row[1]));
     else
 	jsonWriteNumber(jw, row[0], sqlLongLong(row[1]));
     }
 sqlFreeResult(&sr);
 jsonWriteObjectEnd(jw);
 }
 
 static void doStatsOnly(struct sqlConnection *conn, struct jsonWrite *jw)
 /* only count the items in each database */
 {
-jsonWriteString(jw, "description:", "counting items in tables: asmSummary, genark and dbDb for number of genomes available");
+jsonWriteString(jw, "description", "counting items in tables: asmSummary, genark and dbDb for number of genomes available");
 
 char query[4096];
 sqlSafef(query, sizeof(query), "SELECT count(*) FROM %s", genarkTable);
 long long genArkCount = sqlQuickLongLong(conn, query);
 sqlSafef(query, sizeof(query), "SELECT count(*) FROM asmSummary");
 long long asmSummaryTotal = sqlQuickLongLong(conn, query);
 sqlSafef(query, sizeof(query), "SELECT count(*) FROM dbDb where active=1");
 long long dbDbCount = sqlQuickLongLong(conn, query);
 long long totalCount = genArkCount + asmSummaryTotal + dbDbCount;
 jsonWriteNumber(jw, "totalCount", totalCount);
 jsonWriteObjectStart(jw, "genark");
-jsonWriteString(jw, "TBD:", "the genark table can count GCF vs. GCA and will have a clade category to couint\n");
+jsonWriteString(jw, "TBD", "the genark table can count GCF vs. GCA and will have a clade category to couint\n");
 jsonWriteNumber(jw, "itemCount", genArkCount);
 jsonWriteObjectEnd(jw);
 jsonWriteObjectStart(jw, "dbDb");
-jsonWriteString(jw, "description:", "the dbDb table is a count of UCSC 'database' browser instances");
+jsonWriteString(jw, "description", "the dbDb table is a count of UCSC 'database' browser instances");
 jsonWriteNumber(jw, "itemCount", dbDbCount);
 jsonWriteObjectEnd(jw);
 jsonWriteObjectStart(jw, "asmSummary");
-jsonWriteString(jw, "description:", "the asmSummary is the contents of the NCBI  genbank/refseq assembly_summary.txt information");
-jsonWriteString(jw, "seeAlso:", "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/README_assembly_summary.txt");
+jsonWriteString(jw, "description", "the asmSummary is the contents of the NCBI  genbank/refseq assembly_summary.txt information");
+jsonWriteString(jw, "seeAlso", "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/README_assembly_summary.txt");
 jsonWriteNumber(jw, "asmSummaryTotal", asmSummaryTotal);
 asmSummaryGroup(conn, jw, "refseqCategory");
 asmSummaryGroup(conn, jw, "versionStatus");
 asmSummaryGroup(conn, jw, "assemblyLevel");
 asmSummaryGroup(conn, jw, "releaseType");
 asmSummaryGroup(conn, jw, "genomeRep");
 asmSummaryGroup(conn, jw, "pairedAsmComp");
 asmSummaryGroup(conn, jw, "assemblyType");
 asmSummaryGroup(conn, jw, "phyloGroup");
 jsonWriteObjectEnd(jw);
 }
 
 static void elapsedTime(struct jsonWrite *jw)
 {
 long nowTime = clock1000();