67dcce67b7da61d4de46d360754b174294432a8c
chmalee
  Wed Mar 29 16:22:39 2023 -0700
Fix non-knownCanonical results not showing up in a knownGene search, refs #25078

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index 0312ced..3bd4f03 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -406,54 +406,72 @@
 	    slAddHead(&idList, idEl);
 	    }
 	}
     pipelineClose(&pl);  /* Takes care of lf too. */
     freeCmds(cmds, keyCount);
     if (verboseLevel() >= 3)
 	{
 	int count = slCount(idList);
 	verbose(3, "*** Got %d results from %s\n\n", count, indexFile);
 	}
     }
 freeMem(escapedKey);
 return idList;
 }
 
+static struct hgPosTable *findTable(struct hgPosTable *list, char *name)
+/* Return the first table in list whose name matches name, or NULL if there is none. */
+{
+struct hgPosTable *ret = NULL;
+for (ret = list; ret != NULL; ret = ret->next)
+    {
+    if (sameString(ret->name, name))
+        return ret;
+    }
+return NULL;
+}
+
 static struct hgPosTable *addKnownGeneTable(char *db, struct hgPositions *hgp, char *name)
 /* Create new table for known genes matches, add it to hgp, and return it. */
 {
+// We may be coming here a second time (e.g. after already hitting knownGeneFast); if hgp
+// already has a table called name, return it so new results are added to that table.
 struct hgPosTable *table;
+table = findTable(hgp->tableList, name);  // NOTE(review): new tables below may be stored under hdbGetMasterGeneTrack(name) rather than name -- confirm this lookup still finds them
+if (table == NULL)
+    {
     AllocVar(table);
     table->searchTime = -1;
     if (differentString(name, "knownGene"))
         {
         char *masterGeneTrack = hdbGetMasterGeneTrack(name);
 
         table->description = cloneString(masterGeneTrack);
         table->name = cloneString(masterGeneTrack);
         }
     else
         {
         if (hTableExists(db, "knownAttrs"))
             table->description = cloneString("Gencode Genes");
         else if (hTableExists(db, "kgProtMap2"))
             table->description = cloneString("UCSC Genes");
         else
             table->description = cloneString("Known Genes");
         table->name = cloneString("knownGene");
         }
     slAddHead(&hgp->tableList, table);
+    }
 return table;
 }
 
 static char *makeIndexPath(char *db, char *name)
 {
 /* create the pathname with the knowngene index for a db, result needs to be freed */
 char *path = needMem(PATH_LEN);
 safef(path, PATH_LEN, "/gbdb/%s/%s.ix", db, name);
 char *newPath = hReplaceGbdb(path);
 freez(&path);
 return newPath;
 }
 
 static boolean gotFullText(char *db, char *indexPath)
 /* Return TRUE if we have full text index. */
@@ -486,31 +504,30 @@
 const struct hgPos *hg1 = *((struct hgPos**)vhg1);
 const struct hgPos *hg2 = *((struct hgPos**)vhg2);
 int diff = trixSearchResultCmp(&hg1->tp->tsr, &hg2->tp->tsr);
 if (diff == 0)
     {
     diff = (hg2->canonical - hg1->canonical);
     if (diff == 0)
         {
 	// Prioritize things on main chromosomes
 	diff = chrNameCmpWithAltRandom(hg1->chrom, hg2->chrom);
 	}
     }
 return diff;
 }
 
-
 static void addKnownGeneItems(struct hgPosTable *table,
 	struct trixSearchResult *tsrList, struct sqlConnection *conn, char *name, struct trix *trix, struct hgFindSpec *hfs)
 /* Convert tsrList to posList, and hang posList off of table. */
 {
 struct dyString *dy = dyStringNew(0);
 struct trixSearchResult *tsr;
 struct hash *hash = hashNew(16);
 struct hgPos *pos, *posList = NULL;
 struct tsrPos *tpList = NULL, *tp;
 struct sqlResult *sr;
 char **row;
 int maxToReturn = NONEXHAUSTIVE_SEARCH_LIMIT;
 char *db = sqlGetDatabase(conn);
 char *dbName;
 
@@ -591,31 +608,53 @@
     }
 sqlFreeResult(&sr);
 
 /* Hang all pos onto table. */
 for (tp = tpList; tp != NULL; tp = tp->next)
     {
     struct hgPos *next;
     for (pos = tp->posList; pos != NULL; pos = next)
         {
 	next = pos->next;
 	slAddHead(&posList, pos);
 	}
     }
 
 slSort(&posList, hgPosCmpCanonical);
+// We may have already been here (e.g. knownGeneFast was queried first). If so, put
+// the new results behind the old list, since the old results had a higher priority,
+// skipping names already present. This lets us rank knownGene results by putting what
+// users should find first into separate search specs. TODO: prevHash is never freed.
+if (table->posList == NULL)
     table->posList = posList;
+else
+    {
+    struct hash *prevHash = hashNew(0);
+    struct hgPos *newPosList = NULL, *next;
+    for (pos = table->posList; pos != NULL; pos = pos->next)
+        {
+        hashAdd(prevHash, pos->name, pos);
+        }
+    for (pos = posList; pos != NULL; pos = next)
+        {
+        next = pos->next;
+        if (!hashLookup(prevHash, pos->name))
+            slAddHead(&newPosList, pos);
+        }
+    slReverse(&newPosList);
+    table->posList = slCat(table->posList, newPosList);
+    }
 
 hashFree(&hash);
 dyStringFree(&dy);
 }
 
 static boolean findKnownGeneFullText(char *db, char *term,struct hgPositions *hgp, char *name, char *path, struct hgFindSpec *hfs, boolean measureTiming)
 /* Look for position in full text. */
 {
 long startTime = clock1000();
 boolean gotIt = FALSE;
 struct trix *trix;
 struct trixSearchResult *tsrList;
 char *lowered = cloneString(term);
 char *keyWords[HGFIND_MAX_KEYWORDS];
 int keyCount;
@@ -627,31 +666,36 @@
 tsrList = trixSearch(trix, keyCount, keyWords, tsmExpand);
 if (tsrList != NULL)
     {
     table = addKnownGeneTable(db, hgp, name);
     struct sqlConnection *conn = hAllocConn(db);
     addKnownGeneItems(table, tsrList, conn, name, trix, hfs);
     hFreeConn(&conn);
     gotIt = TRUE;
     }
 freez(&lowered);
 trixSearchResultFreeList(&tsrList);
 trixClose(&trix);
 // This is hacky but rely on knownGene table being at head of list
 // for timing. TODO: make this more robust
 if (measureTiming && table != NULL)
+    {
+    if (table->searchTime == -1)
         table->searchTime = clock1000() - startTime;
+    else
+        table->searchTime += clock1000() - startTime;
+    }
 return gotIt;
 }
 
 static char *getUiUrl(struct cart *cart)
 /* Get rest of UI from browser. */
 {
 static struct dyString *dy = NULL;
 static char *s = NULL;
 if (dy == NULL)
     {
     dy = dyStringNew(64);
     if (cart != NULL && cart->sessionId != NULL)
 	dyStringPrintf(dy, "%s=%s", cartSessionVarName(), cartSessionId(cart));
     s = dy->string;
     }
@@ -3397,31 +3441,31 @@
     {
     boolean foundSpec = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming);
     if (foundSpec)
         hashAdd(foundSpecHash, hfs->searchTable, hfs->searchTable);
     foundIt |= foundSpec;
 
     // for multiTerm searches (like '15q11;15q13'), each individual component
     // must resolve to a single position, so break once we find the first match
     if (multiTerm && foundSpec)
         break;
     }
 if (!(multiTerm) || (multiTerm && !foundIt))
     {
     for (hfs = longList; hfs != NULL; hfs = hfs->next)
         {
-        if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL)
+        if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL && !sameString(hfs->searchTable, "knownGene"))
             continue;
         foundIt |= hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming);
         }
     // lastly search any included track hubs, or in the case of an assembly hub, any of the tracks
     if (hubCategoryList)
         foundIt |= findBigBedPosInTdbList(cart, db, hubCategoryList, term, hgp, NULL, measureTiming);
     }
 
 // multiTerm searches must resolve to a single range on a chromosome, so don't
 // do these non positional searches if a multiTerm was requested
 if (!multiTerm)
     {
     getLabelsForHubs();
     struct searchCategory *category;
     for (category = categories; category != NULL; category = category->next)