src/hg/hgTracks/searchTracks.c f937c5dae3e74cf23ce69c234921cae29d45e7b5

f937c5dae3e74cf23ce69c234921cae29d45e7b5
larrym
  Fri Jun 11 18:21:40 2010 +0000
improve isDescriptionMatch so it finds matches only at the start of words
diff --git src/hg/hgTracks/searchTracks.c src/hg/hgTracks/searchTracks.c
index 6bbfbad..08eb4da 100644
--- src/hg/hgTracks/searchTracks.c
+++ src/hg/hgTracks/searchTracks.c
@@ -15,8 +15,9 @@
 #include "jksql.h"
 #include "hdb.h"
 #include "trix.h"
+#include "jsHelper.h"
 
-static char const rcsid[] = "$Id: searchTracks.c,v 1.10 2010/06/10 20:24:01 larrym Exp $";
+static char const rcsid[] = "$Id: searchTracks.c,v 1.11 2010/06/11 18:21:40 larrym Exp $";
 
 #define ANYLABEL "Any"
 #define METADATA_NAME_PREFIX "hgt.metadataName"
@@ -54,42 +55,43 @@
 }
 
 static boolean isDescriptionMatch(struct track *track, char **words, int wordCount)
-// We parse str and look for every word ANYWHERE in track description (i.e. google style).
-// XXXX currently quite primitive; do stemming, strip html markup ??
-// trackMetaData contains tracks already found via metadata searches.
+// We look for every word in words at the start of some word in the track description (i.e. google style).
 {
-boolean found = FALSE;
-
 if(words)
     {
+    // We do NOT look up the parent hierarchy for html descriptions.
     char *html = track->tdb->html;
-    if(isEmpty(html))
-        {
-        // XXXX is there a cleaner way to find parent?
-        struct trackDb *parent = track->tdb->parent;
-        while(parent != NULL && (parent->html == NULL || !strlen(parent->html)))
-            parent = parent->parent;
-        if(parent != NULL)
-            html = parent->html;
-        }
-
     if(!isEmpty(html))
         {
-        int i;
+        /* This probably could be made more efficient by parsing the html into some kind of b-tree, but I am assuming 
+           that the inner html loop will only happen for 1-2 words for the vast majority of the tracks. */
+
+        int i, numMatches = 0;
+        html = stripRegEx(html, "<[^>]*>", REG_ICASE);
         for(i = 0; i < wordCount; i++)
             {
-            char *val = words[i];
-            if(strstrNoCase(html, val) == NULL)
+            char *needle = words[i];
+            char *haystack, *tmp = cloneString(html);
+            boolean found = FALSE;
+            while((haystack = nextWord(&tmp)))
+                {
+                char *ptr = strstrNoCase(haystack, needle);
+                if(ptr != NULL && ptr == haystack)
                 {
-                found = FALSE;
+                    found = TRUE;
                 break;
                 }
+                }
+            if(found)
+                numMatches++;
             else
-                found = TRUE;
+                break;
             }
+        if(numMatches == wordCount)
+            return TRUE;
         }
     }
-return found;
+return FALSE;
 }
 
 static int getTermArray(struct sqlConnection *conn, char ***terms, char *type)
@@ -238,7 +240,7 @@
 webStartWrapperDetailedNoArgs(cart, database, "", "Track Search (prototype!)", FALSE, FALSE, FALSE, FALSE);
 
 hPrintf("<input type='hidden' name='db' value='%s'>\n", database);
-hPrintf("<input type='hidden' name='hgt.currentSearchTab' id='hgt.currentSearchTab' value='%s'>\n", currentTab);
+hPrintf("<input type='hidden' name='hgt.currentSearchTab' id='currentSearchTab' value='%s'>\n", currentTab);
 
 hPrintf("<div id='tabs'>\n"
         "<ul>\n"