fadb7a07e46918ac377c685de7e8dd124417e972
angie
  Thu Feb 18 10:46:24 2016 -0800
Simplified cytoBand search and loosened up termRegex to support horse.
I removed some old functionality: the cytoBand search code used to cache
the entire table, but it is never repetitively searched -- the code is
invoked only for a user's search term.  Also, keywords like "cen" and
"qter" were supported but have been excluded by search regexes for years
and no one has missed them.  Now, use a regex to help parse the search
term into chrom and band name, and look for an exact match in cytoBand.

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index fc2a080..a6a9a0e 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -936,226 +936,92 @@
 
 for (table = hgp->tableList; table != NULL; table = table->next)
     {
     for (pos = table->posList; pos != NULL; pos = pos->next)
         {
 	++posCount;
 	if (pos->chrom != NULL)
 	    hgp->singlePos = pos;
 	}
     }
 if (posCount != 1)
    hgp->singlePos = NULL;
 hgp->posCount = posCount;
 }
 
-static char *startsWithShortHumanChromName(char *db, char *chrom)
-/* Return "cannonical" name of chromosome or NULL
- * if not a chromosome.  This expects no 'chr' in name. */
-{
-int num;
-char buf[64];
-char c = chrom[0];
-
-if (c == 'x' || c == 'X' || c == 'Y' || c == 'y')
-    {
-    safef(buf, sizeof(buf), "chr%c", toupper(c));
-    return hgOfficialChromName(db, buf);
-    }
-if (!isdigit(chrom[0]))
-    return NULL;
-num = atoi(chrom);
-if (num < 1 || num > 22)
-    return NULL;
-safef(buf, sizeof(buf), "chr%d", num);
-return hgOfficialChromName(db, buf);
-}
-
-static struct cytoBand *loadAllBands(char *db)
-/* Load up all bands from database. */
+static boolean hgFindChromBand(char *db, char *chrom, char *band, int *retStart, int *retEnd)
+/* Return start/end of band in chromosome. */
 {
-struct cytoBand *list = NULL, *el;
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr = NULL;
 char **row;
-
-sr = sqlGetResult(conn, "NOSQLINJ select * from cytoBand");
-while ((row = sqlNextRow(sr)) != NULL)
+struct dyString *query = sqlDyStringCreate("select chromStart, chromEnd from cytoBand "
+                                           "where chrom = '%s' and name = '%s'",
+                                           chrom, band);
+sr = sqlGetResult(conn, query->string);
+if ((row = sqlNextRow(sr)) != NULL)
     {
-    el = cytoBandLoad(row);
-    slAddHead(&list, el);
+    if (retStart)
+        *retStart = sqlUnsigned(row[0]);
+    if (retEnd)
+        *retEnd = sqlUnsigned(row[1]);
+    return TRUE;
     }
 sqlFreeResult(&sr);
-slReverse(&list);
 hFreeConn(&conn);
-return list;
-}
-
-static struct cytoBand *bandList = NULL;
-
-void hgFindChromBand(char *db, char *chromosome, char *band, int *retStart, int *retEnd)
-/* Return start/end of band in chromosome. */
-{
-struct cytoBand *chrStart = NULL, *chrEnd = NULL, *cb;
-int start = 0, end = 500000000;
-boolean anyMatch;
-char choppedBand[64], *s, *e;
-
-if (bandList == NULL)
-    bandList = loadAllBands(db);
-
-/* Find first band in chromosome. */
-for (cb = bandList; cb != NULL; cb = cb->next)
-    {
-    if (sameString(cb->chrom, chromosome))
-        {
-	chrStart = cb;
-	break;
-	}
-    }
-if (chrStart == NULL)
-    hUserAbort("Couldn't find chromosome %s in band list", chromosome);
-
-/* Find last band in chromosome. */
-for (cb = chrStart->next; cb != NULL; cb = cb->next)
-    {
-    if (!sameString(cb->chrom, chromosome))
-        break;
-    }
-chrEnd = cb;
-
-if (sameWord(band, "cen"))
-    {
-    for (cb = chrStart; cb != chrEnd; cb = cb->next)
-        {
-	if (cb->name[0] == 'p')
-	    start = cb->chromEnd - 500000;
-	else if (cb->name[0] == 'q')
-	    {
-	    end = cb->chromStart + 500000;
-	    break;
-	    }
-	}
-    *retStart = start;
-    *retEnd = end;
-    return;
-    }
-else if (sameWord(band, "qter"))
-    {
-    *retStart = *retEnd = hChromSize(db, chromosome);
-    *retStart -= 1000000;
-    return;
-    }
-/* Look first for exact match. */
-for (cb = chrStart; cb != chrEnd; cb = cb->next)
-    {
-    if (sameWord(cb->name, band))
-        {
-	*retStart = cb->chromStart;
-	*retEnd = cb->chromEnd;
-	return;
-	}
-    }
-
-/* See if query is less specific.... */
-strcpy(choppedBand, band);
-for (;;) 
-    {
-    anyMatch = FALSE;
-    for (cb = chrStart; cb != chrEnd; cb = cb->next)
-	{
-	if (startsWith(choppedBand, cb->name))
-	    {
-	    if (!anyMatch)
-		{
-		anyMatch = TRUE;
-		start = cb->chromStart;
-		}
-	    end = cb->chromEnd;
-	    }
-	}
-    if (anyMatch)
-	{
-	*retStart = start;
-	*retEnd = end;
-	return;
-	}
-    s = strrchr(choppedBand, '.');
-    if (s == NULL)
-	hUserAbort("Couldn't find anything like band '%s'", band);
-    else
-	{
-	e = choppedBand + strlen(choppedBand) - 1;
-	*e = 0;
-	if (e[-1] == '.')
-	   e[-1] = 0;
-        warn("Band %s%s is at higher resolution than data, chopping to %s%s",
-	    chromosome+3, band, chromosome+3, choppedBand);
-	}
-    }
+dyStringFree(&query);
+return FALSE;
 }
 
-boolean hgIsCytoBandName(char *db, char *spec, char **retChromName, char **retBandName)
+boolean hgParseCytoBandName(char *db, char *spec, char **retChromName, char **retBandName)
 /* Return TRUE if spec is a cytological band name including chromosome short 
  * name. Returns chromosome chrN name and band (with chromosome stripped off) */
 {
-char *fullChromName, *shortChromName;
-int len;
-int dotCount = 0;
-char *s, c;
-
-/* First make sure spec is in format to be a band name. */
-if ((fullChromName = startsWithShortHumanChromName(db, spec)) == NULL)
-    return FALSE;
-shortChromName = skipChr(fullChromName);
-len = strlen(shortChromName);
-spec += len;
-c = spec[0];
-if (c != 'p' && c != 'q')
-    return FALSE;
-/* the mouse bands can have a letter here, A-H, searchType cytoBand
- * doesn't seem to use the termRegx */
-if (!(isdigit(spec[1]) || (1 == countChars("ABCDEFGH", spec[1]))))
-    return FALSE;
-
-/* Make sure rest is digits with maybe one '.' */
-s = spec+2;
-while ((c = *s++) != 0)
-    {
-    if (c == '.')
-        ++dotCount;
-    else if (!isdigit(c))
-        return FALSE;
+regmatch_t substrArr[5];
+// See if spec looks like a "chr"-less chromosome followed by a p or q, then a number,
+// and possibly a '.' and another number.
+// Mouse bands may have a letter A-H before the number, and may have no number.
+// Horse bands may have "pq".
+if (regexMatchSubstrNoCase(spec, "^(X|Y|[0-9]+)([pq]+[A-H]?([0-9]+(\\.[0-9]+)?)?)$",
+                           substrArr, ArraySize(substrArr)))
+    {
+    char chrSpec[PATH_LEN];
+    safencpy(chrSpec, sizeof(chrSpec), "chr", 3);
+    safencpy(chrSpec+3, sizeof(chrSpec)-3, spec, substrArr[1].rm_eo);
+    char *chromName = hgOfficialChromName(db, chrSpec);
+    if (chromName)
+        {
+        if (retChromName)
+            *retChromName = chromName;
+        if (retBandName)
+            *retBandName = cloneString(spec + substrArr[2].rm_so);
+        return TRUE;
+        }
     }
-if (dotCount > 1)
 return FALSE;
-*retChromName = fullChromName;
-*retBandName = spec;
-return TRUE;
 }
 
 boolean hgFindCytoBand(char *db, char *spec, char **retChromName, int *retWinStart,
 		       int *retWinEnd)
 /* Return position associated with cytological band if spec looks to be 
  * in that form. */
 {
 char *bandName;
 
-if (!hgIsCytoBandName(db, spec, retChromName, &bandName))
+if (!hgParseCytoBandName(db, spec, retChromName, &bandName))
      return FALSE;
-hgFindChromBand(db, *retChromName, bandName, retWinStart, retWinEnd);
-return TRUE;
+return hgFindChromBand(db, *retChromName, bandName, retWinStart, retWinEnd);
 }
 
 boolean findChromContigPos(char *db, char *name, char **retChromName, 
 	int *retWinStart, int *retWinEnd)
 /* Find position in genome of contig.  Look in all chroms.
  * Don't alter return variables unless found. */
 /* NOTE: could probably speed this up by using the chromInfo hashtable */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr = NULL;
 char **row;
 char query[256];
 boolean foundIt = FALSE;
 
 /* In case this is a scaffold-based assembly, check for unsplit table first: */