ebcbc767842df436441e62e1d90f0038b85583b5
galt
  Tue Aug 25 14:02:57 2015 -0700
fix phyloGif to accept more special chars. fixes #15919

diff --git src/hg/phyloGif/phyloGif.c src/hg/phyloGif/phyloGif.c
index 360aaff..9c7b6ed 100644
--- src/hg/phyloGif/phyloGif.c
+++ src/hg/phyloGif/phyloGif.c
@@ -68,35 +68,36 @@
 
 
 #include "errAbort.h"
 #include "errCatch.h"
 
 
 struct cart *cart=NULL;      /* The user's ui state. */
 struct hash *oldVars = NULL;
 boolean onWeb = FALSE;
 
 int width=240,height=512;
 boolean branchLengths = FALSE;  /* branch lengths */
 boolean lengthLegend = FALSE;   /* length ruler*/
 boolean branchLabels = FALSE;   /* labelled branch lengths */
 boolean htmlPageWrapper = FALSE;  /* wrap output in an html page */
-boolean preserveUnderscores = FALSE;   /* preserve underscores in input as spaces in output */
+boolean stripUnderscoreSuff = FALSE;   /* strip underscore suffixes from labels in input */
+boolean dashToSpace = FALSE;    /* convert dash to space */
+boolean underToSpace = FALSE;   /* convert underscore to space */
 boolean monospace = FALSE;      /* use monospace font */
 int branchDecimals = 2;         /* show branch label length to two decimals by default */
 int branchMultiplier = 1;       /* multiply branch length by factor */
-char *escapePattern = NULL;      /* use to escape dash '-' char in input */
 char layoutErrMsg[1024] = "";
 
 /* Null terminated list of CGI Variables we don't want to save
  * permanently. */
 char *excludeVars[] = {"Submit", "submit", "phyloGif_submit", "phyloGif_restore", NULL};
 
 void usage(char *msg)
 /* Explain usage and exit. */
 {
 errAbort(
     "%s\n\n"
     "phyloGif - parse and display phyloGenetic tree\n"
     "\n"
     "Command-line Usage Examples:\n"
     "   phyloGif -phyloGif_tree=tree.nh [options] > phylo.gif \n"
@@ -208,37 +209,48 @@
 else if (phyloTree->numEdges == 0)  /* leaf */
     {
     int w=0;
     double factor=0.0;
     this->depth=0;
     this->vPos=*pNumLeafs;
     (*pNumLeafs)++;
 
     if(!phyloTree->ident->name)
 	{
 	safef(layoutErrMsg,sizeof(layoutErrMsg),
 	"leaf is missing label\n");
 	return;
 	}
 
-    /* de-escape name if needed */
-    if(stringIn(escapePattern,phyloTree->ident->name))
+    if(dashToSpace)
 	{
 	char *temp = phyloTree->ident->name;
-	phyloTree->ident->name = replaceChars(temp,escapePattern," ");
+	phyloTree->ident->name = replaceChars(temp,"-"," ");
 	freez(&temp);
 	}	
+
+    if(underToSpace)
+	{
+	char *temp = phyloTree->ident->name;
+	phyloTree->ident->name = replaceChars(temp,"_"," ");
+	freez(&temp);
+	}	
+
+    /* strip underscore suffixes option */
+    if (stripUnderscoreSuff)
+	stripUnderscoreSuffixes(phyloTree->ident->name);
+
     w=mgFontStringWidth(font,phyloTree->ident->name);
     if (w > *pMaxLabelWidth)
 	*pMaxLabelWidth = w;
     factor = (width - 3*MARGIN - w) / this->hPos;
     if (*pMinMaxFactor == 0.0 || factor < *pMinMaxFactor)
 	*pMinMaxFactor = factor;
     }
 else
     {
     safef(layoutErrMsg,sizeof(layoutErrMsg),
 	"Expected tree nodes to have 0 or 2 edges, found %d.\n"
 	"Check for missing commas or missing data.\n"
 	,phyloTree->numEdges); 
     }
 
@@ -378,70 +390,76 @@
     	printf("Content-type: image/gif\r\n");
     printf("\r\n");
     return 0;
     }
 
 if (useCart)
     {
     width = cartUsualInt(cart,"phyloGif_width",width);    
     height = cartUsualInt(cart,"phyloGif_height",height);    
     phyloData = cloneString(cartOptionalString(cart,"phyloGif_tree"));
     branchLengths = cartVarExists(cart,"phyloGif_branchLengths");
     lengthLegend = cartVarExists(cart,"phyloGif_lengthLegend");
     branchLabels = cartVarExists(cart,"phyloGif_branchLabels");
     branchDecimals = cartUsualInt(cart,"phyloGif_branchDecimals", branchDecimals);
     branchMultiplier = cartUsualInt(cart,"phyloGif_branchMultiplier", branchMultiplier);
-    preserveUnderscores = cartVarExists(cart,"phyloGif_underscores");
+    stripUnderscoreSuff = cartVarExists(cart,"phyloGif_undersuff_strip");
+    dashToSpace = cartVarExists(cart,"phyloGif_dash_to_space");
+    underToSpace = cartVarExists(cart,"phyloGif_under_to_space");
     monospace = cartVarExists(cart, "phyloGif_monospace");
     }
 else
     {
     width = cgiUsualInt("phyloGif_width",width);    
     height = cgiUsualInt("phyloGif_height",height);    
     phyloData = cloneString(cgiOptionalString("phyloGif_tree"));
     branchLengths = cgiVarExists("phyloGif_branchLengths");
     lengthLegend = cgiVarExists("phyloGif_lengthLegend");
     branchLabels = cgiVarExists("phyloGif_branchLabels");
     branchDecimals = cgiUsualInt("phyloGif_branchDecimals", branchDecimals);
     branchMultiplier = cgiUsualInt("phyloGif_branchMultiplier", branchMultiplier);
-    preserveUnderscores = cgiVarExists("phyloGif_underscores");
+    stripUnderscoreSuff = cgiVarExists("phyloGif_undersuff_strip");
+    dashToSpace = cgiVarExists("phyloGif_dash_to_space");
+    underToSpace = cgiVarExists("phyloGif_under_to_space");
     monospace = cgiVarExists("phyloGif_monospace");
     }
     
 if (useCart)
     {
     if (onWeb)
 	{
     	printf("Content-type: text/html\r\n");
 	printf("\r\n");
 	cartWebStart(cart, NULL, "%s", "phyloGif Interactive Phylogenetic Tree Gif Maker");
 
 	if (isMSIE)  /* cannot handle long urls */
 	    puts("<form method=\"POST\" action=\"phyloGif\" name=\"mainForm\">");
 	else
 	    puts("<form method=\"GET\" action=\"phyloGif\" name=\"mainForm\">");
 
 	cartSaveSession(cart);
 	puts("<table>");
 	puts("<tr><td>Width:</td><td>"); cartMakeIntVar(cart, "phyloGif_width", width, 4); puts("</td></tr>");
 	puts("<tr><td>Height:</td><td>"); cartMakeIntVar(cart, "phyloGif_height", height, 4); puts("</td></tr>");
 	puts("<tr><td>Use branch lengths?</td><td>"); cartMakeCheckBox(cart, "phyloGif_branchLengths", branchLengths); puts("</td></tr>");
 	puts("<tr><td>&nbsp; Show length ruler?</td><td>"); cartMakeCheckBox(cart, "phyloGif_lengthLegend", lengthLegend); puts("</td></tr>");
 	puts("<tr><td>&nbsp; Show length values?</td><td>"); cartMakeCheckBox(cart, "phyloGif_branchLabels", branchLabels); puts("</td></tr>");
 	puts("<tr><td>&nbsp; How many decimal places?</td><td>"); cartMakeIntVar(cart, "phyloGif_branchDecimals", branchDecimals,1); puts("</td></tr>");
 	puts("<tr><td>&nbsp; Multiply branch length by factor?</td><td>"); cartMakeIntVar(cart, "phyloGif_branchMultiplier", branchMultiplier,5); puts("</td></tr>");
-	puts("<tr><td>Preserve Underscores?</td><td>"); cartMakeCheckBox(cart, "phyloGif_underscores", preserveUnderscores); puts("</td></tr>");
+	puts("<tr><td>Strip underscore-suffixes?</td><td>"); cartMakeCheckBox(cart, "phyloGif_undersuff_strip", stripUnderscoreSuff); puts("</td></tr>");
+	puts("<tr><td>Change dash to space?</td><td>"); cartMakeCheckBox(cart, "phyloGif_dash_to_space", dashToSpace); puts("</td></tr>");
+	puts("<tr><td>Change underscore to space?</td><td>"); cartMakeCheckBox(cart, "phyloGif_under_to_space", underToSpace); puts("</td></tr>");
 	puts("<tr><td>Wrap in html page?</td><td>"); cartMakeCheckBox(cart, "phyloGif_htmlPage", htmlPageWrapper); puts("</td></tr>");
 	puts("<tr><td>Monospace font?</td><td>"); cartMakeCheckBox(cart, "phyloGif_monospace", monospace); puts("</td></tr>");
 
         printf("<tr><td><big>TREE:</big>");
 	puts("<br><br><INPUT type=\"submit\" name=\"phyloGif_restore\" value=\"restore default\">");
 	
 	puts("</td><td><textarea name=\"phyloGif_tree\" rows=14 cols=70>");
 	if (NULL == phyloData || phyloData[0] == '\0' || cgiVarExists("phyloGif_restore"))
 	    {
 	    puts(
 "(((((((((\n"
 "(human_hg18:0.00669,chimp_panTro1:0.00757):0.0243,\n"
 "  macaque_rheMac2:0.0592):0.0240,\n"
 "    ((rat_rn4:0.0817,mouse_mm8:0.0770):0.229,\n"
 "          rabbit_oryCun1:0.207):0.107):0.0230,\n"
@@ -459,64 +477,76 @@
 	    {
 	    printf("%s",phyloData);
 	    }
 	puts("</TEXTAREA>");
 	puts("</td></tr>");
 	puts("<tr><td>&nbsp;</td><td>");
 	puts("<INPUT type=\"submit\" name=\"phyloGif_submit\" value=\"submit\">");
 	puts("</td></tr>");
 	puts("</table>");
 	puts("</form>");
 	webNewSection("Notes");
     	puts(
 "\n"
 "1. Length-ruler and length-values cannot be shown unless use-branch-lengths is also checked.<br>\n"
 "<br>\n"
-"2. Underscores and anything following them are automatically stripped from node labels\n"
-"unless the preserve-underscores checkbox is checked, in which case they are converted to spaces.<br>\n"
+"2. If \"Strip underscore-suffixes?\" is checked, underscores and anything following them are stripped from node labels.<br>\n"
 "<br>\n"
-"3. If a space is required in a node label, enter it as a dash.<br>\n"
+"3. For backwards compatibility, options exist to convert a dash or underscore to a space in a node label.<br>\n"
 "<br>\n"
 "4. The tree is in the phastCons or .nh format name:length.  Parentheses create a parent.\n"
 "Parents must have two children. Length is not required if use-branch-lengths is not checked.\n"
 "The length of the root branch is usually not specified.<br>\n"
+"<br>\n"
 "Examples:<br>\n"
 "<table cellpadding=10>\n"
 "<tr><td><PRE>\n"
 "((A:0.1,B:0.1):0.2,C:0.15);\n"
 "</PRE></td><td>\n"
 "<IMG SRC=\"?phyloGif_width=200&phyloGif_height=120&phyloGif_branchLengths=1&phyloGif_tree=((A:0.1,B:0.1):0.2,C:0.15);\">\n"
 "</td></tr>\n"
 "<tr><td><PRE>\n"
 "((A:0.1,B:0.1)D:0.2,C:0.15)E;\n"
 "</PRE></td><td>\n"
 "<IMG SRC=\"?phyloGif_width=200&phyloGif_height=120&phyloGif_branchLengths=1&phyloGif_tree=((A:0.1,B:0.1)D:0.2,C:0.15)E;\">\n"
 "<br>(internal or ancestral node labels)\n"
 "</td></tr>\n"
 "<tr><td><PRE>\n"
 "  ((((\n"
 "   (\n"
 "     ((mouse,rat),human),\n"
 "       (dog,cow)\n"
 "    ),\n"
 "     opossum),\n"
 "     chicken),\n"
 "     xenopus),\n"
 "    (tetraodon,zebrafish));\n"
 "</PRE></td><td>\n"
 "<IMG SRC=\"?phyloGif_width=200&phyloGif_height=200&phyloGif_tree=(((((((mouse,rat),human),(dog,cow)),opossum),chicken),xenopus),(tetraodon,zebrafish));\">\n"
 "</td></tr>\n"
+"<tr><td>\n"
+"We have extended the Newick format to allow spaces <br>\n"
+"and other non-alphanumeric characters in node labels.<br>\n"
+"If you need a backslash, comma, semi-colon, colon, or parenthesis,<br>\n"
+"it must be escaped with a back-slash character. <br>\n"
+"<PRE>\n"
+"((Brandt's myotis \\(bat\\):0.1,\n"
+"  White-tailed eagle:0.1):0.2,\n"
+" S. purpuratus:0.15);\n"
+"</PRE></td><td>\n"
+"<IMG SRC=\"?phyloGif_width=200&phyloGif_height=120&phyloGif_branchLengths=1&phyloGif_tree=((Brandt's myotis \\(bat\\):0.1,White-tailed eagle:0.1):0.2,S. purpuratus:0.15);\">\n"
+"</td></tr>\n"
 "</table>\n"
 "5. PhastCons branch lengths are expected substitutions per site, allowing for\n"
 "multiple hits.  So a branch length of 0.5 means an average of one\n"
 "substitution every two nucleotide sites, but the percent id will be\n"
 "less than 50% because some of those substitutions are obscured by\n"
 "subsequent substitutions.  They are estimated from neutral sites,\n"
 "sometimes fourfold degenerate sites in coding regions, or sometimes\n"
 "\"nonconserved\" sites according to phastCons.  The numbers are significant\n"
 "to two or three figures.<br>\n"
 "<br>\n"
 "6. Wrap-in-html is useful when the browser automatically shinks a large image.\n"
 "This option keeps the image view full in the browser automatically.\n"
 "However, do not use with IE6 when performing save-as.\n"
 "<br>"
 	    );
@@ -533,89 +563,73 @@
     printf("\r\n");
     puts("<html><head><title>Phylogenetic Tree</title></head><body>");
     printf("<IMAGE SRC=\"http://%s%s"
 	    "?phyloGif_width=%d"
 	    "&phyloGif_height=%d"
 	    "&phyloGif_tree=%s"
 	,getenv("SERVER_NAME"),getenv("SCRIPT_NAME"),width,height,phyloData);
     if (branchLengths)
 	printf("&phyloGif_branchLengths=1");
     if (lengthLegend)
 	printf("&phyloGif_lengthLegend=1");
     if (branchLabels)
 	printf("&phyloGif_branchLabels=1");
     printf("&phyloGif_branchDecimals=%d",branchDecimals);
     printf("&phyloGif_branchMultipliers=%d",branchMultiplier);
-    if (preserveUnderscores)
+    if (stripUnderscoreSuff)
 	printf("&phyloGif_underscores=1");
+    if (dashToSpace)
+	printf("&phyloGif_dash_to_space=1");
+    if (underToSpace)
+	printf("&phyloGif_under_to_space=1");
     if (monospace)
 	printf("&phyloGif_monospace=1");
     puts("\"></body></html>");
     freez(&phyloData);
     return 0;
     }
 
 
 
 if (!onWeb && phyloData[0] != '(')
     {
     int fd = 0;  /* default to stdin */
     if (!sameString(phyloData,"stdin"))
 	fd = open(phyloData,O_RDONLY);
     struct dyString *dy = netSlurpFile(fd);
     if (fd)
     	close(fd);
     freez(&phyloData);
     phyloData = dyStringCannibalize(&dy);
     }
 
-/* preserve underscores option */
-if (preserveUnderscores)
+/* remove carriage returns which are a side-effect of html forms */
+if (strchr(phyloData,'\r'))
     {
     char *temp = phyloData;
-    phyloData = replaceChars(temp,"_","-");
+    phyloData = replaceChars(temp,"\r","");
     freez(&temp);
     }	
 
-/* get rid of underscore suffixes */
-stripUnderscoreSuffixes(phyloData);
-
-/* escape dash chars with some XXX pattern */
-escapePattern = cloneString("");
-do
-    {
-    char *temp = escapePattern;
-    escapePattern=addSuffix(temp,"X");
-    freez(&temp);
-    } while (stringIn(escapePattern,phyloData));
-if (strchr(phyloData,'-'))
-    {
-    char *temp = phyloData;
-    phyloData = replaceChars(temp,"-",escapePattern);
-    freez(&temp);
-    }	
-
-
 /* add trailing semi-colon if it got stripped off */
 if (!strchr(phyloData,';'))
     {
     temp = phyloData;
     phyloData = addSuffix(phyloData,";");
     freez(&temp);
     }
 
-
 /* parse phyloTree, but catch errAborts if any */
 
 {
 struct errCatch *errCatch = errCatchNew();
 char *errMsg = NULL;
 if (errCatchStart(errCatch))
     {
     phyloTree = phyloParseString(phyloData);
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError)
     {
     errMsg = cloneString(errCatch->message->string);
     }
 errCatchFree(&errCatch);