ebcbc767842df436441e62e1d90f0038b85583b5 galt Tue Aug 25 14:02:57 2015 -0700 fix phyloGif to accept more special chars. fixes #15919
diff --git src/lib/phyloTree.c src/lib/phyloTree.c
index 11ce572..5195530 100644
--- src/lib/phyloTree.c
+++ src/lib/phyloTree.c
@@ -18,55 +18,105 @@
     tree = phyloParseString(ptr);
 return tree;
 }
 struct phyloTree *phyloOpenTree(char *fileName)
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct phyloTree *tree = phyloReadTree(lf);
 lineFileClose(&lf);
 return tree;
 }
+static char *unescapeNewark(char *s)
+/* unescape backslashed Newark punctuation chars */
+{
+char out[strlen(s)+1];
+char *o = out;
+boolean escaped = FALSE;
+while(TRUE)
+    {
+    char c = *s++;
+    if (escaped)
+        {
+        if (c == '(' || c == ')' || c == ',' || c == ':' || c == ';' || c == '\\')
+            {
+            *o++ = c;
+            }
+        else
+            {
+            errAbort("Expected Newark punctuation characters or backslash after a backslash.");
+            }
+        escaped = FALSE;
+        }
+    else
+        {
+        if (c == '\\')
+            {
+            escaped = TRUE;
+            }
+        else
+            {
+            if (c == '(' || c == ')' || c == ',' || c == ':' || c == ';' || c == '\\')
+                errAbort("Expected a backslash before Newark punctuation characters or backslash.");
+            *o++ = c;
+            }
+        }
+    if (c == 0)
+        break;
+    }
+return cloneString(out);
+}
+
 static struct phyloName *parseIdent(char **ptrPtr)
 /* read a node name with possibile branch length */
 {
 struct phyloName *pName;
 char *start = *ptrPtr;
 char *ptr = *ptrPtr;
 AllocVar(pName);
-/* legal id's are alphanumeric */
-while(isalpha(*ptr) || isdigit(*ptr) || (*ptr == '/')
-    || (*ptr == '\'')
-    || (*ptr == '>')
-    || (*ptr == '<')
-    || (*ptr == '.') || (*ptr == '_'))
+
+
+/* legal id's are anything except Newick punctuation chars */
+char prev = ' ';
+char c;
+while((c = *ptr))
+    {
+    if (c == '(' || c == ')' || c == ',' || c == ':' || c == ';')
+        {
+        if (prev != '\\') // backslash escape char?
+            break;
+        }
     ptr++;
+    prev = c;
+    }
 /* did we read something? */
 if(ptr > start)
     {
     char val;
     val = *ptr;
     *ptr = 0;
     pName->name = cloneString(start);
+    pName->name = trimSpaces(pName->name);
+    pName->name = unescapeNewark(pName->name);
     *ptr = val;
     }
 /* is there some branch length info */
 if (*ptr == ':')
     {
     ptr++;
     sscanf(ptr, "%lg", &pName->length);
     while ((*ptr != '[') && (*ptr != ')') && (*ptr != ',') && (*ptr != ';'))
         ptr++;
     }
 *ptrPtr = ptr;
 return pName;
@@ -98,30 +148,34 @@
 {
 return newEdge(parent, child);
 }
 static struct phyloTree *parseSubTree(char **ptrPtr)
 /* the recursive workhorse function, parses a tree from ptr */
 {
 struct phyloTree *node = NULL;
 char *ptr = *ptrPtr;
 /* trees are terminated by one of these three chars */
 if ((*ptr == ';') || (*ptr == ',') || (*ptr == ')') )
     return NULL;
 AllocVar(node);
+
+while(*ptr == ' ')
+    ptr++;
+
 if (*ptr == '(')
     {
     struct phyloTree *edge;
     ptr++;
     do
         {
         struct phyloTree *child = parseSubTree(&ptr);
         if (!child)
             errAbort("missing child/subTree at (%s)",ptr-1);
         edge = newEdge(node,child);
         edge->parent = node;
         } while (*ptr++ == ',');
     --ptr;
@@ -142,38 +196,52 @@
         node->isDup = TRUE;
     while(*ptr != ']')
         ptr++;
     ptr++;
     }
 *ptrPtr = ptr;
 return node;
 }
+static void replaceWhitespaceWithSpaces(char *s)
+/* Replace all whitespace chars with a space char */
+{
+char c;
+for(c=*s; c != 0; c = *(++s))
+    {
+    if (isspace(c))
+        {
+        *s = ' ';
+        }
+    }
+}
+
+
 struct phyloTree *phyloParseString(char *string)
 /* build a phyloTree from a string */
 {
 struct phyloTree *tree = NULL;
-char *ptr = string;
-eraseWhiteSpace(string);
+replaceWhitespaceWithSpaces(string);
+char *ptr = string;
 tree = parseSubTree(&ptr);
 if (*ptr != ';')
     errAbort("expecting tree terminator ';', found '%s'", ptr);
 return tree;
 }
 /* some static stuff for printing out trees */
 static int recurseCount = 0;
 static void tabOut(FILE *f)
 {
 int i;