4a3f3df9cab079354e8a9f4cf6c56f616f1ea0ab kent Fri Apr 6 16:29:22 2012 -0700 Removing MySQL linking dependency. diff --git src/hg/autoDtd/autoDtd.c src/hg/autoDtd/autoDtd.c deleted file mode 100644 index 42e1084..0000000 --- src/hg/autoDtd/autoDtd.c +++ /dev/null @@ -1,463 +0,0 @@ -/* autoDtd - Give this a XML document to look at and it will come up with a - * DTD to describe it, and possibly some more readable and informative outputs - * as well. */ -/* This file is copyright 2005 Jim Kent, but license is hereby - * granted for all use - public, private or commercial. */ - -#include "common.h" -#include "linefile.h" -#include "hash.h" -#include "options.h" -#include "xap.h" - - -void usage() -/* Explain usage and exit. */ -{ -errAbort( - "autoDtd - Give this a XML document to look at and it will come up with a DTD\n" - "to describe it.\n" - "usage:\n" - " autoDtd in.xml out.dtd out.stats\n" - "options:\n" - " -tree=out.tree - Output tag tree.\n" - " -atree=out.atree - Output attributed tag tree.\n" - ); -} - -static struct optionSpec options[] = { - {"tree", OPTION_STRING}, - {"atree", OPTION_STRING}, - {NULL, 0}, -}; - -struct type -/* Information on a type. */ - { - struct type *next; - char *name; /* Name of type/field. */ - int count; /* Number of occurences of this tag. */ - struct hash *attHash; /* Hash of all elements keyed by name */ - struct attribute *attributes; - struct hash *elHash; /* Hash of all elements keyed by type->name */ - struct element *elements; - struct attribute *textAttribute; /* Information on text. */ - }; - -struct attribute -/* Information on an attribute */ - { - struct attribute *next; - char *name; - int count; /* Number of times we've seen this attribute. */ - boolean isOptional; /* True if it's not always there. */ - boolean nonInt; /* True if not an int. */ - boolean nonFloat; /* True if not a number. */ - boolean seenThisRound; /* True if seen this round. */ - struct hash *values; /* Hash of unique values. */ - int maxLen; /* Maximum length */ - }; - -struct element -/* Information on an element */ - { - struct element *next; - struct type *type; /* Element type */ - boolean isOptional; /* True if it's optional. */ - boolean isList; /* True if it's a list. */ - boolean seenThisRound; /* True if seen this round. */ - }; - -struct hash *typeHash; /* Keyed by struct type */ -struct type *topType; /* Highest level type */ - -boolean hasLeftPaddedZero(char *s) -/* does the string have a leading zero */ -{ -if (strlen(s) < 2) - return FALSE; -return s[0]=='0'; -} - -boolean isAllUInt(char *s) -/* Return true if it looks like an unsigned integer */ -{ -char c; -while ((c = *s++) != 0) - if (!isdigit(c)) - return FALSE; -return TRUE; -} - -boolean isAllInt(char *s) -/* Return true if it looks like an integer */ -{ -if (*s == '-') - ++s; -return isAllUInt(s) && !hasLeftPaddedZero(s); -} - -boolean isAllFloat(char *s) -/* Return true if it looks like an floating point */ -{ -char *point = strchr(s,'.'); -if (!point) - return isAllInt(s); -if (!isAllUInt(point+1)) - return FALSE; -char *temp=cloneStringZ(s,point-s); -boolean result = isAllInt(temp); -freeMem(temp); -return result; -} - - -void *startHandler(struct xap *xap, char *name, char **atts) -/* Called at the start of a tag after attributes are parsed. */ -{ -int i; -struct type *type = hashFindVal(typeHash, name); -struct attribute *att; -struct element *el; - -if (type == NULL) - { - AllocVar(type); - hashAddSaveName(typeHash, name, type, &type->name); - type->elHash = hashNew(6); - type->attHash = hashNew(6); - } - -/* Zero out seenThisRound flags */ -for (el = type->elements; el != NULL; el = el->next) - el->seenThisRound = FALSE; -for (att = type->attributes; att != NULL; att = att->next) - att->seenThisRound = FALSE; - -for (i=0; atts[i] != NULL; i += 2) - { - char *name = atts[i], *val = atts[i+1]; - int valLen = strlen(val); - att = hashFindVal(type->attHash, name); - if (att == NULL) - { - AllocVar(att); - hashAddSaveName(type->attHash, name, att, &att->name); - att->values = hashNew(16); - slAddTail(&type->attributes, att); - if (type->count != 0) - att->isOptional = TRUE; - } - att->count += 1; - hashStore(att->values, val); - if (valLen > att->maxLen) - att->maxLen = valLen; - if (!att->nonInt) - if (!isAllInt(val) || hasLeftPaddedZero(val)) - att->nonInt = TRUE; - if (!att->nonFloat) - if (!isAllFloat(val)) - att->nonFloat = TRUE; - att->seenThisRound = TRUE; - } -for (att = type->attributes; att != NULL; att = att->next) - { - if (!att->seenThisRound) - att->isOptional = TRUE; - } - -if (xap->stackDepth > 1) - { - struct xapStack *st = xap->stack+1; - struct type *parent = st->object; - el = hashFindVal(parent->elHash, name); - if (el == NULL) - { - AllocVar(el); - hashAdd(parent->elHash, name, el); - el->type = type; - slAddTail(&parent->elements, el); - if (parent->count != 0) - el->isOptional = TRUE; - } - if (el->seenThisRound) - el->isList = TRUE; - el->seenThisRound = TRUE; - } -return type; -} - -void endHandler(struct xap *xap, char *name) -/* Called at end of a tag */ -{ -struct type *type = xap->stack->object; -char *text = skipLeadingSpaces(xap->stack->text->string); -struct element *el; -for (el = type->elements; el != NULL; el = el->next) - { - if (!el->seenThisRound) - el->isOptional = TRUE; - } -if (text[0] == 0) - { - if (type->textAttribute != NULL) - type->textAttribute->isOptional = TRUE; - } -else - { - int textLen = strlen(text); - struct attribute *att = type->textAttribute; - if (att == NULL) - { - type->textAttribute = AllocVar(att); - att->name = "<text>"; - att->values = hashNew(16); - if (type->count != 0) - att->isOptional = TRUE; - } - if (att->maxLen < textLen) - att->maxLen = textLen; - hashStore(att->values, text); - att->count += 1; - if (!att->nonInt) - if (!isAllInt(text) || hasLeftPaddedZero(text)) - att->nonInt = TRUE; - if (!att->nonFloat) - if (!isAllFloat(text)) - att->nonFloat = TRUE; - } -type->count += 1; -topType = type; -} - -char *attDataType(struct attribute *att) -/* Return data type associated with attribute as a string */ -{ -if (!att->nonInt) - return "int"; -else if (!att->nonFloat) - return "float"; -else - return "string"; -} - -void rWriteDtd(FILE *dtdFile, FILE *statsFile, struct type *type, - struct hash *uniqHash) -/* Recursively write out DTD. */ -{ -struct element *el; -struct attribute *att; -int elCount = slCount(type->elements); -boolean multiline; - -if (type->textAttribute != NULL) - elCount += 1; -multiline = (elCount > 3); -hashAdd(uniqHash, type->name, type); -fprintf(dtdFile, "<!ELEMENT %s (", type->name); -if (multiline) - fprintf(dtdFile, "\n"); -for (el = type->elements; el != NULL; el = el->next) - { - if (multiline) - fprintf(dtdFile, "\t"); - fprintf(dtdFile, "%s", el->type->name); - if (el->isList) - { - if (el->isOptional) - fprintf(dtdFile, "*"); - else - fprintf(dtdFile, "+"); - } - else - { - if (el->isOptional) - fprintf(dtdFile, "?"); - } - if (el->next != NULL || type->textAttribute != NULL) - fprintf(dtdFile, ", "); - if (multiline) - fprintf(dtdFile, "\n"); - } -if (type->textAttribute != NULL) - { - if (multiline) - fprintf(dtdFile, "\t"); - if (!type->textAttribute->nonInt) - fprintf(dtdFile, "%%INTEGER;"); - else if (!type->textAttribute->nonFloat) - fprintf(dtdFile, "%%REAL;"); - else - fprintf(dtdFile, "#PCDATA"); - if (multiline) - fprintf(dtdFile, "\n"); - } -fprintf(dtdFile, ")>\n"); -fprintf(statsFile, "%s %d\n", type->name, type->count); -if ((att = type->textAttribute) != NULL) - { - fprintf(statsFile, "\t%s\t%d\t%s\t%d\t%d\n", att->name, att->maxLen, - attDataType(att), att->count, att->values->elCount); - } -else - { - fprintf(statsFile, "\t<text>\t0\tnone\t0\t0\n"); - } - -for (att = type->attributes; att != NULL; att = att->next) - { - fprintf(dtdFile, "<!ATTLIST %s %s ", type->name, att->name); - if (!att->nonInt) - fprintf(dtdFile, "%%int;"); - else if (!att->nonFloat) - fprintf(dtdFile, "%%float;"); - else - fprintf(dtdFile, "CDATA"); - if (att->isOptional) - fprintf(dtdFile, " #IMPLIED"); - else - fprintf(dtdFile, " #REQUIRED"); - fprintf(dtdFile, ">\n"); - fprintf(statsFile, "\t%s\t%d\t%s\t%d\t%d\n", att->name, att->maxLen, - attDataType(att), att->count, att->values->elCount); - } -fprintf(dtdFile, "\n"); -fprintf(statsFile, "\n"); - -/* Now recurse if we haven't written children yet. */ -for (el = type->elements; el != NULL; el = el->next) - { - if (!hashLookup(uniqHash, el->type->name)) - { - rWriteDtd(dtdFile, statsFile, el->type, uniqHash); - } - } -} - -void writeDtd(char *dtdFileName, char *statsFileName, char *xmlFileName, - struct type *type) -/* Write out DTD. */ -{ -struct hash *uniqHash = newHash(0); /* Prevent writing dup defs for shared types. */ -FILE *dtdFile = mustOpen(dtdFileName, "w"); -FILE *statsFile = mustOpen(statsFileName, "w"); -fprintf(dtdFile, "<!-- This file was created by autoDtd based on %s -->\n\n", xmlFileName); -fprintf(dtdFile, "<!-- First some entities to mark numeric types in between tags. Same as NCBI. -->\n"); -fprintf(dtdFile, "<!ENTITY %% INTEGER \"#PCDATA\">\n"); -fprintf(dtdFile, "<!ENTITY %% REAL \"#PCDATA\">\n\n"); -fprintf(dtdFile, "<!-- Now some entities for numeric attributes. NCBI doesn't define these but we do. -->\n"); -fprintf(dtdFile, "<!ENTITY %% int \"CDATA\">\n"); -fprintf(dtdFile, "<!ENTITY %% float \"CDATA\">\n\n"); -fprintf(dtdFile, "<!-- Now the data structure in %s. -->\n", xmlFileName); -fprintf(statsFile, "#Statistics on %s\n", xmlFileName); -fprintf(statsFile, "#Format is:\n"); -fprintf(statsFile, "#<tag name> <tag count>\n"); -fprintf(statsFile, "# <<text>> <max length> <type> <count> <unique count>\n"); -fprintf(statsFile, "# <attribute name> <max length> <type> <count> <unique count>\n"); -fprintf(statsFile, "\n"); -rWriteDtd(dtdFile, statsFile, type, uniqHash); -carefulClose(&dtdFile); -carefulClose(&statsFile); -} - -void writeAttValType(FILE *f, struct attribute *att) -/* Write out #, % or ? depending if type is int, float, or string */ -{ -if (!att->nonInt) - fprintf(f, "#"); -else if (!att->nonFloat) - fprintf(f, "%%"); -else - fprintf(f, "$"); -} - - -void writeAttribute(FILE *f, struct attribute *att) -/* Write out information about attribute. */ -{ -fprintf(f, " "); -writeAttValType(f, att); -fprintf(f, "%s", att->name); -if (att->isOptional) - fprintf(f, "?"); -} - -void rWriteTree(FILE *f, struct type *type, boolean isOptional, - boolean isList, struct hash *uniqHash, - boolean withAttributes, int level) -/* Write out type and it's children. */ -{ -struct attribute *att; -struct element *el; -static struct type *parentStack[256]; -int i; - - -spaceOut(f, level*2); -if (withAttributes && type->textAttribute != NULL) - writeAttValType(f, type->textAttribute); -fprintf(f, "%s", type->name); -if (isList) - if (isOptional) - fprintf(f, "*"); - else - fprintf(f, "+"); -else - if (isOptional) - fprintf(f, "?"); -if (withAttributes) - { - for (att = type->attributes; att != NULL; att = att->next) - writeAttribute(f, att); - } -fprintf(f, "\n"); - -if (level >= ArraySize(parentStack)) - errAbort("Recursion too deep in rWriteTree"); -parentStack[level] = type; -for (i=level-1; i>= 0; i -= 1) - if (type == parentStack[i]) - return; /* Avoid cycling on self. */ - -for (el = type->elements; el != NULL; el = el->next) - rWriteTree(f, el->type, el->isOptional, el->isList, - uniqHash, withAttributes, level+1); -} - -void writeTree(char *fileName, struct type *root, boolean withAttributes) -/* Write out type tree to file. */ -{ -struct hash *uniqHash = newHash(0); /* Prevent writing dup defs. */ -FILE *f = mustOpen(fileName, "w"); -rWriteTree(f, root, FALSE, FALSE, uniqHash, withAttributes, 0); -carefulClose(&f); -} - - -void autoDtd(char *inXml, char *outDtd, char *outStats, char *treeFileName, - char *atreeFileName) -/* autoDtd - Give this a XML document to look at and it will come up with a - * DTD to describe it.. */ -{ -struct xap *xap = xapNew(startHandler, endHandler, inXml); -typeHash = newHash(0); -xapParseFile(xap, inXml); -writeDtd(outDtd, outStats, inXml, topType); -if (treeFileName != NULL) - writeTree(treeFileName, topType, FALSE); -if (atreeFileName != NULL) - writeTree(atreeFileName, topType, TRUE); -} - -int main(int argc, char *argv[]) -/* Process command line. */ -{ -char *treeFileName = NULL, *atreeFileName = NULL; -optionInit(&argc, argv, options); -if (argc != 4) - usage(); -treeFileName = optionVal("tree", treeFileName); -atreeFileName = optionVal("atree", atreeFileName); -autoDtd(argv[1], argv[2], argv[3], treeFileName, atreeFileName); -return 0; -}