a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/hg/sqlToXml/sqlToXml.c src/hg/sqlToXml/sqlToXml.c
index 90862a7..94640d2 100644
--- src/hg/sqlToXml/sqlToXml.c
+++ src/hg/sqlToXml/sqlToXml.c
@@ -1,511 +1,510 @@
 /* sqlToXml - Given a database, .as file, .joiner file, and a sql select 
  * statement, dump out results as XML. */
 /* This file is copyright 2005 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "obscure.h"
 #include "jksql.h"
 
-static char const rcsid[] = "$Id: sqlToXml.c,v 1.15 2008/09/03 19:21:21 markd Exp $";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "sqlToXml - dump out all or part of a relational database to XML, guided\n"
   "by a dump specification.  See sqlToXml.doc for additional information.\n"
   "usage:\n"
   "   sqlToXml database dumpSpec.od output.xml\n"
   "options:\n"
   "   -topTag=name - Give the top level XML tag the given name.  By\n"
   "               default it will be the same as the database name.\n"
   "   -query=file.sql - Instead of dumping whole database, just dump those\n"
   "                  records matching SQL select statement in file.sql.\n"
   "                  This statement should be of the form:\n"
   "           select * from table where ...\n"
   "                   or\n"
   "           select table.* from table,otherTables where ...\n"
   "                   Where the table is the same as the table in the first\n"
   "                   line of dumpSpec.\n"
   "   -tab=N - number of spaces betweeen tabs in xml.dumpSpec - by default it's 8.\n"
   "            (It may be best just to avoid tabs in that file though.)\n"
   "   -maxList=N - This will limit any lists in the output to no more than\n"
   "                size N.  This is mostly just for testing.\n"
   );
 }
 
 int tabSize = 8;
 int maxList = 0;
 char *topTag = NULL;
 
 static struct optionSpec options[] = {
    {"topTag", OPTION_STRING},
    {"query", OPTION_STRING},
    {"tab", OPTION_INT},
    {"maxList", OPTION_INT},
    {NULL, 0},
 };
 
 struct dyString *escaper;
 
 struct specTree 
 /* Holds dump specification tree. */
    {
    struct specTree *next;	/* Sibling */
    struct specTree *children;
    char *field;		/* Field in table to link on. 
                          * At top level is master table name instead.  */
    int fieldIx;		/* Index of field in table. */
    char *target;	/* Target table.field  */
    char *targetTable;	/* Table part of target */
    char *targetField;	/* Field part of target */
    char *outName;	/* Name to use for output, defaults to table name. */
    bool needsQuote;	/* Does join need value in quotes? */
    bool hideTable;	/* If true hide table (but not children) */
    bool hideField;	/* If true hide field. */
    bool textField;	/* IF true this is text field. */
    bool allInAttributes; /* If true all info is in attributes. */
    bool gotChildTags;    /* If true has child tags. */
    };
 
 #ifdef DEBUG
 static void dumpSpecTree(FILE *f, struct specTree *tree, int level)
 {
 struct specTree *child;
 spaceOut(f, level+2);
 fprintf(f, "f %s, fIx %d, target %s, hideTable %d, hideField %d, textField %d, allInI %d\n",
 	tree->field, tree->fieldIx, tree->target, tree->hideTable, tree->hideField, 
 	tree->textField, tree->allInAttributes);
 for (child = tree->children; child != NULL; child = child->next)
     dumpSpecTree(f, child, level+1);
 } 
 #endif /* DEBUG */
 
 struct specTree *specTreeTarget(struct specTree *parent, char *field)
 /* Look through all children for matching field. */
 {
 struct specTree *tree;
 for (tree = parent->children; tree != NULL; tree = tree->next)
     if (sameString(tree->field, field))
 	break;
 return tree;
 }
 
 struct typedField
 /* A field - name and type */
     {
     struct typedField *next;
     char *name;	/* Field name. */
     char type;  /* Value is " or space. */
     };
 
 int countIndent(char *s, int size)
 /* Count the number of spaces and tabs. */
 {
 int count = 0;
 int i;
 char c;
 for (i=0; i<size; ++i)
     {
     c = s[i];
     if (c == ' ')
         count += 1;
     else if (c == '\t')
         {
 	int nextTab = (count + tabSize)/tabSize * tabSize;
 	count = nextTab;
 	}
     else
         internalErr();
     }
 return count;
 }
 
 struct typedField *findField(struct typedField *list, char *name)
 /* Return field of given name, or NULL if none. */
 {
 struct typedField *field;
 for (field = list; field != NULL; field = field->next)
     if (sameString(field->name, name))
         break;
 return field;
 }
 
 
 boolean isStringField(struct hash *tableHash, char *tableDotField)
 /* Return true if given field is a string type field, needing quotations
  * in sql. */
 {
 char *dot = strchr(tableDotField, '.');
 if (dot != NULL)
     {
     struct typedField *field, *fieldList;
     *dot = 0;
     fieldList = hashMustFindVal(tableHash, tableDotField);
     field = findField(fieldList, dot+1);
     *dot = '.';
     if (field == NULL)
         errAbort("No field %s", tableDotField);
     return field->type == '"';
     }
 return FALSE;
 }
 
 void rSpecTreeLoad(struct lineFile *lf, 
 	struct specTree *parent, struct hash *tableHash)
 /* Load all children of parent. */
 {
 char *line, *pastSpace;
 int indent, curIndent;
 struct specTree *branch = NULL;
 struct typedField *tf, *tfList = hashMustFindVal(tableHash, parent->targetTable);
 if (!lineFileNextReal(lf, &line))
     return;
 pastSpace = skipLeadingSpaces(line);
 curIndent = countIndent(line, pastSpace - line);
 lineFileReuse(lf);
 for (;;)
     {
     if (!lineFileNextReal(lf, &line))
         break;
     pastSpace = skipLeadingSpaces(line);
     indent = countIndent(line, pastSpace - line);
     if (indent > curIndent)
 	{
 	lineFileReuse(lf);
 	rSpecTreeLoad(lf, branch, tableHash);
 	}
     else if (indent < curIndent)
 	{
 	lineFileReuse(lf);
 	break;
 	}
     else
 	{
 	char *field = nextWord(&pastSpace);
 	char *target = nextWord(&pastSpace);
 	char *extra = nextWord(&pastSpace);
 	char *dot, targetTable[256], *targetField;
 	if (target == NULL)
 	   errAbort("Missing target line %d of %s", lf->lineIx, lf->fileName);
 	AllocVar(branch);
 	branch->allInAttributes = TRUE;
 	if (field[0] != '.')
 	    errAbort("Field must start with . line %d of %s", 
 	    	lf->lineIx, lf->fileName);
 	field += 1;
 	branch->field = cloneString(field);
 	branch->target = cloneString(target);
 	dot = strchr(target, '.');
 	if (dot != NULL)
 	    {
 	    /* Parse out tableName and fieldName. */
 	    targetField = dot+1;
 	    *dot = 0;
 	    safef(targetTable, sizeof(targetTable), "%s", target);
 	    *dot = '.';
 	    branch->targetTable = cloneString(targetTable);
 	    branch->targetField = cloneString(targetField);
 
 	    /* Get info on current field. */
 	    tf = findField(tfList, field);
 	    if (tf == NULL)
 		errAbort("Field %s.%s doesn't exists",  parent->targetTable, field);
 	    branch->needsQuote = (tf->type == '"');
 	    branch->fieldIx = slIxFromElement(tfList, tf);
 
 	    /* See if it is followed by 'hide' */
 	    if (extra != NULL && sameString(extra, "hide"))
 	        {
 		extra = nextWord(&pastSpace);
 		branch->hideTable = TRUE;
 		}
 
 	    parent->allInAttributes = FALSE;
 	    parent->gotChildTags = TRUE;
 	    }
 	else
 	    {
 	    if (sameString(target, "hide"))
 		{
 		branch->hideField = TRUE;
 		}
 	    else if (sameString(target, "text"))
 	        {
 		tf = findField(tfList, field);
 		branch->fieldIx = slIxFromElement(tfList, tf);
 		branch->textField = TRUE;
 		parent->allInAttributes = FALSE;
 		}
 	    else
 	        {
 		errAbort("Don't recognize %s line %d of %s", 
 			target, lf->lineIx, lf->fileName);
 		}
 	    }
 	if (extra != NULL)
 	   {
 	   if (sameString(extra, "as"))
 	       {
 	       char *outName = nextWord(&pastSpace);
 	       if (outName == NULL)
 	           errAbort("Expecting something after 'as' line %d of %s.",
 		   	lf->lineIx, lf->fileName);
 	       branch->outName = cloneString(outName);
 	       extra = nextWord(&pastSpace);
 	       }
 	   }
 	if (extra != NULL)
 	    errAbort("There's extra stuff I don't understand line %d of %s.",
 		    lf->lineIx, lf->fileName);
 	if (branch->outName == NULL)
 	    branch->outName = cloneString(branch->targetTable);
 	slAddTail(&parent->children, branch);
 	}
     }
 }
 
 struct specTree *specTreeLoad(char *fileName, struct hash *tableHash)
 /* Load up a spec-tree from a file. */
 {
 struct specTree *root = NULL;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *line;
 char *table, *as, *outName;
 
 AllocVar(root);
 if (!lineFileNextReal(lf, &line))
     lineFileUnexpectedEnd(lf);
 line = trimSpaces(line);
 table = nextWord(&line);
 as = nextWord(&line);
 outName = nextWord(&line);
 root->targetTable = cloneString(table);
 if (as != NULL)
    {
    if (outName == NULL)
        errAbort("Expecting something after 'as' line %d of %s", 
        		lf->lineIx, lf->fileName);
    root->outName = cloneString(outName);
    }
 else
    root->outName = cloneString(root->targetTable);
 rSpecTreeLoad(lf, root, tableHash);
 
 lineFileClose(&lf);
 return root;
 }
 
 struct hash *tablesAndFields(struct sqlConnection *conn)
 /* Get hash of all tables.  Hash is keyed by table name.
  * Hash values are lists of typedField.   */
 {
 struct hash *hash = hashNew(0);
 struct slName *table, *tableList = sqlListTables(conn);
 for (table = tableList; table != NULL; table = table->next)
     {
     char query[256], **row;
     struct sqlResult *sr;
     struct typedField *fieldList = NULL, *field;
     safef(query, sizeof(query), "describe %s", table->name);
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
         {
 	char *name = row[0];
 	char *type = row[1];
 	char code = '"';
 	if ( startsWith("int", type)  
 	  || startsWith("float", type)
 	  || startsWith("tinyint", type)
 	  || startsWith("smallint", type))
 	    code = ' ';
 	AllocVar(field);
 	field->name = cloneString(name);
 	field->type = code;
 	slAddHead(&fieldList, field);
 	}
     slReverse(&fieldList);
     hashAdd(hash, table->name, fieldList);
     }
 return hash;
 }
 
 void escToXmlString(char *in, char *out)
 /* Put in the escapes xml needs inside of a string */
 {
 char c;
 while ((c = *in++) != 0)
     {
     if (c == '&')
 	{
         strcpy(out, "&amp;");
 	out += 5;
 	}
     else if (c == '"')
         {
 	strcpy(out, "&quot;");
 	out += 6;
 	}
     else
         *out++ = c;
     }
 *out = 0;
 }
 
 void rSqlToXml(struct sqlConnCache *cc, char *database, char *table, char *entryField,
 	char *query, struct hash *tableHash, struct specTree *tree,
 	FILE *f, int depth)
 /* Recursively output XML */
 {
 struct sqlConnection *conn = sqlConnCacheAlloc(cc, database);
 struct sqlResult *sr;
 char **row;
 struct typedField *col, *colList = hashMustFindVal(tableHash, table);
 
 verbose(2, "%s\n", query);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int rowIx = 0;
 
     /* Print non-joining columns as attributes. */
     if (!tree->hideTable)
 	{
 	spaceOut(f, depth*2);
 	fprintf(f, "<%s", tree->outName);
 	}
     for (col = colList; col != NULL; col = col->next, rowIx+=1)
         {
 	char *val = row[rowIx];
 	struct specTree *branch = specTreeTarget(tree, col->name);
 	if (branch == NULL && !sameString(col->name, entryField))
 	    {
 	    int len = strlen(val);
 	    dyStringBumpBufSize(escaper, len*5);
 	    escToXmlString(val, escaper->string);
 	    if (!tree->hideTable)
 		fprintf(f, " %s=\"%s\"", col->name, escaper->string);
 	    }
 	}
 
     /* If no subobjects, then close up tag, else proceed to subobjects. */
     if (tree->allInAttributes)
 	{
 	if (!tree->hideTable)
 	    fprintf(f, "/>\n");
 	}
     else
         {
 	struct specTree *branch;
 	if (!tree->hideTable)
 	    fprintf(f, ">");
 	rowIx = 0;
 	for (branch = tree->children; branch != NULL; branch = branch->next)
 	    {
 	    if (sameString(branch->target, "text"))
 	        {
 		char *val = row[branch->fieldIx];
 		int len = strlen(val);
 		dyStringBumpBufSize(escaper, len*5);
 		escToXmlString(val, escaper->string);
 		fprintf(f, "%s", escaper->string);
 		}
 	    }
 	if (tree->gotChildTags)
 	    if (!tree->hideTable)
 		fprintf(f, "\n");
 	for (branch = tree->children; branch != NULL; branch = branch->next)
 	    {
 	    char *target = branch->target;
 	    if (sameString(target, "text"))
 	        ;
 	    else if (!sameString(target, "hide"))
 		{
 		struct dyString *sql = dyStringNew(0);
 		int newDepth = depth;
 		if (!tree->hideTable)
 		    newDepth += 1;
 		dyStringPrintf(sql, "select * from %s where %s = ",
 			branch->targetTable, target);
 		if (branch->needsQuote)
 		    dyStringPrintf(sql, "\"%s\"", row[branch->fieldIx]);
 		else
 		    dyStringPrintf(sql, "%s", row[branch->fieldIx]);
 		if (maxList != 0)
 		    dyStringPrintf(sql, " limit %d", maxList);
 		rSqlToXml(cc, database, branch->targetTable, branch->targetField, 
 			sql->string, tableHash, branch, f, newDepth);
 		dyStringFree(&sql);
 		}
 	    }
 	if (!tree->hideTable)
 	    {
 	    if (tree->gotChildTags)
 		spaceOut(f, depth*2);
 	    fprintf(f, "</%s>\n", tree->outName);
 	    }
 	}
     }
 sqlFreeResult(&sr);
 sqlConnCacheDealloc(cc, &conn);
 }
 
 
 void sqlToXml(char *database, char *dumpSpec, char *outputXml)
 /* sqlToXml - Given a database, .as file, .joiner file, and a sql select 
  * statement, dump out results as XML. */
 {
 struct sqlConnCache *cc = sqlConnCacheNew();
 struct sqlConnection *conn = sqlConnCacheAlloc(cc, database);
 struct hash *tableHash = tablesAndFields(conn);
 struct specTree *tree = specTreeLoad(dumpSpec, tableHash);
 FILE *f = mustOpen(outputXml, "w");
 char *topTag = optionVal("topTag", database);
 char *table = tree->targetTable;
 struct dyString *sql = dyStringNew(0);
 
 if (optionExists("query"))
     {
     char *queryFile = optionVal("query", NULL);
     char *query;
     readInGulp(queryFile, &query, NULL);
     if (!stringIn(table, query))
 	errAbort("No mention of table %s in %s.", table, queryFile);
     dyStringAppend(sql, query);
     }
 else
     dyStringPrintf(sql, "select * from %s", table);
 
 if (maxList > 0)
     dyStringPrintf(sql, " limit %d", maxList);
 
 if (!sqlTableExists(conn, table))
     errAbort("No table %s in %s", table, database);
 sqlConnCacheDealloc(cc, &conn);
 
 verbose(1, "%d tables in %s\n",
 	tableHash->elCount,  database);
 
 escaper = dyStringNew(0);
 fprintf(f, "<%s>\n", topTag);
 rSqlToXml(cc, database, table, "", sql->string, tableHash, tree, f, 1);
 fprintf(f, "</%s>\n", topTag);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 tabSize = optionInt("tab", tabSize);
 maxList = optionInt("maxList", maxList);
 sqlToXml(argv[1], argv[2], argv[3]);
 return 0;
 }