4a3f3df9cab079354e8a9f4cf6c56f616f1ea0ab kent Fri Apr 6 16:29:22 2012 -0700 Removing MySQL linking dependency. diff --git src/utils/autoSql/autoSql.doc src/utils/autoSql/autoSql.doc new file mode 100644 index 0000000..b5638d5 --- /dev/null +++ src/utils/autoSql/autoSql.doc @@ -0,0 +1,312 @@ +OVERVIEW + +AutoSQL is a program which automatically generates SQL and C +code for saving and loading objects to a database based on +an object specification. The specification langauge is +a bit quirky, but it has proven effective for many jobs. + +A SIMPLE EXAMPLE + +Imagine a very simple address book that just stored name, +street address, zip-code and state. A specification for +this would be: + +table addressBook +"A simple address book" + ( + string name; "Name - first or last or both, we don't care" + lstring address; "Street address" + string city; "City" + uint zipCode; "A zip code is always positive, so can be unsigned" + char[2] state; "Just store the abbreviation for the state" + ) + +This looks like a bit of a hybrid between a C structure and a +SQL table definition. This is because I was switching between C and +SQL when I made the AutoSQL language. My mind is incapable +of holding three things in it at once. When it tries to hold +two things, it mixes them up! + +This produces the SQL table definition: + + #A simple address book + CREATE TABLE addressBook ( + name varchar(255) not null, # Name - first or last or both, we don't care + address longblob not null, # Street address + city varchar(255) not null, # City + zipCode int unsigned not null, # A zip code is always positive, so can be unsigned + state char(2) not null, # Just store the abbreviation for the state + #Indices + PRIMARY KEY(name) + ); + +and the following C structure definition: + +struct addressBook +/* A simple address book */ + { + struct addressBook *next; /* Next in singly linked list. */ + char *name; /* Name - first or last or both, we don't care */ + char *address; /* Street address */ + char *city; /* City */ + unsigned zipCode; /* A zip code is always positive, so can be unsigned */ + char state[3]; /* Just store the abbreviation for the state */ + }; + +Typically in C you access a single row of a SQL database at +a time. The row is returned as an array of strings. It is +up to the C program to convert the ascii representation of +numbers to binary numbers. This is not hard work, but after +you've typed in twenty or thirty lines that look something +like: + point->x = atoi(row[1]); + point->y = atoi(row[2]); +you'll appreciate the following two routines that AutoSQL +generates for you: + + void addressBookStaticLoad(char **row, struct addressBook *ret); + /* Load a row from addressBook table into ret. The contents of ret will + * be replaced at the next call to this function. */ + + struct addressBook *addressBookLoad(char **row); + /* Load a addressBook from row fetched with select * from addressBook + * from database. Dispose of this with addressBookFree(). */ + +The first routine is typically used when you just want to process one +item at a time. It doesn't allocate any dynamic memory, and so is quite +fast. The second routine saves the structure to dynamic memory. Since +the C structure always includes a "next" field, you easily use this +routine to build a list of address book entries. + +The only problem with using dynamic memory, is that you have to remember +to free it. While AutoSQL can't remember to free things for you, it +can generate routines to free a single dynamically allocated structure, +or a list of dynamically allocates structures. That's what the next +two routines do: + + void addressBookFree(struct addressBook **pEl); + /* Free a single dynamically allocated addressBook such as created + * with addressBookLoad(). */ + + void addressBookFreeList(struct addressBook **pList); + /* Free a list of dynamically allocated addressBook's */ + +Reading structures without having to write code to load them up a +field at a time is nice, but sometimes you need to write structures +too. AutoSQL assumes that you'll either want to save the structure +in a tab-delimited or in a comma-delimited format. It generates a +routine that can do either: + + void addressBookOutput(struct addressBook *el, FILE *f, char sep, char lastSep); + /* Print out addressBook. Separate fields with sep. Follow last field with lastSep. */ + +and macros that make it convenient to do commas or tabs: + + #define addressBookTabOut(el,f) addressBookOutput(el,f,'\t','\n'); + /* Print out addressBook as a line in a tab-separated file. */ + + #define addressBookCommaOut(el,f) addressBookOutput(el,f,',',','); + /* Print out addressBook as a comma separated list including final comma. */ + +The last routine AutoSQL generates reads comma separated lists. While you +are unlikely to call this routine directly yourself, fields more complicated +than simple strings or integers get saved in the database as comma separated +lists. This routine allows AutoSQL to have objects that contain other objects. + +TYPES OF OBJECTS + +AutoSQL has three types of objects: + object - objects that appear in lists. A next + pointer is automatically inserted as the first field in + the C structure corresponding to an object. What look + like arrays in the .as file will be linked lists in memory. + simple - objects that don't appear in lists. Arrays in the .as file + will also be arrays in memory. At one point simple objects + could not include strings or variable-sized arrays, but this + limitation has been lifted. + table - like objects, but the program generates a SQL as well + as a C definition. + +Simple objects differ from other objects in how the program treats +array declarations. In the field declaration: + simple point[3] triangle; "A three sided figure" +the three points are stored in memory as a C array. If this were +declared instead as + object point[3] triangle; "A three sided figure" +the three points would be stored in memory as a singly linked list. + +TYPES OF FIELDS + +The following basic field are supported. + int - 32 bit signed integer + uint - 32 bit unsigned integer + short - 16 bit signed integer + ushort - 16 bit unsigned integer + byte - 8 bit signed integer + ubyte - 8 bit unsigned integer + bigint - 64 bit integer + float - single precision IEEE floating point + char - 8 bit character (can only be used in an array) + string - variable length string up to 255 bytes long. + lstring - variable length string up to 2 billion bytes long. + enum - enumerated type that can have a single symbolic value. + set - set type column that can have multiple symbolic value. + +In addition the simple, object, and table types can be used as +fields. + +FIXED LENGTH AND VARIABLE ARRAY DECLARATIONS + +An array can be declared as either fixed size or variable size. +A variable sized array is declared by putting a field name +inside of the brackets in the array declaration. This field +must be defined before the array. + +A MORE COMPLICATED EXAMPLE + +Imagine that you've just built an amazing 3D modeling program. +The only problem is that now you need to save the structures in +a database. Here is a way you might build the database with +AutoSQL: + +simple point +"A three dimensional point" + ( + float x; "Horizontal coordinate" + float y; "Vertical coordinate" + float z; "In/out of screen coordinate" + ) + +simple color +"A red/green/blue format color" + ( + ubyte red; "Red value 0-255" + ubyte green; "Green value 0-255" + ubyte blue; "Blue value 0-255" + ) + +object face +"A face of a three dimensional solid" + ( + simple color color; "Color of this face" + int pointCount; "Number of points in this polygon" + uint[pointCount] points; "Indices of points that make up face in polyhedron point array" + ) + +table polyhedron +"A solid three dimensional object" + ( + int faceCount; "Number of faces" + object face[faceCount] faces; "List of faces" + int pointCount; "Number of points" + simple point[pointCount] points; "Array of points" + ) + +Saving this as "threeD.as" and running: + autoSql threeD.as threeD +would end up generating 393 lines of bug free (I think!) C code and +14 lines of SQL for the investment of 33 lines of specification. +Considering that autoSQL itself is about 1200 lines long, I'll only +have to use it on 4 projects to break even! For you on the other +hand it's a very good deal indeed.... + +ENUM AND SET FIELDS: + +The enum and set fields generate SQL enum and set fields, which provide +efficient symbolic values. The symbolic values must be a valid C +identifier. When loaded from a string, the set value is a comma separated +string of the values in the set. C enums are generated for the values defined +in the fields. For example: + +table symbolCols +"example of enum and set symbolic columns" + ( + int id; "unique id" + enum(male, female) sex; "enumerated column" + set(cProg,javaProg,pythonProg,awkProg) skills; "set column" + ) + +Generates the following types and fields: +enum symTestSex + { + symTestMale = 0, + symTestFemale = 1, + }; +enum symTestSkills + { + symTestCProg = 0x0001, + symTestJavaProg = 0x0002, + symTestPythonProg = 0x0004, + symTestAwkProg = 0x0008, + }; +struct symTest +/* test of enum and set symbolic columns */ + { + struct symTest *next; /* Next in singly linked list. */ + int id; /* unique id */ + enum symTestSex sex; /* enumerated column */ + unsigned skills; /* set column */ + }; + +GRAMMER + +The grammer for AutoSQL is: + +declarationList: + declaration + declarationList declaration + +declaration: + declareType declareName comment '(' fieldList ')' + +declareType: + 'simple' + 'object' + 'table' + +declareName: + name + +comment: + quotedString + +fieldList: + field + fieldList field + +field: + fieldType fieldName ';' comment + fieldType '[' fieldSize ']' name ';' comment + fieldType '(' fieldValues ')' name ';' comment + +fieldName: + name + +fieldValues: + name, ..., name + +fieldType: + 'int' + 'uint' + 'short' + 'ushort' + 'byte' + 'ubyte' + 'float' + 'char' + 'string' + 'lstring' + 'enum' + 'set' + declareType declareName + +fieldSize: + number + fieldName + +name: A series of letters, digits, or '_', starting with a letter. + +number: A series of digits + +quotedString: Any text that is surrounded by double quotes +