4a3f3df9cab079354e8a9f4cf6c56f616f1ea0ab kent Fri Apr 6 16:29:22 2012 -0700 Removing MySQL linking dependency. diff --git src/hg/autoSql/autoSql.doc src/hg/autoSql/autoSql.doc deleted file mode 100644 index b5638d5..0000000 --- src/hg/autoSql/autoSql.doc +++ /dev/null @@ -1,312 +0,0 @@ -OVERVIEW - -AutoSQL is a program which automatically generates SQL and C -code for saving and loading objects to a database based on -an object specification. The specification langauge is -a bit quirky, but it has proven effective for many jobs. - -A SIMPLE EXAMPLE - -Imagine a very simple address book that just stored name, -street address, zip-code and state. A specification for -this would be: - -table addressBook -"A simple address book" - ( - string name; "Name - first or last or both, we don't care" - lstring address; "Street address" - string city; "City" - uint zipCode; "A zip code is always positive, so can be unsigned" - char[2] state; "Just store the abbreviation for the state" - ) - -This looks like a bit of a hybrid between a C structure and a -SQL table definition. This is because I was switching between C and -SQL when I made the AutoSQL language. My mind is incapable -of holding three things in it at once. When it tries to hold -two things, it mixes them up! - -This produces the SQL table definition: - - #A simple address book - CREATE TABLE addressBook ( - name varchar(255) not null, # Name - first or last or both, we don't care - address longblob not null, # Street address - city varchar(255) not null, # City - zipCode int unsigned not null, # A zip code is always positive, so can be unsigned - state char(2) not null, # Just store the abbreviation for the state - #Indices - PRIMARY KEY(name) - ); - -and the following C structure definition: - -struct addressBook -/* A simple address book */ - { - struct addressBook *next; /* Next in singly linked list. */ - char *name; /* Name - first or last or both, we don't care */ - char *address; /* Street address */ - char *city; /* City */ - unsigned zipCode; /* A zip code is always positive, so can be unsigned */ - char state[3]; /* Just store the abbreviation for the state */ - }; - -Typically in C you access a single row of a SQL database at -a time. The row is returned as an array of strings. It is -up to the C program to convert the ascii representation of -numbers to binary numbers. This is not hard work, but after -you've typed in twenty or thirty lines that look something -like: - point->x = atoi(row[1]); - point->y = atoi(row[2]); -you'll appreciate the following two routines that AutoSQL -generates for you: - - void addressBookStaticLoad(char **row, struct addressBook *ret); - /* Load a row from addressBook table into ret. The contents of ret will - * be replaced at the next call to this function. */ - - struct addressBook *addressBookLoad(char **row); - /* Load a addressBook from row fetched with select * from addressBook - * from database. Dispose of this with addressBookFree(). */ - -The first routine is typically used when you just want to process one -item at a time. It doesn't allocate any dynamic memory, and so is quite -fast. The second routine saves the structure to dynamic memory. Since -the C structure always includes a "next" field, you easily use this -routine to build a list of address book entries. - -The only problem with using dynamic memory, is that you have to remember -to free it. While AutoSQL can't remember to free things for you, it -can generate routines to free a single dynamically allocated structure, -or a list of dynamically allocates structures. That's what the next -two routines do: - - void addressBookFree(struct addressBook **pEl); - /* Free a single dynamically allocated addressBook such as created - * with addressBookLoad(). */ - - void addressBookFreeList(struct addressBook **pList); - /* Free a list of dynamically allocated addressBook's */ - -Reading structures without having to write code to load them up a -field at a time is nice, but sometimes you need to write structures -too. AutoSQL assumes that you'll either want to save the structure -in a tab-delimited or in a comma-delimited format. It generates a -routine that can do either: - - void addressBookOutput(struct addressBook *el, FILE *f, char sep, char lastSep); - /* Print out addressBook. Separate fields with sep. Follow last field with lastSep. */ - -and macros that make it convenient to do commas or tabs: - - #define addressBookTabOut(el,f) addressBookOutput(el,f,'\t','\n'); - /* Print out addressBook as a line in a tab-separated file. */ - - #define addressBookCommaOut(el,f) addressBookOutput(el,f,',',','); - /* Print out addressBook as a comma separated list including final comma. */ - -The last routine AutoSQL generates reads comma separated lists. While you -are unlikely to call this routine directly yourself, fields more complicated -than simple strings or integers get saved in the database as comma separated -lists. This routine allows AutoSQL to have objects that contain other objects. - -TYPES OF OBJECTS - -AutoSQL has three types of objects: - object - objects that appear in lists. A next - pointer is automatically inserted as the first field in - the C structure corresponding to an object. What look - like arrays in the .as file will be linked lists in memory. - simple - objects that don't appear in lists. Arrays in the .as file - will also be arrays in memory. At one point simple objects - could not include strings or variable-sized arrays, but this - limitation has been lifted. - table - like objects, but the program generates a SQL as well - as a C definition. - -Simple objects differ from other objects in how the program treats -array declarations. In the field declaration: - simple point[3] triangle; "A three sided figure" -the three points are stored in memory as a C array. If this were -declared instead as - object point[3] triangle; "A three sided figure" -the three points would be stored in memory as a singly linked list. - -TYPES OF FIELDS - -The following basic field are supported. - int - 32 bit signed integer - uint - 32 bit unsigned integer - short - 16 bit signed integer - ushort - 16 bit unsigned integer - byte - 8 bit signed integer - ubyte - 8 bit unsigned integer - bigint - 64 bit integer - float - single precision IEEE floating point - char - 8 bit character (can only be used in an array) - string - variable length string up to 255 bytes long. - lstring - variable length string up to 2 billion bytes long. - enum - enumerated type that can have a single symbolic value. - set - set type column that can have multiple symbolic value. - -In addition the simple, object, and table types can be used as -fields. - -FIXED LENGTH AND VARIABLE ARRAY DECLARATIONS - -An array can be declared as either fixed size or variable size. -A variable sized array is declared by putting a field name -inside of the brackets in the array declaration. This field -must be defined before the array. - -A MORE COMPLICATED EXAMPLE - -Imagine that you've just built an amazing 3D modeling program. -The only problem is that now you need to save the structures in -a database. Here is a way you might build the database with -AutoSQL: - -simple point -"A three dimensional point" - ( - float x; "Horizontal coordinate" - float y; "Vertical coordinate" - float z; "In/out of screen coordinate" - ) - -simple color -"A red/green/blue format color" - ( - ubyte red; "Red value 0-255" - ubyte green; "Green value 0-255" - ubyte blue; "Blue value 0-255" - ) - -object face -"A face of a three dimensional solid" - ( - simple color color; "Color of this face" - int pointCount; "Number of points in this polygon" - uint[pointCount] points; "Indices of points that make up face in polyhedron point array" - ) - -table polyhedron -"A solid three dimensional object" - ( - int faceCount; "Number of faces" - object face[faceCount] faces; "List of faces" - int pointCount; "Number of points" - simple point[pointCount] points; "Array of points" - ) - -Saving this as "threeD.as" and running: - autoSql threeD.as threeD -would end up generating 393 lines of bug free (I think!) C code and -14 lines of SQL for the investment of 33 lines of specification. -Considering that autoSQL itself is about 1200 lines long, I'll only -have to use it on 4 projects to break even! For you on the other -hand it's a very good deal indeed.... - -ENUM AND SET FIELDS: - -The enum and set fields generate SQL enum and set fields, which provide -efficient symbolic values. The symbolic values must be a valid C -identifier. When loaded from a string, the set value is a comma separated -string of the values in the set. C enums are generated for the values defined -in the fields. For example: - -table symbolCols -"example of enum and set symbolic columns" - ( - int id; "unique id" - enum(male, female) sex; "enumerated column" - set(cProg,javaProg,pythonProg,awkProg) skills; "set column" - ) - -Generates the following types and fields: -enum symTestSex - { - symTestMale = 0, - symTestFemale = 1, - }; -enum symTestSkills - { - symTestCProg = 0x0001, - symTestJavaProg = 0x0002, - symTestPythonProg = 0x0004, - symTestAwkProg = 0x0008, - }; -struct symTest -/* test of enum and set symbolic columns */ - { - struct symTest *next; /* Next in singly linked list. */ - int id; /* unique id */ - enum symTestSex sex; /* enumerated column */ - unsigned skills; /* set column */ - }; - -GRAMMER - -The grammer for AutoSQL is: - -declarationList: - declaration - declarationList declaration - -declaration: - declareType declareName comment '(' fieldList ')' - -declareType: - 'simple' - 'object' - 'table' - -declareName: - name - -comment: - quotedString - -fieldList: - field - fieldList field - -field: - fieldType fieldName ';' comment - fieldType '[' fieldSize ']' name ';' comment - fieldType '(' fieldValues ')' name ';' comment - -fieldName: - name - -fieldValues: - name, ..., name - -fieldType: - 'int' - 'uint' - 'short' - 'ushort' - 'byte' - 'ubyte' - 'float' - 'char' - 'string' - 'lstring' - 'enum' - 'set' - declareType declareName - -fieldSize: - number - fieldName - -name: A series of letters, digits, or '_', starting with a letter. - -number: A series of digits - -quotedString: Any text that is surrounded by double quotes -