e907713dd4d0dad8934b23ec55c2d937b9085482
kent
  Mon Sep 2 08:44:52 2019 -0700
Starting to work.  Creates normal, conditional and foreign key inserts.  Still to do - the multi to multi

diff --git src/hg/sqlUpdateRelated/sqlUpdateRelated.c src/hg/sqlUpdateRelated/sqlUpdateRelated.c
new file mode 100644
index 0000000..873de60
--- /dev/null
+++ src/hg/sqlUpdateRelated/sqlUpdateRelated.c
@@ -0,0 +1,295 @@
+/* sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab separated 
+ * files.  Handles foreign key and many-to-many relationships with a multitude of @ signs. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "fieldedTable.h"
+#include "csv.h"
+#include "jksql.h"
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab \n"
+  "separated files.  Handles foreign key and many-to-many relationships with a multitude\n"
+  "of @ signs.  Currently only works with mysql cause going through jksql\n"
+  "usage:\n"
+  "   sqlUpdateRelated database tableFiles\n"
+  "options:\n"
+  "   -xxx=XXX\n"
+  );
+}
+
+/* Command line validation table. */
+static struct optionSpec options[] = {
+   {NULL, 0},
+};
+
+struct foreignRef
+/* What we need to handle a foreign key reference */
+    {
+    struct foreignRef *next;	    // Next in list
+    char *nativeFieldName;	    // Name in the current table
+    char *foreignTable;		    // Foreign table name
+    char *foreignFindName;	    // Name to search in the destination table
+    char *foreignKeyName;	    // Name of key we are fetching, usually just "id"
+    char *outputVal;		    // Used as a place to hold the row value for later processing
+    int nativeFieldIx;		    // Field index in native table
+    char *foreignKey;		    // Actual foreign key - computed each row
+    };
+
+struct multiRef
+/* What we need to handle a foreign key reference */
+    {
+    struct multiRef *next;	    // Next in list
+    char *nativeFieldName;	    // Name in the current table
+    char *nativeKeyName;	    // The name of the key in current table
+    char *relationalTable;	    // Table that links the two together
+    char *relationalNativeField;    // The field that specifies the record in current table
+    char *relationalForeignField;   // The field that specifies the record in foreign table
+    char *foreignTable;		    // Foreign table name
+    char *foreignFindName;	    // Name to search in the destination table
+    char *foreignKeyName;	    // Name of key we are fetching, usually just "id"
+    int nativeFieldIx;		    // Field index in native table
+    };
+
+void sqlUpdateViaTabFile(struct sqlConnection *conn, char *tabFile, char *tableName)
+/* Interpret one tab-separated file */
+{
+// Load the tabFile 
+struct fieldedTable *inTable = fieldedTableFromTabFile(tabFile, tabFile, NULL, 0);
+verbose(2, "%d fields and %d rows in %s\n",  inTable->fieldCount, inTable->rowCount, tabFile);
+char **inFields = inTable->fields;
+
+// Loop through the fields looking for special ones
+char *conditionalField = NULL;  // We might have one of these
+int conditionalIx = -1;
+struct foreignRef *foreignRefList = NULL;
+struct multiRef *multiRefList = NULL;
+struct hash *foreignHash = hashNew(0), *multiHash = hashNew(0);
+int fieldIx;
+for (fieldIx=0; fieldIx<inTable->fieldCount; ++fieldIx)
+    {
+    char *field = inFields[fieldIx];
+    char firstChar = field[0];
+    if (firstChar == '?')
+        {
+	if (conditionalField != NULL)
+	    errAbort("Multiple fields starting with a '?', There can only be one\n"
+		"but both %s and %s exist\n", conditionalField, field+1);
+	conditionalField = field;
+	conditionalIx = fieldIx;
+	verbose(2, "conditionalField = %s, ix = %d\n", field, conditionalIx);
+	}
+    else if (firstChar == '@')  // Foreign keys are involved.  Will it get worse?
+        {
+	char *chopTemp = cloneString(field);
+	if (field[1] == '@')  // Ugh, a multiRef.  Much to parse!
+	    {
+	    verbose(2, "multiRef field = %s\n", field);
+	    char *pos = chopTemp + 2;  // Set up to be past @
+	    int expectedCount = 8;
+	    char *parts[expectedCount+1];	// More than we need
+	    int partCount = chopByChar(pos, '@', parts, ArraySize(parts));
+	    if (partCount != expectedCount)
+	        {
+		errAbort("Expecting %d @ separated fields in %s, got %d\n",
+		    expectedCount, field, partCount);
+		}
+	    struct multiRef *mRef;
+	    AllocVar(mRef);
+	    mRef->nativeFieldName = parts[0];	    
+	    mRef->nativeKeyName = parts[1];	    
+	    mRef->relationalTable = parts[2];	    
+	    mRef->relationalNativeField = parts[3];    
+	    mRef->relationalForeignField = parts[4];   
+	    mRef->foreignTable = parts[5];		    
+	    mRef->foreignFindName = parts[6];	    
+	    mRef->foreignKeyName = parts[7];	    
+	    mRef->nativeFieldIx = fieldIx;
+	    if (!sqlTableExists(conn, mRef->relationalTable))
+	        errAbort("Table %s from %s doesn't exist", mRef->relationalTable, field);
+	    if (!sqlTableExists(conn, mRef->foreignTable))
+	        errAbort("Table %s from %s doesn't exist", mRef->foreignTable, field);
+	    slAddTail(&multiRefList, mRef);
+	    hashAdd(multiHash, field, mRef);
+	    }
+	else
+	    {
+	    verbose(2, "foreignRef field = %s\n", field);
+	    char *pos = chopTemp + 2;  // Set up to be past @@
+	    int expectedCount = 4;
+	    char *parts[expectedCount+1];	// More than we need
+	    int partCount = chopByChar(pos, '@', parts, ArraySize(parts));
+	    if (partCount != expectedCount)
+	        {
+		errAbort("Expecting %d @ separated fields in %s, got %d\n",
+		    expectedCount, field, partCount);
+		}
+	    struct foreignRef *fRef;
+	    AllocVar(fRef);
+	    fRef->nativeFieldName = parts[0];
+	    fRef->foreignTable = parts[1];
+	    fRef->foreignFindName = parts[2];
+	    fRef->foreignKeyName = parts[3];
+	    fRef->nativeFieldIx = fieldIx;
+	    if (!sqlTableExists(conn, fRef->foreignTable))
+	        errAbort("Table %s from %s doesn't exist", fRef->foreignTable, field);
+	    slAddTail(&foreignRefList, fRef);
+	    hashAdd(foreignHash, field, fRef);
+	    }
+	}
+    }
+verbose(2, "Got %s conditional, %d foreignRefs, %d multiRefs\n", 
+	naForNull(conditionalField), slCount(foreignRefList), slCount(multiRefList));
+
+/* Now we loop through the input table */
+struct fieldedRow *fr;
+struct dyString *sql = dyStringNew(0);
+struct dyString *csvScratch = dyStringNew(0);
+for (fr = inTable->rowList; fr != NULL; fr = fr->next)
+    {
+    char **row = fr->row;
+
+    /* Deal with conditional field */
+    if (conditionalField != NULL && sameString(conditionalField, inFields[conditionalIx]))
+	{
+	dyStringClear(sql);
+	char *rawVal = row[conditionalIx];
+	char *uncsvVal = csvParseNext(&rawVal, csvScratch);
+	// Before we do more we see if the record already exists
+	sqlDyStringPrintf(sql, "select count(*) from %s where %s='%s'",
+	    tableName, conditionalField+1, uncsvVal);
+	verbose(1, "%s\n", sql->string);
+	if (sqlQuickNum(conn, sql->string) > 0)
+	    continue;
+	}
+
+    /* Cope with foreign keys */
+    struct foreignRef *fRef;
+    for (fRef = foreignRefList; fRef != NULL; fRef = fRef->next)
+        {
+	char *origVal = row[fRef->nativeFieldIx];
+	char *val = emptyForNull(csvParseNext(&origVal, csvScratch));
+	char *escaped = sqlEscapeString(val);
+	dyStringPrintf(sql, "\"%s\"",  escaped);
+	dyStringClear(sql);
+	sqlDyStringPrintf(sql, "select %s from %s where %s=\"%s\"",
+	    fRef->foreignKeyName, fRef->foreignTable, 
+	    fRef->foreignFindName, escaped);
+	verbose(1, "query for foreignKey: %s\n", sql->string);
+	fRef->foreignKey = sqlQuickString(conn, sql->string);
+	if (isEmpty(fRef->foreignKey))
+	    errAbort("No %s in table %s referenced line %d of %s",  val, fRef->foreignTable,
+	        fr->id,  tabFile);
+	row[fRef->nativeFieldIx] = fRef->foreignKey;
+	freez(&escaped);
+	}
+
+    dyStringClear(sql);
+    sqlDyStringPrintf(sql, "insert into %s (", tableName);
+    boolean firstTime = TRUE;
+    for (fieldIx=0; fieldIx < inTable->fieldCount; ++fieldIx)
+        {
+	char *field = inFields[fieldIx];
+	char firstChar = field[0];
+
+	if (firstChar == '@')
+	    {
+	    if (field[1] == '@')  // multi field
+		{
+		// Actually multi field variables don't get written, all lives in
+		// the relationship table
+		continue;
+		}
+	    else
+		{
+		char *startField = field + 1;  // skip over '@'
+		char *endField = strchr(startField, '@');
+		// This is parsed out so we know it works until someone rearranged code
+		assert(endField != NULL);  
+		field = cloneStringZ(startField, endField-startField);
+		}
+	    }
+
+	// We already dealt with the question mark outside this loop
+	if (firstChar == '?')
+	    field += 1;
+
+
+	if (firstTime)
+	    firstTime = !firstTime;
+	else
+	    dyStringAppendC(sql, ',');
+	dyStringAppend(sql, field);
+	}
+
+    /* Now generate the values bit */
+    dyStringAppend(sql, ") values (");
+    firstTime = TRUE;
+    for (fieldIx=0; fieldIx < inTable->fieldCount; ++fieldIx)
+        {
+	char *field = inFields[fieldIx];
+	char firstChar = field[0];
+
+	if (firstChar == '@')
+	    {
+	    if (field[1] == '@')  // multi field
+		continue;
+	    else
+	        field += 1;	// We val with the foreign key here, just skip over '@'
+	    }
+	if (firstChar == '?')
+	    field += 1;
+	if (firstTime)
+	    firstTime = !firstTime;
+	else
+	    dyStringAppendC(sql, ',');
+	char *origVal = row[fieldIx];
+	char *val = emptyForNull(csvParseNext(&origVal, csvScratch));
+	char *escaped = sqlEscapeString(val);
+	dyStringPrintf(sql, "\"%s\"",  escaped);
+	freez(&escaped);
+	}
+    dyStringAppendC(sql, ')');
+    verbose(1, "update sql: %s\n", sql->string);
+    sqlUpdate(conn, sql->string);
+
+    /* Clean up strings allocated for field references */
+    for (fRef = foreignRefList; fRef != NULL; fRef = fRef->next)
+	freez(&fRef->foreignKey);
+    }
+dyStringFree(&sql);
+dyStringFree(&csvScratch);
+}
+
+void sqlUpdateRelated(char *database, char **inFiles, int inCount)
+/* sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab 
+ * separated files.  Handles foreign key and many-to-many relationships with a multitude 
+ * of @ signs.. */
+{
+struct sqlConnection *conn = sqlConnect(database);
+int fileIx;
+for (fileIx = 0; fileIx < inCount; ++fileIx)
+    {
+    char *inFile = inFiles[fileIx];
+    char *tableName = cloneString(inFile);
+    chopSuffix(tableName);
+    verbose(1, "Processing %s into %s table \n", inFile, tableName);
+
+    sqlUpdateViaTabFile(conn, inFile, tableName);
+    }
+sqlDisconnect(&conn);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc < 3)
+    usage();
+sqlUpdateRelated(argv[1], argv+2, argc-2);
+return 0;
+}