0890e2c3c68649027aa95195fe82ca2e1101e82e kent Sun Sep 15 11:42:11 2019 -0700 Adding -uncsv as an option. Actually it used to be the default so I had to throw it on in the test set. The csv/array vs. normal commas in free text are a weak point of the system. Between that and the SQL you can't escape escaping it seems! diff --git src/hg/sqlUpdateRelated/sqlUpdateRelated.c src/hg/sqlUpdateRelated/sqlUpdateRelated.c index e5475bb..e40a2cb 100644 --- src/hg/sqlUpdateRelated/sqlUpdateRelated.c +++ src/hg/sqlUpdateRelated/sqlUpdateRelated.c @@ -8,46 +8,49 @@ #include "fieldedTable.h" #include "csv.h" #include "jksql.h" void usage() /* Explain usage and exit. */ { errAbort( "sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab \n" "separated files. Handles foreign key and many-to-many relationships with a multitude\n" "of @ signs. Currently only works with mysql cause going through jksql\n" "usage:\n" " sqlUpdateRelated database tableFiles\n" "options:\n" " -missOk - if set, tableFiles mentioned that don't exist are skipped rather than erroring\n" + " -uncsv - if set, run uncsv and just take first value for each field, needed sometimes\n" + " to deal with extra quotes from tagstorms and other sources\n" "The tableFiles are in a interesting and peculiar format. The first line with the field name\n" "ends up controlling this program. If a field starts with just a regular letter all is as\n" "you may expect, the field just contains data to load. However if the field starts with\n" "a special char, special things happen. In particular\n" " ? - indicates field is a conditional key field. Record is only inserted if the value\n" " for this field is not already present in table\n" " ! - indicates this is update key field. Record must already exist, values in other fields\n" " are updated.\n" " @ - indicates a foreign key relationship - see source code until docs are in shape\n" " @@ - indicates a many-to-many relationship - see source code until docs are in shape" ); } /* Command line validation table. */ static struct optionSpec options[] = { - {"missOk", TRUE}, + {"missOk", OPTION_BOOLEAN}, + {"uncsv", OPTION_BOOLEAN}, {NULL, 0}, }; struct foreignRef /* What we need to handle a foreign key reference */ { struct foreignRef *next; // Next in list char *nativeFieldName; // Name in the current table char *foreignTable; // Foreign table name char *foreignFindName; // Name to search in the destination table char *foreignKeyName; // Name of key we are fetching, usually just "id" char *outputVal; // Used as a place to hold the row value for later processing int nativeFieldIx; // Field index in native table char *foreignKey; // Actual foreign key - computed each row }; @@ -106,31 +109,31 @@ * attyField */ { if (sqlFieldIndex(conn, table, field) < 0) errAbort("No field %s in table %s used in %s", field, table, attyField); } void checkTableExists(struct sqlConnection *conn, char *table, char *attyField) /* Make sure table exists in database or print error message that includes * attyField */ { if (!sqlTableExists(conn, table)) errAbort("Table %s from %s doesn't exist", table, attyField); } -void sqlUpdateViaTabFile(struct sqlConnection *conn, char *tabFile, char *tableName) +void sqlUpdateViaTabFile(struct sqlConnection *conn, char *tabFile, char *tableName, boolean uncsv) /* Interpret one tab-separated file */ { // Load the tabFile struct fieldedTable *inTable = fieldedTableFromTabFile(tabFile, tabFile, NULL, 0); verbose(2, "%d fields and %d rows in %s\n", inTable->fieldCount, inTable->rowCount, tabFile); char **inFields = inTable->fields; // Loop through the fields creating field set for output table and parsing // foreign and multi-multi fields into structures char *conditionalField = NULL; // We might have one of these int conditionalIx = -1; boolean updateCondition = FALSE; struct foreignRef *foreignRefList = NULL; struct multiRef *multiRefList = NULL; int fieldIx; @@ -239,85 +242,94 @@ struct fieldedRow *fr; struct dyString *sql = dyStringNew(0); struct dyString *csvScratch = dyStringNew(0); for (fr = inTable->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; /* The case of the update (!) condition is special. */ if (updateCondition) { /* Make sure that the record we are updating exists for better * error reporting. There's a race condition that'll make a SQL error happen * instead once in a million years. */ dyStringClear(sql); char *rawVal = row[conditionalIx]; - char *uncsvVal = csvParseNext(&rawVal, csvScratch); + char *uncsvVal = rawVal; + if (uncsv) + uncsvVal = csvParseNext(&rawVal, csvScratch); char *conditionalEscaped = sqlEscapeString(uncsvVal); sqlDyStringPrintf(sql, "select count(*) from %s where %s='%s'", tableName, conditionalField+1, conditionalEscaped); verbose(2, "%s\n", sql->string); if (sqlQuickNum(conn, sql->string) == 0) errAbort("Trying to update %s in %s.%s, but it doesn't exist", uncsvVal, tableName, conditionalField+1); dyStringClear(sql); sqlDyStringPrintf(sql, "update %s set", tableName); boolean firstTime = TRUE; for (fieldIx=0; fieldIx < inTable->fieldCount; ++fieldIx) { if (fieldIx != conditionalIx) { char *rawVal = row[fieldIx]; - char *uncsvVal = csvParseNext(&rawVal, csvScratch); + char *uncsvVal = rawVal; + if (uncsv) + uncsvVal = csvParseNext(&rawVal, csvScratch); char *escaped = sqlEscapeString(uncsvVal); if (firstTime) firstTime = FALSE; else sqlDyStringPrintf(sql, ","); sqlDyStringPrintf(sql, " %s='%s'", inFields[fieldIx], escaped); freez(&escaped); } } sqlDyStringPrintf(sql, " where %s='%s'", conditionalField+1, conditionalEscaped); verbose(2, "%s\n", sql->string); sqlUpdate(conn, sql->string); continue; // We are done, the rest of the loop is for inserts not updates } /* Deal with conditional field. If we have one and the value we are trying to insert * already exists then just continue to next row. */ if (conditionalField != NULL && sameString(conditionalField, inFields[conditionalIx])) { dyStringClear(sql); char *rawVal = row[conditionalIx]; - char *uncsvVal = csvParseNext(&rawVal, csvScratch); + char *uncsvVal = rawVal; + if (uncsv) + uncsvVal = csvParseNext(&rawVal, csvScratch); // Before we do more we see if the record already exists sqlDyStringPrintf(sql, "select count(*) from %s where %s='%s'", tableName, conditionalField+1, uncsvVal); verbose(2, "%s\n", sql->string); if (sqlQuickNum(conn, sql->string) > 0) continue; } /* Cope with foreign keys */ struct foreignRef *fRef; for (fRef = foreignRefList; fRef != NULL; fRef = fRef->next) { - char *origVal = row[fRef->nativeFieldIx]; - char *val = emptyForNull(csvParseNext(&origVal, csvScratch)); + char *rawVal = row[fRef->nativeFieldIx]; + char *uncsvVal = rawVal; + if (uncsv) + uncsvVal = csvParseNext(&rawVal, csvScratch); + char *val = emptyForNull(uncsvVal); char *escaped = sqlEscapeString(val); dyStringPrintf(sql, "\"%s\"", escaped); dyStringClear(sql); sqlDyStringPrintf(sql, "select %s from %s where %s=\"%s\"", fRef->foreignKeyName, fRef->foreignTable, fRef->foreignFindName, escaped); verbose(2, "query for foreignKey: %s\n", sql->string); fRef->foreignKey = sqlQuickString(conn, sql->string); if (isEmpty(fRef->foreignKey)) errAbort("No %s in table %s referenced line %d of %s", val, fRef->foreignTable, fr->id, tabFile); row[fRef->nativeFieldIx] = fRef->foreignKey; freez(&escaped); } @@ -371,32 +383,35 @@ if (firstChar == '@') { if (field[1] == '@') // multi field { continue; // multi field output doesn't go into this table, just relationship } else field += 1; // We val with the foreign key here, just skip over '@' } if (firstChar == '?') field += 1; if (firstTime) firstTime = !firstTime; else dyStringAppendC(sql, ','); - char *origVal = row[fieldIx]; - char *val = emptyForNull(csvParseNext(&origVal, csvScratch)); + char *rawVal = row[fieldIx]; + char *uncsvVal = rawVal; + if (uncsv) + uncsvVal = csvParseNext(&rawVal, csvScratch); + char *val = emptyForNull(uncsvVal); char *escaped = sqlEscapeString(val); dyStringPrintf(sql, "\"%s\"", escaped); freez(&escaped); } dyStringAppendC(sql, ')'); verbose(2, "update sql: %s\n", sql->string); sqlUpdate(conn, sql->string); int mainTableId = sqlLastAutoId(conn); /* Handle multi-multi stuff */ struct multiRef *mRef; for (mRef = multiRefList; mRef != NULL; mRef = mRef->next) { addMultiRelation(conn, mRef, fr, mainTableId, tabFile, csvScratch); } @@ -406,37 +421,38 @@ freez(&fRef->foreignKey); } dyStringFree(&sql); dyStringFree(&csvScratch); fieldedTableFree(&inTable); } void sqlUpdateRelated(char *database, char **inFiles, int inCount) /* sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab * separated files. Handles foreign key and many-to-many relationships with a multitude * of @ signs.. */ { struct sqlConnection *conn = sqlConnect(database); int fileIx; boolean missOk = optionExists("missOk"); +boolean uncsv = optionExists("uncsv"); for (fileIx = 0; fileIx < inCount; ++fileIx) { char *inFile = inFiles[fileIx]; if (missOk && !fileExists(inFile)) continue; char *tableName = cloneString(inFile); chopSuffix(tableName); verbose(1, "Processing %s into %s table \n", inFile, tableName); - sqlUpdateViaTabFile(conn, inFile, tableName); + sqlUpdateViaTabFile(conn, inFile, tableName, uncsv); } sqlDisconnect(&conn); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc < 3) usage(); sqlUpdateRelated(argv[1], argv+2, argc-2); return 0; }