a2b75cac670b61deb2664261984bd24e1971242f kent Mon Sep 2 10:21:11 2019 -0700 Adding relationship table updates to multi-multi handling. diff --git src/hg/sqlUpdateRelated/sqlUpdateRelated.c src/hg/sqlUpdateRelated/sqlUpdateRelated.c index 873de60..7f58276 100644 --- src/hg/sqlUpdateRelated/sqlUpdateRelated.c +++ src/hg/sqlUpdateRelated/sqlUpdateRelated.c @@ -43,44 +43,79 @@ struct multiRef /* What we need to handle a foreign key reference */ { struct multiRef *next; // Next in list char *nativeFieldName; // Name in the current table char *nativeKeyName; // The name of the key in current table char *relationalTable; // Table that links the two together char *relationalNativeField; // The field that specifies the record in current table char *relationalForeignField; // The field that specifies the record in foreign table char *foreignTable; // Foreign table name char *foreignFindName; // Name to search in the destination table char *foreignKeyName; // Name of key we are fetching, usually just "id" int nativeFieldIx; // Field index in native table }; +void addMultiRelation(struct sqlConnection *conn, struct multiRef *mRef, struct fieldedRow *fr, + int nativeId, char *tabFile, struct dyString *csvScratch) +/* inCsv is a comma separated list of names that we should be able to locate in the foreign + * table via mRef->foreignFindName. We make up relationships for them in the relationalTable. */ +{ +char *inCsv = fr->row[mRef->nativeFieldIx]; +char *nativeVal; +char *parsePos = inCsv; +struct dyString *sql = dyStringNew(0); +while ((nativeVal = csvParseNext(&parsePos, csvScratch)) != NULL) + { + char *escaped = sqlEscapeString(nativeVal); + dyStringClear(sql); + sqlDyStringPrintf(sql, "select %s from %s where %s=\"%s\"", + mRef->foreignKeyName, mRef->foreignTable, + mRef->foreignFindName, escaped); + char *foreignKey = sqlQuickString(conn, sql->string); + verbose(2, "foreignKey for %s is %s\n", nativeVal, foreignKey); + if (isEmpty(foreignKey)) + errAbort("No %s in table %s referenced line %d of %s", nativeVal, mRef->foreignTable, + fr->id, tabFile); + + // Alright, we got our native and foreign keys, let's insert a row in relationship table + dyStringClear(sql); + sqlDyStringPrintf(sql, "insert into %s (%s,%s) values (%d,%s)", mRef->relationalTable, + mRef->relationalNativeField, mRef->relationalForeignField, + nativeId, foreignKey); + verbose(2, "relationship sql: %s\n", sql->string); + sqlUpdate(conn, sql->string); + freez(&escaped); + } +dyStringFree(&sql); +} + void sqlUpdateViaTabFile(struct sqlConnection *conn, char *tabFile, char *tableName) /* Interpret one tab-separated file */ { // Load the tabFile struct fieldedTable *inTable = fieldedTableFromTabFile(tabFile, tabFile, NULL, 0); verbose(2, "%d fields and %d rows in %s\n", inTable->fieldCount, inTable->rowCount, tabFile); char **inFields = inTable->fields; -// Loop through the fields looking for special ones +// Loop through the fields creating field set for output table and parsing +// foreign and multi-multi fields into structures char *conditionalField = NULL; // We might have one of these int conditionalIx = -1; struct foreignRef *foreignRefList = NULL; struct multiRef *multiRefList = NULL; -struct hash *foreignHash = hashNew(0), *multiHash = hashNew(0); +struct hash *multiHash = hashNew(0); int fieldIx; for (fieldIx=0; fieldIx<inTable->fieldCount; ++fieldIx) { char *field = inFields[fieldIx]; char firstChar = field[0]; if (firstChar == '?') { if (conditionalField != NULL) errAbort("Multiple fields starting with a '?', There can only be one\n" "but both %s and %s exist\n", conditionalField, field+1); conditionalField = field; conditionalIx = fieldIx; verbose(2, "conditionalField = %s, ix = %d\n", field, conditionalIx); } else if (firstChar == '@') // Foreign keys are involved. Will it get worse? @@ -126,94 +161,95 @@ if (partCount != expectedCount) { errAbort("Expecting %d @ separated fields in %s, got %d\n", expectedCount, field, partCount); } struct foreignRef *fRef; AllocVar(fRef); fRef->nativeFieldName = parts[0]; fRef->foreignTable = parts[1]; fRef->foreignFindName = parts[2]; fRef->foreignKeyName = parts[3]; fRef->nativeFieldIx = fieldIx; if (!sqlTableExists(conn, fRef->foreignTable)) errAbort("Table %s from %s doesn't exist", fRef->foreignTable, field); slAddTail(&foreignRefList, fRef); - hashAdd(foreignHash, field, fRef); } } } verbose(2, "Got %s conditional, %d foreignRefs, %d multiRefs\n", naForNull(conditionalField), slCount(foreignRefList), slCount(multiRefList)); -/* Now we loop through the input table */ +/* Now we loop through the input table and make the appropriate sql queries and inserts */ struct fieldedRow *fr; struct dyString *sql = dyStringNew(0); struct dyString *csvScratch = dyStringNew(0); for (fr = inTable->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; - /* Deal with conditional field */ + /* Deal with conditional field. If we have one and the value we are trying to insert + * already exists then just continue to next row. */ if (conditionalField != NULL && sameString(conditionalField, inFields[conditionalIx])) { dyStringClear(sql); char *rawVal = row[conditionalIx]; char *uncsvVal = csvParseNext(&rawVal, csvScratch); // Before we do more we see if the record already exists sqlDyStringPrintf(sql, "select count(*) from %s where %s='%s'", tableName, conditionalField+1, uncsvVal); - verbose(1, "%s\n", sql->string); + verbose(2, "%s\n", sql->string); if (sqlQuickNum(conn, sql->string) > 0) continue; } /* Cope with foreign keys */ struct foreignRef *fRef; for (fRef = foreignRefList; fRef != NULL; fRef = fRef->next) { char *origVal = row[fRef->nativeFieldIx]; char *val = emptyForNull(csvParseNext(&origVal, csvScratch)); char *escaped = sqlEscapeString(val); dyStringPrintf(sql, "\"%s\"", escaped); dyStringClear(sql); sqlDyStringPrintf(sql, "select %s from %s where %s=\"%s\"", fRef->foreignKeyName, fRef->foreignTable, fRef->foreignFindName, escaped); - verbose(1, "query for foreignKey: %s\n", sql->string); + verbose(2, "query for foreignKey: %s\n", sql->string); fRef->foreignKey = sqlQuickString(conn, sql->string); if (isEmpty(fRef->foreignKey)) errAbort("No %s in table %s referenced line %d of %s", val, fRef->foreignTable, fr->id, tabFile); row[fRef->nativeFieldIx] = fRef->foreignKey; freez(&escaped); } dyStringClear(sql); sqlDyStringPrintf(sql, "insert into %s (", tableName); boolean firstTime = TRUE; for (fieldIx=0; fieldIx < inTable->fieldCount; ++fieldIx) { char *field = inFields[fieldIx]; char firstChar = field[0]; if (firstChar == '@') { if (field[1] == '@') // multi field { // Actually multi field variables don't get written, all lives in - // the relationship table + // the relationship table which we handle after the insert into main + // table. continue; } else { char *startField = field + 1; // skip over '@' char *endField = strchr(startField, '@'); // This is parsed out so we know it works until someone rearranged code assert(endField != NULL); field = cloneStringZ(startField, endField-startField); } } // We already dealt with the question mark outside this loop if (firstChar == '?') field += 1; @@ -225,71 +261,81 @@ dyStringAppendC(sql, ','); dyStringAppend(sql, field); } /* Now generate the values bit */ dyStringAppend(sql, ") values ("); firstTime = TRUE; for (fieldIx=0; fieldIx < inTable->fieldCount; ++fieldIx) { char *field = inFields[fieldIx]; char firstChar = field[0]; if (firstChar == '@') { if (field[1] == '@') // multi field - continue; + { + continue; // multi field output doesn't go into this table, just relationship + } else field += 1; // We val with the foreign key here, just skip over '@' } if (firstChar == '?') field += 1; if (firstTime) firstTime = !firstTime; else dyStringAppendC(sql, ','); char *origVal = row[fieldIx]; char *val = emptyForNull(csvParseNext(&origVal, csvScratch)); char *escaped = sqlEscapeString(val); dyStringPrintf(sql, "\"%s\"", escaped); freez(&escaped); } dyStringAppendC(sql, ')'); - verbose(1, "update sql: %s\n", sql->string); + verbose(2, "update sql: %s\n", sql->string); sqlUpdate(conn, sql->string); + int mainTableId = sqlLastAutoId(conn); + + /* Handle multi-multi stuff */ + struct multiRef *mRef; + for (mRef = multiRefList; mRef != NULL; mRef = mRef->next) + { + addMultiRelation(conn, mRef, fr, mainTableId, tabFile, csvScratch); + } /* Clean up strings allocated for field references */ for (fRef = foreignRefList; fRef != NULL; fRef = fRef->next) freez(&fRef->foreignKey); } dyStringFree(&sql); dyStringFree(&csvScratch); +fieldedTableFree(&inTable); } void sqlUpdateRelated(char *database, char **inFiles, int inCount) /* sqlUpdateRelated - Update a bunch of tables in a kind of careful way based out of tab * separated files. Handles foreign key and many-to-many relationships with a multitude * of @ signs.. */ { struct sqlConnection *conn = sqlConnect(database); int fileIx; for (fileIx = 0; fileIx < inCount; ++fileIx) { char *inFile = inFiles[fileIx]; char *tableName = cloneString(inFile); chopSuffix(tableName); verbose(1, "Processing %s into %s table \n", inFile, tableName); - sqlUpdateViaTabFile(conn, inFile, tableName); } sqlDisconnect(&conn); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc < 3) usage(); sqlUpdateRelated(argv[1], argv+2, argc-2); return 0; }