44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/cgilib/annoStreamDbFactorSource.c src/hg/cgilib/annoStreamDbFactorSource.c index ce03287..d93adf3 100644 --- src/hg/cgilib/annoStreamDbFactorSource.c +++ src/hg/cgilib/annoStreamDbFactorSource.c @@ -1,213 +1,216 @@ /* annoStreamDbFactorSource -- factorSource track w/related tables */ #include "annoStreamDbFactorSource.h" #include "annoStreamDb.h" #include "factorSource.h" #include "hdb.h" #include "sqlNum.h" static char *asfsAutoSqlString = "table factorSourcePlus" "\"factorSourcePlus: Peaks clustered by factor w/normalized scores plus cell type and treatment\"" " (" " string chrom; \"Chromosome\"" " uint chromStart; \"Start position in chrom\"" " uint chromEnd; \"End position in chrom\"" " string name; \"factor that has a peak here\"" " uint score; \"Score from 0-1000\"" " uint expCount; \"Number of experiment values\"" " uint[expCount] expNums; \"Comma separated list of experiment numbers\"" " float[expCount] expScores; \"Comma separated list of experiment scores\"" " string[expCount] cellType; \"Comma separated list of experiment cell types\"" " string[expCount] treatment; \"Comma separated list of experiment treatments\"" " )"; #define FACTORSOURCEPLUS_NUM_COLS 10 struct annoStreamDbFactorSource { struct annoStreamer streamer; // Parent class members & methods (external interface) // Private members struct annoStreamer *mySource; // Internal source of track table rows // Data from related tables int expCount; // Number of experiments whose results were clustered char **expCellType; // Array[expCount] of cellType used in each experiment char **expTreatment; // Array[expCount] of treatment used in each experiment }; struct asObject *annoStreamDbFactorSourceAsObj() /* Return an autoSql object that describs fields of a joining query on a factorSource table * and its inputs. */ { return asParseText(asfsAutoSqlString); } static void asdfsSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd) /* Pass setRegion down to internal source. */ { annoStreamerSetRegion(sSelf, chrom, rStart, rEnd); struct annoStreamDbFactorSource *self = (struct annoStreamDbFactorSource *)sSelf; self->mySource->setRegion(self->mySource, chrom, rStart, rEnd); } static char *commaSepFromExpData(char **expAttrs, int *expNums, uint expCount, struct lm *lm) /* Look up experiment attribute strings by experiment numbers; return a comma-separated string * of experiment attributes, allocated using lm. */ { int i; int len = 0, offset = 0; for (i = 0; i < expCount; i++) len += (strlen(expAttrs[expNums[i]]) + 1); char *str = lmAlloc(lm, len + 1); for (i = 0; i < expCount; i++) { char *attr = expAttrs[expNums[i]]; safef(str + offset, len + 1 - offset, "%s,", attr); offset += strlen(attr) + 1; } return str; } INLINE void getCommaSepInts(char *commaSep, int *values, int expectedCount) /* Parse comma-separated ints into values[]. This is like sqlSignedStaticArray, * but we give it an expected count and it's thread-safe because it doesn't use * static variables. */ { char *s = commaSep, *e = NULL; int count; for (count = 0; isNotEmpty(s); count++, s = e) { e = strchr(s, ','); if (e != NULL) *e++ = 0; if (count < expectedCount) values[count] = sqlSigned(s); } if (count != expectedCount) errAbort("getCommaSepInts: expected %d values but found %d", expectedCount, count); } static void factorSourceToFactorSourcePlus(struct annoStreamDbFactorSource *self, char **fsWords, char **fspWords, struct lm *lm) /* Copy fsWords into fspWords and add columns for cellTypes and treatments corresponding to * expNums. */ { // Parse out experiment IDs from expNums column uint expCount = sqlUnsigned(fsWords[5]); int expNums[expCount]; getCommaSepInts(fsWords[6], expNums, expCount); // Copy factorSource columns, then add experiment attribute columns. int i; for (i = 0; i < FACTORSOURCE_NUM_COLS; i++) fspWords[i] = fsWords[i]; fspWords[i++] = commaSepFromExpData(self->expCellType, expNums, expCount, lm); fspWords[i++] = commaSepFromExpData(self->expTreatment, expNums, expCount, lm); if (i != FACTORSOURCEPLUS_NUM_COLS) errAbort("annoStreamDbFactorSource %s: expected to make %d columns but made %d", self->streamer.name, FACTORSOURCEPLUS_NUM_COLS, i); } static struct annoRow *asdfsNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd, struct lm *lm) /* Join experiment data with expNums from track table and apply filters. */ { struct annoStreamDbFactorSource *self = (struct annoStreamDbFactorSource *)sSelf; char **fspWords; lmAllocArray(lm, fspWords, FACTORSOURCEPLUS_NUM_COLS); struct annoRow *fsRow; boolean rightJoinFail = FALSE; while ((fsRow = self->mySource->nextRow(self->mySource, minChrom, minEnd, lm)) != NULL) { char **fsWords = fsRow->data; factorSourceToFactorSourcePlus(self, fsWords, fspWords, lm); // If there are filters on experiment attributes, apply them, otherwise just return aRow. if (sSelf->filters) { boolean fails = annoFilterRowFails(sSelf->filters, fspWords, FACTORSOURCEPLUS_NUM_COLS, &rightJoinFail); // If this row passes the filter, or fails but is rightJoin, then we're done looking. if (!fails || rightJoinFail) break; } else // no filtering to do, just use this row break; } if (fsRow != NULL) return annoRowFromStringArray(fsRow->chrom, fsRow->start, fsRow->end, rightJoinFail, fspWords, FACTORSOURCEPLUS_NUM_COLS, lm); else return NULL; } static void getExperimentData(struct annoStreamDbFactorSource *self, char *db, char *sourceTable, char *inputsTable) /* Join two small tables to relate experiment IDs from the track tables expNums column * to experiment attributes cellType and treatment. */ { struct sqlConnection *conn = hAllocConn(db); -self->expCount = sqlRowCount(conn, sourceTable); + +char queryTblSafe[1024]; +sqlSafef(queryTblSafe, sizeof queryTblSafe, "%s", sourceTable); +self->expCount = sqlRowCount(conn, queryTblSafe); AllocArray(self->expCellType, self->expCount); AllocArray(self->expTreatment, self->expCount); struct dyString *query = sqlDyStringCreate("select id, cellType, treatment " "from %s, %s where %s.description = %s.source", sourceTable, inputsTable, sourceTable, inputsTable); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int id = sqlSigned(row[0]); if (id < 0 || id >= self->expCount) errAbort("annoStreamDbFactorSource %s: found out-of-range id %d in %s (expected [0-%d])", ((struct annoStreamer *)self)->name, id, sourceTable, self->expCount - 1); self->expCellType[id] = cloneString(row[1]); self->expTreatment[id] = cloneString(row[2]); } sqlFreeResult(&sr); hFreeConn(&conn); } static void asdfsClose(struct annoStreamer **pSSelf) /* Free up state. */ { if (pSSelf == NULL) return; struct annoStreamDbFactorSource *self = *(struct annoStreamDbFactorSource **)pSSelf; self->mySource->close(&(self->mySource)); int i; for (i = 0; i < self->expCount; i++) { freeMem(self->expCellType[i]); freeMem(self->expTreatment[i]); } freez(&self->expCellType); freez(&self->expTreatment); annoStreamerFree(pSSelf); } struct annoStreamer *annoStreamDbFactorSourceNew(char *db, char *trackTable, char *sourceTable, char *inputsTable, struct annoAssembly *aa, int maxOutRows) /* Create an annoStreamer (subclass) object using three database tables: * trackTable: the table for a track with type factorSource (bed5 + exp{Count,Nums,Scores}) * sourceTable: trackTable's tdb setting sourceTable; expNums -> source name "cellType+lab+antibody" * inputsTable: trackTable's tdb setting inputTrackTable; source name -> cellType, treatment, etc. */ { struct annoStreamDbFactorSource *self; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); // Set up external streamer interface annoStreamerInit(streamer, aa, annoStreamDbFactorSourceAsObj(), trackTable); streamer->rowType = arWords; // Get internal streamer for trackTable self->mySource = annoStreamDbNew(db, trackTable, aa, maxOutRows, NULL); // Slurp in data from small related tables getExperimentData(self, db, sourceTable, inputsTable); // Override methods that need to pass through to internal source: streamer->setRegion = asdfsSetRegion; streamer->nextRow = asdfsNextRow; streamer->close = asdfsClose; return (struct annoStreamer *)self; }