44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error if the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrintf is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/lib/customFactory.c src/hg/lib/customFactory.c index 195453f..3ff42f0 100644 --- src/hg/lib/customFactory.c +++ src/hg/lib/customFactory.c @@ -328,70 +328,70 @@ struct customTrack *track) /* Return TRUE if looks like we're handling a colored-exon track */ { return bedRecognizer(fac, cpp, type, track) && (track->fieldCount >= 14); } static struct pipeline *bedLoaderPipe(struct customTrack *track) /* Set up pipeline that will load wig into database. 
*/ { /* running the single command: * hgLoadBed -customTrackLoader -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin * -customTrackLoader turns on options: -noNameIx -noHistory -ignoreEmpty * -allowStartEqualEnd -allowNegativeScores -verbose=0 */ -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); int index = 3; /* verify this references the first NULL as cmd1[index] */ char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-lineLimit=50000000", NULL, NULL, NULL, NULL, NULL, NULL, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); -cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); if(startsWithWord("bedGraph", track->dbTrackType)) { /* we currently assume that last field is the bedGraph field. */ dyStringPrintf(tmpDy, "-bedGraph=%d", track->fieldCount); cmd1[index++] = dyStringCannibalize(&tmpDy); } cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. 
* This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort, "/dev/null", track->dbStderrFile, 0); } void pipelineFailExit(struct customTrack *track) /* show up to three lines of error message to stderr and errAbort */ { -struct dyString *errDy = newDyString(0); +struct dyString *errDy = dyStringNew(0); struct lineFile *lf; char *line; int i; dyStringPrintf(errDy, "track load error (track name='%s'):<BR>\n", track->tdb->track); lf = lineFileOpen(track->dbStderrFile, TRUE); i = 0; while( (i < 10) && lineFileNext(lf, &line, NULL)) { dyStringPrintf(errDy, "%s<BR>\n", line); ++i; // break out of loop after wibSizeLimit msg to avoid printing stuff from other commands in the pipe. if(strstr(line, "wibSizeLimit")) break; } lineFileClose(&lf); @@ -748,43 +748,43 @@ else lineFileAbort(cpp->fileStack, "wrong number of columns for type '%s'", type); freeMem(dupe); customPpReuse(cpp, line); return (pt != invalid); } static struct pipeline *encodePeakLoaderPipe(struct customTrack *track) /* Set up pipeline that will load the encodePeak into database. 
*/ { /* running the single command: * hgLoadBed -customTrackLoader -sqlTable=loader/encodePeak.sql -renameSqlTable * -trimSqlTable -notItemRgb -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin */ -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-sqlTable=loader/encodePeak.sql", "-renameSqlTable", "-trimSqlTable", "-notItemRgb", NULL, NULL, NULL, NULL, NULL, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; int index = 6; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); cmd1[index++] = dyStringCannibalize(&tmpDy); cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. * This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort, @@ -911,43 +911,43 @@ freeMem(dupe); customPpReuse(cpp, line); return (isBed); } static struct pipeline *bedDetailLoaderPipe(struct customTrack *track) /* Set up pipeline that will load the bedDetail into database. 
*/ /* Must be tab separated file, so that can have spaces in description */ { /* running the single command: * hgLoadBed -customTrackLoader -sqlTable=loader/bedDetail.sql -renameSqlTable * -trimSqlTable -notItemRgb -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin */ -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); //bed size can vary char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-tab", "-noBin", "-sqlTable=loader/bedDetail.sql", "-renameSqlTable", "-trimSqlTable", "-bedDetail", NULL, NULL, NULL, NULL, NULL, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; int index = 8; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); cmd1[index++] = dyStringCannibalize(&tmpDy); cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. 
* This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort, @@ -1117,42 +1117,42 @@ lineFileAbort(lf, "type is pgSnp so it must have 7 fields but has %d", wordCount); freeMem(dupe); customPpReuse(cpp, line); return (isPgSnp); } static struct pipeline *pgSnpLoaderPipe(struct customTrack *track) /* Set up pipeline that will load the pgSnp into database. */ { /* running the single command: * hgLoadBed -customTrackLoader -sqlTable=loader/pgSnp.sql -renameSqlTable * -trimSqlTable -notItemRgb -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin */ -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-sqlTable=loader/pgSnp.sql", "-renameSqlTable", "-trimSqlTable", "-notItemRgb", NULL, NULL, NULL, NULL, NULL, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; int index = 6; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); cmd1[index++] = dyStringCannibalize(&tmpDy); cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. 
* This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort, @@ -1353,40 +1353,40 @@ * hgLoadBed -customTrackLoader -sqlTable=loader/schemaFile -renameSqlTable * -trimSqlTable -notItemRgb -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin */ char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", NULL, "-renameSqlTable", "-trimSqlTable", "-notItemRgb", "-noBin", NULL, NULL, NULL, NULL, NULL, NULL}; char *schemaFile = "barChartBed.sql"; -struct dyString *ds = newDyString(0); +struct dyString *ds = dyStringNew(0); dyStringPrintf(ds, "-sqlTable=loader/%s", schemaFile); cmd1[2] = dyStringCannibalize(&ds); int index = 7; -ds = newDyString(0); +ds = dyStringNew(0); dyStringPrintf(ds, "-tmpDir=%s", tmpDir); cmd1[index++] = dyStringCannibalize(&ds); -ds = newDyString(0); +ds = dyStringNew(0); dyStringPrintf(ds, "-maxChromNameLength=%d", track->maxChromName); cmd1[index++] = dyStringCannibalize(&ds); cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. 
* This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ @@ -1542,40 +1542,40 @@ * hgLoadBed -customTrackLoader -sqlTable=loader/schemaFile -renameSqlTable * -trimSqlTable -notItemRgb -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin */ char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", NULL, "-renameSqlTable", "-trimSqlTable", "-notItemRgb", NULL, NULL, NULL, NULL, NULL, NULL}; char *schemaFile = "interact.sql"; -struct dyString *ds = newDyString(0); +struct dyString *ds = dyStringNew(0); dyStringPrintf(ds, "-sqlTable=loader/%s", schemaFile); cmd1[2] = dyStringCannibalize(&ds); int index = 6; -ds = newDyString(0); +ds = dyStringNew(0); dyStringPrintf(ds, "-tmpDir=%s", tmpDir); cmd1[index++] = dyStringCannibalize(&ds); -ds = newDyString(0); +ds = dyStringNew(0); dyStringPrintf(ds, "-maxChromNameLength=%d", track->maxChromName); cmd1[index++] = dyStringCannibalize(&ds); cmd1[index++] = CUSTOM_TRASH; cmd1[index++] = track->dbTableName; cmd1[index++] = "stdin"; assert(index <= ArraySize(cmd1)); /* the "/dev/null" file isn't actually used for anything, but it is used * in the pipeLineOpen to properly get a pipe started that isn't simply * to STDOUT which is what a NULL would do here instead of this name. 
* This function exits if it can't get the pipe created * The dbStderrFile will get stderr messages from hgLoadBed into the * our private error log so we can send it back to the user */ @@ -2189,51 +2189,51 @@ { if (type != NULL && !sameType(type, fac->name)) return FALSE; boolean isMaf = headerStartsWith(cpp, "##maf version"); if (type != NULL && !isMaf) lineFileAbort(cpp->fileStack, "type is '%s' but header does not start with '##maf version'", type); return isMaf; } static void mafLoaderBuildTab(struct customTrack *track, char *mafFile) /* build maf tab file and load in database */ { customFactorySetupDbTrack(track); -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); char *cmd1[] = {"loader/hgLoadMaf", "-verbose=0", "-custom", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; char **cmds[] = {cmd1, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; struct tempName tn; trashDirFile(&tn, "ct", "ct", ".pos"); if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd1[3] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[3] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-loadFile=%s", mafFile); -cmd1[4] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[4] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-refDb=%s", track->genomeDb); -cmd1[5] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[5] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxNameLen=%d", track->maxChromName); -cmd1[6] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd1[6] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-defPos=%s", tn.forCgi); cmd1[7] = dyStringCannibalize(&tmpDy); cmd1[8] = CUSTOM_TRASH; 
cmd1[9] = track->dbTableName; struct pipeline *dataPipe = pipelineOpen(cmds, pipelineWrite | pipelineNoAbort, "/dev/null", track->dbStderrFile, 0); if(pipelineWait(dataPipe)) pipelineFailExit(track); /* prints error and exits */ pipelineFree(&dataPipe); unlink(track->dbStderrFile); /* no errors, not used */ track->wigFile = NULL; struct lineFile *lf = lineFileOpen(tn.forCgi, TRUE); @@ -2339,48 +2339,48 @@ /* Return TRUE if looks like we're handling a wig track */ { return (sameOk(type, fac->name) || sameType(type, "wig")); } static struct pipeline *wigLoaderPipe(struct customTrack *track) /* Set up pipeline that will load wig into database. */ { /* Run the two commands in a pipeline: * loader/wigEncode -verbose=0 -wibSizeLimit=300000000 stdin stdout \ * ${wibFile} | \ * loader/hgLoadWiggle -verbose=0 -noHistory -tmpDir=/data/tmp \ * -maxChromNameLength=${nameLength} -chromInfoDb=${database} \ * -pathPrefix=[.|/] ${db} ${table} stdin */ -struct dyString *tmpDy = newDyString(0); +struct dyString *tmpDy = dyStringNew(0); char *cmd1[] = {"loader/wigEncode", "-verbose=0", "-wibSizeLimit=300000000", "stdin", "stdout", NULL, NULL}; char *cmd2[] = {"loader/hgLoadWiggle", "-verbose=0", "-noHistory", NULL, NULL, NULL, NULL, NULL, NULL, "stdin", NULL}; char **cmds[] = {cmd1, cmd2, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; cmd1[5] = track->wibFile; if (stat(tmpDir,&statBuf)) errAbort("can not find custom track tmp load directory: '%s'<BR>\n" "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); -cmd2[3] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd2[3] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); -cmd2[4] = dyStringCannibalize(&tmpDy); tmpDy = newDyString(0); +cmd2[4] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); // hgLoadWiggle doesn't know about assembly hubs 
so disable size check if (trackHubDatabase(track->genomeDb)) { cmd2[5] = "-noChromInfo"; } else { dyStringPrintf(tmpDy, "-chromInfoDb=%s", track->genomeDb); cmd2[5] = dyStringCannibalize(&tmpDy); } /* a system could be using /trash/ absolute reference, and nothing to do with * local references, so don't confuse it with ./ a double // will work */ if (startsWith("/", trashDir())) cmd2[6] = "-pathPrefix=/";