44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error if the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for correct SQL use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/lib/borf.c src/hg/lib/borf.c index 353f8d6..262cba0 100644 --- src/hg/lib/borf.c +++ src/hg/lib/borf.c @@ -1,301 +1,301 @@ /* borf.c was originally generated by the autoSql program, which also * generated borf.h and borf.sql. This module links the database and * the RAM representation of objects. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "borf.h" #include "hdb.h" #include "dnaseq.h" #include "fa.h" #include "portable.h" static char *_bestOrfExe = "/hive/data/outside/bestorf.linux/bestorf"; static char *_bestOrfParam = "/hive/data/outside/bestorf.linux/hume.dat"; void borfStaticLoad(char **row, struct borf *ret) /* Load a row from borf table into ret. 
The contents of ret will * be replaced at the next call to this function. */ { ret->name = row[0]; ret->size = sqlSigned(row[1]); strcpy(ret->strand, row[2]); ret->feature = row[3]; ret->cdsStart = sqlSigned(row[4]); ret->cdsEnd = sqlSigned(row[5]); ret->score = atof(row[6]); ret->orfStart = sqlSigned(row[7]); ret->orfEnd = sqlSigned(row[8]); ret->cdsSize = sqlSigned(row[9]); strcpy(ret->frame, row[10]); ret->protein = row[11]; } struct borf *borfLoad(char **row) /* Load a borf from row fetched with select * from borf * from database. Dispose of this with borfFree(). */ { struct borf *ret; AllocVar(ret); ret->name = cloneString(row[0]); ret->size = sqlSigned(row[1]); strcpy(ret->strand, row[2]); ret->feature = cloneString(row[3]); ret->cdsStart = sqlSigned(row[4]); ret->cdsEnd = sqlSigned(row[5]); ret->score = atof(row[6]); ret->orfStart = sqlSigned(row[7]); ret->orfEnd = sqlSigned(row[8]); ret->cdsSize = sqlSigned(row[9]); strcpy(ret->frame, row[10]); if(ret->cdsSize != 0) ret->protein = cloneString(row[11]); return ret; } struct borf *borfLoadAll(char *fileName) /* Load all borf from a tab-separated file. * Dispose of this with borfFreeList(). */ { struct borf *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[12]; while (lineFileChopNextTab(lf, row, ArraySize(row))) { el = borfLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct borf *borfLoadWhere(struct sqlConnection *conn, char *table, char *where) /* Load all borf from table that satisfy where clause. The * where clause may be NULL in which case whole table is loaded * Dispose of this with borfFreeList(). 
*/ { struct borf *list = NULL, *el; struct dyString *query = dyStringNew(256); struct sqlResult *sr; char **row; sqlDyStringPrintf(query, "select * from %s", table); if (where != NULL) - dyStringPrintf(query, " where %s", where); + sqlDyStringPrintf(query, " where %-s", where); sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { el = borfLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); dyStringFree(&query); return list; } struct borf *borfCommaIn(char **pS, struct borf *ret) /* Create a borf out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new borf */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->name = sqlStringComma(&s); ret->size = sqlSignedComma(&s); sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); ret->feature = sqlStringComma(&s); ret->cdsStart = sqlSignedComma(&s); ret->cdsEnd = sqlSignedComma(&s); ret->score = sqlFloatComma(&s); ret->orfStart = sqlSignedComma(&s); ret->orfEnd = sqlSignedComma(&s); ret->cdsSize = sqlSignedComma(&s); sqlFixedStringComma(&s, ret->frame, sizeof(ret->frame)); ret->protein = sqlStringComma(&s); *pS = s; return ret; } void borfFree(struct borf **pEl) /* Free a single dynamically allocated borf such as created * with borfLoad(). */ { struct borf *el; if ((el = *pEl) == NULL) return; freeMem(el->name); freeMem(el->feature); freeMem(el->protein); freez(pEl); } void borfFreeList(struct borf **pList) /* Free a list of dynamically allocated borf's */ { struct borf *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; borfFree(&el); } *pList = NULL; } void borfOutput(struct borf *el, FILE *f, char sep, char lastSep) /* Print out borf. Separate fields with sep. Follow last field with lastSep. 
*/ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->name); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->size); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->strand); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); if (el->feature != NULL) fprintf(f, "%s", el->feature); else fprintf(f, "NA"); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->cdsStart); fputc(sep,f); fprintf(f, "%d", el->cdsEnd); fputc(sep,f); fprintf(f, "%f", el->score); fputc(sep,f); fprintf(f, "%d", el->orfStart); fputc(sep,f); fprintf(f, "%d", el->orfEnd); fputc(sep,f); fprintf(f, "%d", el->cdsSize); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->frame); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); if (el->protein != NULL) fprintf(f, "%s", el->protein); else fprintf(f, "NA"); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void borfSetExeAndParam(char *exePath, char *paramPath) /* Set the path to the bestOrf probram and parameter file to be used for running borfFromGenomeBed. */ { _bestOrfExe = exePath; _bestOrfParam = paramPath; } static char *borfMustFindLine(struct lineFile *lf, char *pat) /* Find line that starts (after skipping white space) * with pattern or die trying. Skip over pat in return*/ { char *line; while (lineFileNext(lf, &line, NULL)) { line = skipLeadingSpaces(line); if (startsWith(pat, line)) { line += strlen(pat); return skipLeadingSpaces(line); } } errAbort("borf::borfMustFindLine() - Couldn't find %s in %s\n", pat, lf->fileName); return NULL; } struct borf *borfFromBestorfOutput(char *fileName) /* Convert bestorf output to borf structure. 
*/ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line = borfMustFindLine(lf, "BESTORF"); char *word=NULL; char *row[13]; struct borf *borf = NULL; struct dnaSeq seq; AllocVar(borf); ZeroVar(&seq); line = borfMustFindLine(lf, "Seq name:"); word = nextWord(&line); borf->name = cloneString(word); /* Name */ line = borfMustFindLine(lf, "Length of sequence:"); borf->size = atoi(line); /* Size */ lineFileNeedNext(lf, &line, NULL); line = skipLeadingSpaces(line); if (startsWith("no reliable", line)) { return borf; } else { line = borfMustFindLine(lf, "G Str F"); lineFileSkip(lf, 1); lineFileRow(lf, row); safef(borf->strand, sizeof(borf->strand),"%s", row[1]); /* Strand. */ borf->feature = cloneString(row[3]); /* Feature. */ borf->cdsStart = lineFileNeedNum(lf, row, 4) - 1; /* cdsStart */ borf->cdsEnd = lineFileNeedNum(lf, row, 6); /* cdsEnd */ borf->score = atof(row[7]); /* score */ borf->orfStart = lineFileNeedNum(lf, row, 8) - 1; /* orfStart */ borf->orfEnd = lineFileNeedNum(lf, row, 10); /* orfEnd */ borf->cdsSize = atoi(row[11]); /* cdsSize. */ safef(borf->frame, sizeof(borf->frame), "%s", row[12]); /* Frame */ line = borfMustFindLine(lf, "Predicted protein"); if (!faPepSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) errAbort("Can't find peptide in %s", lf->fileName); borf->protein = cloneString(seq.dna); /* Protein. */ } lineFileClose(&lf); return borf; } struct borf *borfFromGenomeBed(char *db, struct bed *bed) /* borfBig - Run Victor Solovyev's bestOrf on a genome bed's coordinates. 
*/ { static char *tmpFa = NULL; static char *tmpOrf = NULL; struct borf *borf = NULL; struct dnaSeq *seq = NULL; -struct dyString *cmd = newDyString(256); +struct dyString *cmd = dyStringNew(256); int retVal = 0; if(tmpFa == NULL) tmpFa = cloneString(rTempName("/tmp", "borf", ".fa")); if(tmpOrf == NULL) tmpOrf = cloneString(rTempName("/tmp","borf", ".out")); seq = hSeqForBed(db, bed); faWrite(tmpFa, seq->name, seq->dna, seq->size); dyStringClear(cmd); dyStringPrintf(cmd, "%s %s %s > %s", _bestOrfExe, _bestOrfParam, tmpFa, tmpOrf); retVal = system(cmd->string); if (0 != retVal) errAbort("borfFromGenomeBed: failing command: '%s'\n", cmd->string); borf = borfFromBestorfOutput(tmpOrf); remove(tmpFa); remove(tmpOrf); dyStringFree(&cmd); dnaSeqFree(&seq); return borf; }