44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/cgilib/cutter.c src/hg/cgilib/cutter.c index dd19d02..c81bc78 100644 --- src/hg/cgilib/cutter.c +++ src/hg/cgilib/cutter.c @@ -1,791 +1,791 @@ /* cutter.c was originally generated by the autoSql program, which also * generated cutter.h and cutter.sql. This module links the database and * the RAM representation of objects. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "dnaseq.h" #include "dnautil.h" #include "bed.h" #include "cutter.h" struct cutter *cutterLoad(char **row) /* Load a cutter from row fetched with select * from cutter * from database. Dispose of this with cutterFree(). */ { struct cutter *ret; AllocVar(ret); ret->numSciz = sqlUnsigned(row[8]); ret->numCompanies = sqlUnsigned(row[10]); ret->numRefs = sqlUnsigned(row[12]); ret->name = cloneString(row[0]); ret->size = sqlUnsigned(row[1]); ret->matchSize = sqlUnsigned(row[2]); ret->seq = cloneString(row[3]); ret->cut = sqlUnsigned(row[4]); ret->overhang = sqlSigned(row[5]); ret->palindromic = sqlUnsigned(row[6]); ret->semicolon = sqlUnsigned(row[7]); { int sizeOne; sqlStringDynamicArray(row[9], &ret->scizs, &sizeOne); assert(sizeOne == ret->numSciz); } { int sizeOne; sqlCharDynamicArray(row[11], &ret->companies, &sizeOne); assert(sizeOne == ret->numCompanies); } { int sizeOne; sqlUnsignedDynamicArray(row[13], &ret->refs, &sizeOne); assert(sizeOne == ret->numRefs); } return ret; } struct cutter *cutterLoadAll(char *fileName) /* Load all cutter from a whitespace-separated file. * Dispose of this with cutterFreeList(). */ { struct cutter *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[14]; while (lineFileRow(lf, row)) { el = cutterLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct cutter *cutterLoadAllByChar(char *fileName, char chopper) /* Load all cutter from a chopper separated file. * Dispose of this with cutterFreeList(). */ { struct cutter *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[14]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = cutterLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct cutter *cutterLoadByQuery(struct sqlConnection *conn, char *query) /* Load all cutter from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with cutterFreeList(). */ { struct cutter *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = cutterLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void cutterSaveToDb(struct sqlConnection *conn, struct cutter *el, char *tableName, int updateSize) /* Save cutter as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. Strings are automatically escaped to allow insertion into the database. */ { -struct dyString *update = newDyString(updateSize); +struct dyString *update = dyStringNew(updateSize); char *scizsArray, *companiesArray, *refsArray; scizsArray = sqlStringArrayToString(el->scizs, el->numSciz); companiesArray = sqlCharArrayToString(el->companies, el->numCompanies); refsArray = sqlUnsignedArrayToString(el->refs, el->numRefs); sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,%d,%u,%u,%u,'%s',%u,'%s',%u,'%s')", tableName, el->name, el->size, el->matchSize, el->seq, el->cut, el->overhang, el->palindromic, el->semicolon, el->numSciz, scizsArray , el->numCompanies, companiesArray , el->numRefs, refsArray ); sqlUpdate(conn, update->string); -freeDyString(&update); +dyStringFree(&update); freez(&scizsArray); freez(&companiesArray); freez(&refsArray); } struct cutter *cutterCommaIn(char **pS, struct cutter *ret) /* Create a cutter out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new cutter */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->name = sqlStringComma(&s); ret->size = sqlUnsignedComma(&s); ret->matchSize = sqlUnsignedComma(&s); ret->seq = sqlStringComma(&s); ret->cut = sqlUnsignedComma(&s); ret->overhang = sqlSignedComma(&s); ret->palindromic = sqlUnsignedComma(&s); ret->semicolon = sqlUnsignedComma(&s); ret->numSciz = sqlUnsignedComma(&s); { int i; s = sqlEatChar(s, '{'); AllocArray(ret->scizs, ret->numSciz); for (i=0; i<ret->numSciz; ++i) { ret->scizs[i] = sqlStringComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } ret->numCompanies = sqlUnsignedComma(&s); { int i; s = sqlEatChar(s, '{'); AllocArray(ret->companies, ret->numCompanies); for (i=0; i<ret->numCompanies; ++i) { ret->companies[i] = sqlCharComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } ret->numRefs = sqlUnsignedComma(&s); { int i; s = sqlEatChar(s, '{'); AllocArray(ret->refs, ret->numRefs); for (i=0; i<ret->numRefs; ++i) { ret->refs[i] = sqlUnsignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } *pS = s; return ret; } void cutterFree(struct cutter **pEl) /* Free a single dynamically allocated cutter such as created * with cutterLoad(). */ { struct cutter *el; if ((el = *pEl) == NULL) return; freeMem(el->name); freeMem(el->seq); /* All strings in scizs are allocated at once, so only need to free first. */ if (el->scizs != NULL) freeMem(el->scizs[0]); freeMem(el->scizs); freeMem(el->companies); freeMem(el->refs); freez(pEl); } void cutterFreeList(struct cutter **pList) /* Free a list of dynamically allocated cutter's */ { struct cutter *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; cutterFree(&el); } *pList = NULL; } void cutterOutput(struct cutter *el, FILE *f, char sep, char lastSep) /* Print out cutter. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->name); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->size); fputc(sep,f); fprintf(f, "%u", el->matchSize); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->seq); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->cut); fputc(sep,f); fprintf(f, "%d", el->overhang); fputc(sep,f); fprintf(f, "%u", el->palindromic); fputc(sep,f); fprintf(f, "%u", el->semicolon); fputc(sep,f); fprintf(f, "%u", el->numSciz); fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->numSciz; ++i) { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->scizs[i]); if (sep == ',') fputc('"',f); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(sep,f); fprintf(f, "%u", el->numCompanies); fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->numCompanies; ++i) { if (sep == ',') fputc('"',f); fprintf(f, "%c", el->companies[i]); if (sep == ',') fputc('"',f); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(sep,f); fprintf(f, "%u", el->numRefs); fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->numRefs; ++i) { fprintf(f, "%u", el->refs[i]); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ boolean isIsosciz(struct cutter *mainEnz, struct cutter *possibleScizEnz) /* Is possibleScizEnz an isoscizomer of mainEnz? */ { int i; for (i = 0; i < mainEnz->numSciz; i++) if (sameString(possibleScizEnz->name, mainEnz->scizs[i])) return TRUE; return FALSE; } struct dnaSeq *stickyEnd(struct cutter *enz) /* Return the sticky end sequence of an enzyme. Strand is unspecified. Free this. */ { struct dnaSeq *ret = NULL; if (!enz) return NULL; if (enz->overhang > 0) { char *seq = cloneStringZ(enz->seq + enz->cut, enz->overhang); ret = newDnaSeq(seq, enz->overhang, "sticky"); } else if (enz->overhang < 0) { int size = intAbs(enz->overhang); char *seq = cloneStringZ(enz->seq + enz->cut - size, size); complement(seq, strlen(seq)); ret = newDnaSeq(seq, strlen(seq), "sticky"); } return ret; } int acgtCount(char *seq) /* Return the count of canonical bases in a sequence. */ { return countChars(seq,'A') + countChars(seq,'C') + countChars(seq,'G') + countChars(seq,'T'); } boolean sameStickyEnd(struct cutter *enz1, struct cutter *enz2) /* Check to see if two enzymes make the same sticky ends. If either of the enzymes have sticky ends that isn't all ACGT, then this returns false. */ { boolean ret = FALSE; struct dnaSeq *sticky1 = stickyEnd(enz1); struct dnaSeq *sticky2 = stickyEnd(enz2); if (sticky1 && sticky2) if (sticky1 && sticky2 && (sticky1->size == sticky2->size) && (acgtCount(sticky1->dna) == sticky1->size) && (acgtCount(sticky2->dna) == sticky2->size)) { if (sameString(sticky1->dna, sticky2->dna)) ret = TRUE; else { reverseComplement(sticky2->dna, sticky2->size); if (sameString(sticky1->dna, sticky2->dna)) ret = TRUE; } } freeDnaSeq(&sticky1); freeDnaSeq(&sticky2); return ret; } struct slName *findIsoligamers(struct cutter *enz, struct cutter *enzList) /* Find isoligamers to an enzyme in a list of enzymes. */ { struct slName *list = NULL; struct cutter *cur; if (!enz || !enzList) return NULL; for (cur = enzList; cur != NULL; cur = cur->next) { if (!sameString(cur->name, enz->name) && (sameStickyEnd(enz, cur))) { struct slName *newname = newSlName(cur->name); slAddHead(&list, newname); } } slReverse(&list); return list; } boolean isPalindrome(char *seq) /* Determine if a sequence is palindromic. */ { int i, j, size = strlen(seq); for (i = 0; i < size; i++) switch (seq[i]) { case 'H': case 'B': case 'V': case 'D': return FALSE; } /* Loop from both ends of the sequence: i from the left, j from the right. */ for (i = 0, j = size-1; (i < size/2) && (j >= ((size%2==1) ? size/2+1 : size/2)); i++, j--) { switch (seq[i]) { case 'A': { if (seq[j] != 'T') return FALSE; break; } case 'C': { if (seq[j] != 'G') return FALSE; break; } case 'G': { if (seq[j] != 'C') return FALSE; break; } case 'T': { if (seq[j] != 'A') return FALSE; break; } case 'W': { if (seq[j] != 'S') return FALSE; break; } case 'S': { if (seq[j] != 'S') return FALSE; break; } case 'Y': { if (seq[j] != 'R') return FALSE; break; } case 'R': { if (seq[j] != 'Y') return FALSE; break; } case 'M': { if (seq[j] != 'K') return FALSE; break; } case 'K': { if (seq[j] != 'M') return FALSE; break; } case 'N': { if (seq[j] != 'N') return FALSE; break; } } } return TRUE; } struct cutter *readGcg(char *gcgFile) /* Parse a GCG file and load it into cutter format. */ { struct lineFile *lf = lineFileOpen(gcgFile,TRUE); struct cutter *enzList = NULL; char *line = "whatever", *words[10], numWords; /* Skip to the right line. */ while (lineFileNext(lf,&line,NULL) && !startsWith("..",line)); /* */ while ((numWords=lineFileChop(lf,words))) { struct cutter *newone = NULL; int comIx = (numWords==7) ? 5 : 6; int refIx = (numWords==7) ? 6 : 7; int i; char *items[100]; /* Skip ones */ if (words[4][0] == '?') continue; AllocVar(newone); newone->semicolon = (words[0][0] == ';') ? TRUE : FALSE; /* Deal with the first few columns */ if (!isdigit(words[1][0])) errAbort("Error: expecting a number in cut site column on line %d\n", lf->lineIx+1); if (!isdigit(words[3][0]) && words[3][0]!='-') errAbort("Error: expecting a number in the overhang column on line %d\n", lf->lineIx+1); if (words[comIx][0] != '>') errAbort("Error: expecting a \'>\' in the commercial sources column of line %d\n", lf->lineIx+1); newone->name = (words[0][0] == ';') ? cloneString(words[0]+1) : cloneString(words[0]); newone->cut = atoi(words[1]); newone->seq = cloneString(words[2]); touppers(newone->seq); stripChar(newone->seq,'\''); stripChar(newone->seq,'_'); newone->size = strlen(newone->seq); newone->matchSize = newone->size - countChars(newone->seq, 'N'); newone->palindromic = isPalindrome(newone->seq); newone->overhang = atoi(words[3]); newone->numCompanies = strlen(words[comIx]+1); if (newone->numCompanies > 0) newone->companies = cloneMem(words[comIx]+1, newone->numCompanies*sizeof(char)); newone->numRefs = chopString(words[refIx], ",", items, ArraySize(items)); AllocArray(newone->refs, newone->numRefs); for (i = 0; i < newone->numRefs; i++) { if (i == 100) errAbort("Error: Andy didn't make the array for holding references big enough\n"); if (!isdigit(items[i][0])) errAbort("Error: expecting number in references column in line %d\n", lf->lineIx+1); newone->refs[i] = atoi(items[i]); } /* Deal with isoscizomers. */ if (numWords == 8) { newone->numSciz = chopString(words[5], ",", items, ArraySize(items)); AllocArray(newone->scizs, newone->numSciz*sizeof(int)); for (i = 0; i < newone->numSciz; i++) { if (i == 100) errAbort("Error: Andy didn't make the array for having isoscizomers big enough\n"); newone->scizs[i] = cloneString(items[i]); } } else newone->numSciz = 0; slAddHead(&enzList, newone); } slReverse(&enzList); lineFileClose(&lf); return enzList; } boolean matchingBase(char enzBase, char seqBase) /* An enzyme matching */ { switch (seqBase) { /* Ignore hard-masked and gaps for now. */ case 'A': { switch (enzBase) { case 'A': case 'R': case 'M': case 'W': case 'H': case 'V': case 'D': case 'N': return TRUE; default: return FALSE; } } case 'C': { switch (enzBase) { case 'C': case 'Y': case 'M': case 'S': case 'H': case 'B': case 'V': case 'N': return TRUE; default: return FALSE; } } case 'G': { switch (enzBase) { case 'G': case 'R': case 'K': case 'S': case 'B': case 'V': case 'D': case 'N': return TRUE; default: return FALSE; } } case 'T': { switch (enzBase) { case 'T': case 'Y': case 'K': case 'W': case 'H': case 'B': case 'D': case 'N': return TRUE; default: return FALSE; } } } return FALSE; } struct bed *allocBedEnz(struct cutter *enz, char *seqName, int seqPos, char strand) { struct bed *newbed = NULL; AllocVar(newbed); newbed->chrom = cloneString(seqName); newbed->chromStart = (strand == '-') ? seqPos - enz->size : seqPos; newbed->chromEnd = (strand == '-') ? seqPos : seqPos + enz->size; newbed->name = cloneString(enz->name); newbed->score = 1000; newbed->strand[0] = strand; return newbed; } struct bed *searchStrand(struct hash *sixers, struct cutter *ACGTo[], struct dnaSeq *seq, int startOffset, char strand) /* Cheesey function that checks a strand for the enzymes after they're put in the hash/array structures. This used to be a part of the matchEnzymes function but I do it twice now. */ { struct cutter *enz; struct bed *bedList = NULL; int seqPos; if (ACGTo[0] || ACGTo[1] || ACGTo[2] || ACGTo[3] || (sixers->elCount > 0)) { for (seqPos = 0; seqPos < seq->size; seqPos++) { struct cutter *enzList = NULL; char sixer[7]; int bedPos = (strand == '-') ? (seq->size - seqPos) : seqPos; if (seq->size - seqPos >= 6) { struct hashEl *el = NULL; sixer[6] = '\0'; memcpy(sixer, seq->dna+seqPos, 6); el = hashLookup(sixers, sixer); if (el) { struct bed *add; enz = el->val; add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand); slAddHead(&bedList, add); /* Just in case there's another one with the same sequence. */ while ((el = hashLookupNext(el))) { enz = el->val; add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand); slAddHead(&bedList, add); } } } /* Use a certain list depending on which letter we're on in the sequence. */ if (seq->dna[seqPos] == 'A') enzList = ACGTo[0]; else if (seq->dna[seqPos] == 'C') enzList = ACGTo[1]; else if (seq->dna[seqPos] == 'G') enzList = ACGTo[2]; else if (seq->dna[seqPos] == 'T') enzList = ACGTo[3]; for (enz = enzList; enz != NULL; enz = enz->next) { int enzPos = 0; int seqCurPos = seqPos; while (enzPos < enz->size && seqCurPos < seq->size && matchingBase(enz->seq[enzPos],seq->dna[seqCurPos])) { enzPos++; seqCurPos++; } if (enzPos == enz->size) { struct bed *add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand); slAddHead(&bedList, add); } } } } return bedList; } struct bed *matchEnzymes(struct cutter *cutters, struct dnaSeq *seq, int startOffset) /* Match the enzymes to sequence and return a bed list in all cases. */ { struct hash *sixers = newHash(8), *palinSixers = newHash(8); struct cutter *enz; struct cutter *ACGTo[5], *palinACGTo[5]; struct bed *bedList = NULL, *tmp; int i; if (!cutters) return NULL; for (i = 0; i < 5; i++) ACGTo[i] = palinACGTo[i] = NULL; /* Put each of the enzymes in either a hash table of six-cutters or */ enz = cutters; while (enz != NULL) { int acgtCount = 0; struct cutter *next = enz->next; acgtCount = countChars(enz->seq,'A') + countChars(enz->seq,'C') + countChars(enz->seq,'G') + countChars(enz->seq,'T'); /* Super dumb coding here but it's quick. */ if (enz->palindromic) { if (enz->size==6 && acgtCount==6) hashAdd(palinSixers, enz->seq, enz); else { if (enz->seq[0] == 'A') slAddHead(&palinACGTo[0], enz); else if (enz->seq[0] == 'C') slAddHead(&palinACGTo[1], enz); else if (enz->seq[0] == 'G') slAddHead(&palinACGTo[2], enz); else if (enz->seq[0] == 'T') slAddHead(&palinACGTo[3], enz); else { slAddHead(&palinACGTo[4], enz); } } } else { if (enz->size==6 && acgtCount==6) hashAdd(sixers, enz->seq, enz); else { if (enz->seq[0] == 'A') slAddHead(&ACGTo[0], enz); else if (enz->seq[0] == 'C') slAddHead(&ACGTo[1], enz); else if (enz->seq[0] == 'G') slAddHead(&ACGTo[2], enz); else if (enz->seq[0] == 'T') slAddHead(&ACGTo[3], enz); else slAddHead(&ACGTo[4], enz); } } enz = next; } /* At this point we got a hash for the palindromes and non-palindromic six-cutters, plus an array for each too. The array is set up so the enzymes starting with 'A' go into [0], 'C' into [1], 'G' into [2], 'T' into [3], and other bases into [4]. */ if (ACGTo[4]) { ACGTo[0] = slCat(ACGTo[0], ACGTo[4]); ACGTo[1] = slCat(ACGTo[1], ACGTo[4]); ACGTo[2] = slCat(ACGTo[2], ACGTo[4]); ACGTo[3] = slCat(ACGTo[3], ACGTo[4]); } if (palinACGTo[4]) { palinACGTo[0] = slCat(palinACGTo[0], palinACGTo[4]); palinACGTo[1] = slCat(palinACGTo[1], palinACGTo[4]); palinACGTo[2] = slCat(palinACGTo[2], palinACGTo[4]); palinACGTo[3] = slCat(palinACGTo[3], palinACGTo[4]); } /* Search the DNA in three ways: on the plus strand for both palindromes and nonpalindromes, and then just nonpalindromes on the minus strand. */ bedList = searchStrand(palinSixers, palinACGTo, seq, startOffset, '+'); tmp = searchStrand(sixers, ACGTo, seq, startOffset, '+'); bedList = slCat(bedList, tmp); reverseComplement(seq->dna, seq->size); tmp = searchStrand(sixers, ACGTo, seq, startOffset, '-'); bedList = slCat(bedList, tmp); if (bedList) slReverse(&bedList); return bedList; } void cullCutters(struct cutter **enzList, boolean semicolon, struct slName *justThese, int matchSize) /* Reduce the list of enzymes based on different options. */ { struct cutter *enz = *enzList, *next; while (enz != NULL) { next = enz->next; if ((justThese && !slNameInList(justThese, enz->name)) || (enz->matchSize < matchSize) || (!semicolon && enz->semicolon)) { slRemoveEl(enzList, enz); cutterFree(&enz); } enz = next; } }