44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/snp/snpLoad/snpLoadFromTmp.c src/hg/snp/snpLoad/snpLoadFromTmp.c index ee091c8..d241f69 100644 --- src/hg/snp/snpLoad/snpLoadFromTmp.c +++ src/hg/snp/snpLoad/snpLoadFromTmp.c @@ -1,427 +1,429 @@ /* snpLoadFromTmp - create snp table from build125 database, assuming split tables. */ /* Copyright (C) 2013 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "chromInfo.h" #include "dystring.h" #include "hash.h" #include "hdb.h" #include "jksql.h" #include "snp125.h" char *snpDb = NULL; char *targetDb = NULL; static struct hash *chromHash = NULL; static struct slName *chromList = NULL; void usage() /* Explain usage and exit. */ { errAbort( "snpLoadFromTmp - create snp table from build125 database, assuming split tables\n" "usage:\n" " snpLoadFromTmp snpDb targetDb\n"); } /* Copied from hgLoadWiggle. */ static struct hash *loadAllChromInfo() /* Load up all chromosome infos. */ { struct chromInfo *el; struct sqlConnection *conn = sqlConnect(targetDb); struct sqlResult *sr = NULL; struct hash *ret; char **row; ret = newHash(0); -sr = sqlGetResult(conn, NOSQLINJ "select * from chromInfo"); +char query[1024]; +sqlSafef(query, sizeof query, "select * from chromInfo"); +sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = chromInfoLoad(row); verbose(4, "Add hash %s value %u (%#lx)\n", el->chrom, el->size, (unsigned long)&el->size); hashAdd(ret, el->chrom, (void *)(& el->size)); } sqlFreeResult(&sr); sqlDisconnect(&conn); return ret; } /* also copied from hgLoadWiggle. */ static unsigned getChromSize(char *chrom) /* Return size of chrom. */ { struct hashEl *el = hashLookup(chromHash,chrom); if (el == NULL) errAbort("Couldn't find size of chrom %s", chrom); return *(unsigned *)el->val; } struct snp125 *readSnps(char *chromName) /* slurp in chrN_snpTmp */ { struct snp125 *list=NULL, *el = NULL; char tableName[64]; char query[512]; // struct sqlConnection *conn = hAllocConn2(); struct sqlConnection *conn = sqlConnect(targetDb); struct sqlResult *sr; char **row; int count = 0; verbose(1, "reading snps...\n"); strcpy(tableName, "chr"); strcat(tableName, chromName); strcat(tableName, "_snpTmp"); if (!sqlTableExists(conn, tableName)) return list; /* not checking chrom */ sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name, strand, refNCBI, locType, func from %s", tableName); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(el); el->chrom = chromName; el->score = 0; el->chromStart = atoi(row[1]); el->chromEnd = atoi(row[2]); el->name = cloneString(row[3]); strcpy(el->strand, row[4]); el->refNCBI = cloneString(row[5]); el->locType = cloneString(row[6]); el->func = cloneString(row[7]); slAddHead(&list,el); count++; } sqlFreeResult(&sr); // hFreeConn2(&conn); sqlDisconnect(&conn); verbose(1, "%d snps found\n", count); slReverse(&list); return list; } char *validString(int validCode) { switch (validCode) { case 0: return "unknown"; case 1: return "by-cluster"; case 2: return "by-frequency"; case 3: return "by-cluster,by-frequency"; case 4: return "by-submitter"; case 5: return "by-submitter,by-cluster"; case 6: return "by-submitter,by-frequency"; case 7: return "by-submitter"; case 8: return "by-2hit-2allele"; case 9: return "by-2hit-2allele,by-cluster"; case 10: return "by-2hit-2allele,by-frequency"; case 11: return "by-2hit-2allele,by-frequency,by-cluster"; case 12: return "by-2hit-2allele,by-submitter"; case 13: return "by-2hit-2allele,by-submitter,by-cluster"; case 14: return "by-2hit-2allele,by-submitter,by-frequency"; case 15: return "by-2hit-2allele,by-submitter,by-frequency,by-cluster"; case 16: return "by-hapmap"; case 17: return "by-hapmap,by-cluster"; case 18: return "by-hapmap,by-frequency"; case 19: return "by-hapmap,by-frequency,by-cluster"; case 20: return "by-hapmap,by-submitter"; case 21: return "by-hapmap,by-submitter,by-cluster"; case 22: return "by-hapmap,by-submitter,by-frequency"; case 23: return "by-hapmap,by-submitter,by-frequency,by-cluster"; case 24: return "by-hapmap,by-2hit-2allele"; case 25: return "by-hapmap,by-2hit-2allele,by-cluster"; case 26: return "by-hapmap,by-2hit-2allele,by-frequency"; case 27: return "by-hapmap,by-2hit-2allele,by-frequency,by-cluster"; case 28: return "by-hapmap,by-2hit-2allele,by-submitter"; case 29: return "by-hapmap,by-2hit-2allele,by-submitter,by-cluster"; case 30: return "by-hapmap,by-2hit-2allele,by-submitter,by-frequency"; case 31: return "by-hapmap,by-2hit-2allele,by-submitter,by-frequency,by-cluster"; default: return "unknown"; } } void lookupHetAndValid(struct snp125 *list) /* get heterozygosity from SNP table */ { struct snp125 *el; char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; char snpName[32]; verbose(1, "looking up heterozygosity and validation status...\n"); for (el = list; el != NULL; el = el->next) { strcpy(snpName, el->name); stripString(snpName, "rs"); sqlSafef(query, sizeof(query), "select avg_heterozygosity, validation_status, het_se from SNP where snp_id = %s", snpName); sr = sqlGetResult(conn, query); /* need a joiner check rule for this */ row = sqlNextRow(sr); if (row == NULL) { el->avHet = 0.0; el->avHetSE = 0.0; sqlFreeResult(&sr); continue; } el->avHet = atof(cloneString(row[0])); el->avHetSE = atof(cloneString(row[2])); el->valid = validString(atoi(row[1])); sqlFreeResult(&sr); } hFreeConn(&conn); } void readFromSNPFasta(struct snp125 *list) /* get molType, class and observed from snpFasta table */ /* not including chrom in query */ { struct snp125 *el; char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; verbose(1, "query snpFasta for molType, class, observed...\n"); for (el = list; el != NULL; el = el->next) { sqlSafef(query, sizeof(query), "select molType, class, observed from snpFasta where rsId = '%s'", el->name); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) { el->observed = cloneString("n/a"); el->class = cloneString("unknown"); el->molType = cloneString("unknown"); sqlFreeResult(&sr); continue; } el->molType = cloneString(row[0]); el->class = cloneString(row[1]); el->observed = cloneString(row[2]); /* class: special handling for in-dels */ /* split into classes of our own construction */ if (sameString(el->class, "in-del")) { if (sameString(el->locType, "between")) el->class = cloneString("insertion"); else if (sameString(el->locType, "range") || sameString(el->locType, "exact")) el->class = cloneString("deletion"); } sqlFreeResult(&sr); } hFreeConn(&conn); } void lookupRefAllele(struct snp125 *list) /* get reference allele from nib files */ /* for locType exact and locType range */ { struct snp125 *el = NULL; struct dnaSeq *seq; char fileName[HDB_MAX_PATH_STRING]; char chromName[64]; int chromSize = 0; AllocVar(seq); verbose(1, "looking up reference allele...\n"); for (el = list; el != NULL; el = el->next) { if (sameString(el->locType, "unknown") || sameString(el->locType, "between")) { el->refUCSC = cloneString("n/a"); continue; } strcpy(chromName, "chr"); strcat(chromName, el->chrom); chromSize = getChromSize(chromName); if (el->chromStart > chromSize || el->chromEnd > chromSize || el->chromEnd <= el->chromStart) { verbose(1, "unexpected coords %s %s:%d-%d\n", el->name, chromName, el->chromStart, el->chromEnd); el->refUCSC = cloneString("n/a"); continue; } hNibForChrom2(chromName, fileName); seq = hFetchSeq(fileName, chromName, el->chromStart, el->chromEnd); touppers(seq->dna); if (sameString(el->strand, "-")) reverseComplement(seq->dna, strlen(seq->dna)); el->refUCSC = cloneString(seq->dna); } } void loadDatabase(char *chromName) { struct sqlConnection *conn2 = hAllocConn2(); char fileName[64]; char tableName[64]; FILE *f; strcpy(tableName, "chr"); strcat(tableName, chromName); strcat(tableName, "_snp125"); strcpy(fileName, "chr"); strcat(fileName, chromName); strcat(fileName, "_snp125.tab"); verbose(1, "calling snp125TableCreate for %s\n", tableName); snp125TableCreate(conn2, tableName); f = mustOpen(fileName, "r"); verbose(1, "loading %s...\n", tableName); hgLoadTabFile(conn2, ".", tableName, &f); verbose(1, "back from hgLoadTabFile\n"); hFreeConn2(&conn2); } void writeToTabFile(char *chromName, struct snp125 *list) /* write tab separated file */ { struct snp125 *el; int score = 0; int bin = 0; int count = 0; FILE *f; char fileName[64]; strcpy(fileName, "chr"); strcat(fileName, chromName); strcat(fileName, "_snp125"); f = hgCreateTabFile(".", fileName); for (el = list; el != NULL; el = el->next) { bin = hFindBin(el->chromStart, el->chromEnd); fprintf(f, "%d\t", bin); fprintf(f, "chr%s\t", el->chrom); fprintf(f, "%d\t", el->chromStart); fprintf(f, "%d\t", el->chromEnd); fprintf(f, "%s\t", el->name); fprintf(f, "%d\t", score); fprintf(f, "%s\t", el->strand); fprintf(f, "%s\t", el->refNCBI); fprintf(f, "%s\t", el->refUCSC); fprintf(f, "%s\t", el->observed); fprintf(f, "%s\t", el->molType); fprintf(f, "%s\t", el->class); fprintf(f, "%s\t", el->valid); fprintf(f, "%f\t", el->avHet); fprintf(f, "%f\t", el->avHetSE); fprintf(f, "%s\t", el->func); fprintf(f, "%s\t", el->locType); fprintf(f, "dbSNP125"); fprintf(f, "\n"); count++; } if (count > 0) verbose(1, "%d lines written\n", count); fclose(f); } int main(int argc, char *argv[]) /* Check args; read and load . */ { struct snp125 *list=NULL, *el; struct slName *chromPtr, *contigList; if (argc != 3) usage(); /* TODO: look in jksql for existence check for non-hgcentral database */ snpDb = argv[1]; targetDb = argv[2]; // if(!hDbExists(targetDb)) // errAbort("%s does not exist\n", targetDb); hSetDb(snpDb); hSetDb2(targetDb); chromHash = loadAllChromInfo(); chromList = hAllChromNamesDb(targetDb); /* check for needed tables */ if(!hTableExistsDb(snpDb, "ContigLocusId")) errAbort("no ContigLocusId table in %s\n", snpDb); if(!hTableExistsDb(snpDb, "SNP")) errAbort("no SNP table in %s\n", snpDb); for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { stripString(chromPtr->name, "chr"); } for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { verbose(1, "chrom = %s\n", chromPtr->name); list = readSnps(chromPtr->name); if (list == NULL) { verbose(1, "--------------------------------------\n"); continue; } lookupHetAndValid(list); readFromSNPFasta(list); lookupRefAllele(list); writeToTabFile(chromPtr->name, list); slFreeList(&list); loadDatabase(chromPtr->name); verbose(1, "---------------------------------------\n"); } return 0; }