44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/hgc/peakClusters.c src/hg/hgc/peakClusters.c index 3d72c74..7d75484 100644 --- src/hg/hgc/peakClusters.c +++ src/hg/hgc/peakClusters.c @@ -1,577 +1,586 @@ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ /* Stuff to display details on tracks that are clusters of peaks (items) in other tracks. * In particular peaks in either ENCODE narrowPeak or broadPeak settings, and residing in * composite tracks. * * These come in two main forms currently: * DNAse hypersensitive clusters - peaks clustered across cell lines stored in bed 5 * with no special type * Transcription Factor Binding Sites (TFBS) - peaks from transcription factor ChIP-seq * across a number of transcription factors and cell lines. Stored in bed 15 * (or BED5+ factorSource format) * plus sourceTable with type factorSource */ #include "common.h" #include "hash.h" #include "jksql.h" #include "obscure.h" #include "hCommon.h" #include "hdb.h" #include "web.h" #include "cart.h" #include "trackDb.h" #include "hui.h" #include "hgc.h" #include "encode/encodePeak.h" #include "expRecord.h" #include "bed6FloatScore.h" #include "ra.h" #include "jsHelper.h" #include "factorSource.h" static void printClusterTableHeader(struct slName *otherCols, boolean withAbbreviation, boolean withDescription, boolean withSignal) /* Print out header fields table of tracks in cluster */ { webPrintLabelCell("#"); if (withSignal) webPrintLabelCell("signal"); if (withAbbreviation) webPrintLabelCell("abr"); struct slName *col; for (col = otherCols; col != NULL; col = col->next) webPrintLabelCell(col->name); if (withDescription) webPrintLabelCell("description"); webPrintLabelCell("more info"); } static double getSignalAt(char *table, struct bed *cluster) /* Get (average) signal from table entries that overlap cluster */ { struct sqlConnection *conn = hAllocConn(database); int count = 0; double sum = 0; if (sqlTableExists(conn, table)) // Table might be withdrawn from data thrash { int rowOffset; struct sqlResult *sr = hRangeQuery(conn, table, cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, &rowOffset); int signalCol = sqlFieldColumn(sr, "signalValue"); if (signalCol < 0) internalErr(); char **row; while ((row = sqlNextRow(sr)) != NULL) { count += 1; sum += sqlDouble(row[signalCol]); } sqlFreeResult(&sr); } hFreeConn(&conn); if (count > 0) return sum/count; else return 0; } static void printControlledVocabFields(char **row, int fieldCount, struct slName *fieldList, char *vocabFile, struct hash *vocabHash) /* Print out fields from row, linking them to controlled vocab if need be. * If vocabFile is NULL, the vocabHash is not ENCODE, so links + mouseovers are * generated differently */ { int i; struct slName *field; for (i=0, field = fieldList; i<fieldCount; ++i, field = field->next) { char *link = NULL; char *fieldVal = row[i]; if (vocabFile && hashLookup(vocabHash, field->name)) { // controlled vocabulary link = wgEncodeVocabLink(vocabFile, "term", fieldVal, fieldVal, fieldVal, ""); } else if (!vocabFile && vocabHash != NULL) { // meta tables struct hash *fieldHash = hashFindVal(vocabHash, field->name); if (fieldHash != NULL) link = vocabLink(fieldHash, fieldVal, fieldVal); } webPrintLinkCell(link != NULL ? link : fieldVal); } } static void printMetadataForTable(char *table) /* If table exists, _and_ tdb associated with it exists, print out * a metadata link that expands on click. Otherwise print "unavailable" */ { webPrintLinkCellStart(); struct trackDb *tdb = hashFindVal(trackHash, table); if (tdb == NULL) printf("%s info n/a", table); else if (!compositeMetadataToggle(database, tdb, "metadata", TRUE, FALSE)) { /* no metadata, but there is a track table to point TB at */ struct trackDb *parent = trackDbTopLevelSelfOrParent(tdb); printf("<A target='_blank' title='browse table' " "href='%s?db=%s&hgta_table=%s&hgta_group=%s&hgta_track=%s'>%s</A>", hgTablesName(), database, table, parent->grp, parent->track, table); } webPrintLinkCellEnd(); } static void queryInputTrackTable(struct dyString *query, char *inputTrackTable, struct slName *fieldList) /* Construct query in dyString to return contents of inputTrackTable ordered appropriately */ { -struct dyString *fields = dyStringNew(0); -struct slName *field; sqlDyStringPrintf(query, "select tableName "); +struct slName *field; for (field = fieldList; field != NULL; field = field->next) - sqlDyStringPrintfFrag(fields, ",%s", field->name); -sqlDyStringPrintf(query, "%-s from %s", fields->string, inputTrackTable); + { + sqlDyStringPrintf(query, ",%s", field->name); + } +sqlDyStringPrintf(query, " from %s", inputTrackTable); if (fieldList != NULL) - // skip leading comma - dyStringPrintf(query, " order by %s", fields->string+1); -dyStringFree(&fields); + { + sqlDyStringPrintf(query, " order by "); + for (field = fieldList; field != NULL; field = field->next) + { + sqlDyStringPrintf(query, "%s", field->name); + if (field->next) + sqlDyStringPrintf(query, ","); + } + } } static struct hash *getVocabHash(char *fileName) /* Get vocabulary term hash */ { struct hash *hash = raTagVals(fileName, "type"); hashAdd(hash, "cellType", NULL); /* Will the kludge never end, no, never! */ return hash; } static void getVocab(struct trackDb *tdb, struct cart *cart, char **vocabFile, struct hash **vocabHash) /* Get vocabulary info from trackDb settings (CV or vocab tables) */ { char *file = NULL; struct hash *hash = NULL; char *vocab = trackDbSetting(tdb, "controlledVocabulary"); if (vocabSettingIsEncode(vocab)) { file = cloneFirstWord(vocab); hash = getVocabHash(file); } else { hash = vocabBasicFromSetting(tdb, cart); } if (vocabFile != NULL) *vocabFile = file; if (hash != NULL) *vocabHash = hash; } static void printPeakClusterInfo(struct trackDb *tdb, struct cart *cart, struct sqlConnection *conn, char *inputTrackTable, struct slName *fieldList, struct bed *cluster) /* Print an HTML table showing sources with hits in the cluster, along with signal. If cluster is NULL, show all sources assayed */ { /* Make the SQL query to get the table and all other fields we want to show * from inputTrackTable. */ struct dyString *query = dyStringNew(0); queryInputTrackTable(query, inputTrackTable, fieldList); char *vocabFile = NULL; struct hash *vocabHash = NULL; getVocab(tdb, cart, &vocabFile, &vocabHash); int displayNo = 0; int fieldCount = slCount(fieldList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { double signal = 0; if (cluster != NULL) { char *table = row[0]; signal = getSignalAt(table, cluster); if (signal == 0) continue; } printf("</TR><TR>\n"); webPrintIntCell(++displayNo); if (signal != 0) webPrintDoubleCell(signal); printControlledVocabFields(row+1, fieldCount, fieldList, vocabFile, vocabHash); printMetadataForTable(row[0]); } sqlFreeResult(&sr); freez(&vocabFile); dyStringFree(&query); } static char *factorSourceVocabLink(char *vocabFile, char *fieldName, char *fieldVal) /* Add link to show controlled vocabulary entry for term. * Handles 'target' (factor) which is a special case, derived from Antibody entries */ { char *vocabType = (sameString(fieldName, "target") || sameString(fieldName, "factor")) ? "target" : "term"; return wgEncodeVocabLink(vocabFile, vocabType, fieldVal, fieldVal, fieldVal, ""); } static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn, char *sourceTable, char *inputTrackTable, struct slName *fieldList, boolean invert, char *vocab, struct hash *fieldToUrl) /* Put out a lines in an html table that shows assayed sources that have hits in this * cluster, or if invert is set, that have misses. */ { char *vocabFile = NULL; if (vocab) { vocabFile = cloneFirstWord(vocab); } /* Make the monster SQL query to get all assays*/ struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, inputTrackTable); struct slName *field; for (field = fieldList; field != NULL; field = field->next) sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name); sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable); boolean encodeStanford = FALSE; if (startsWith("enc", sourceTable)) encodeStanford = TRUE; int displayNo = 0; int fieldCount = slCount(fieldList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int sourceId = sqlUnsigned(row[0]); boolean hit = FALSE; int i; double signal = 0.0; for (i=0; i<cluster->expCount; i++) { if (cluster->expNums[i] == sourceId) { hit = TRUE; signal = cluster->expScores[i]; break; } } if (hit ^ invert) { printf("</TR><TR>\n"); webPrintIntCell(++displayNo); if (!invert) webPrintDoubleCell(signal); webPrintLinkCell(row[1]); int i = 0; // find position of CV metadata in field list int offset = 3; struct slName *field = fieldList; for (i=0; i<fieldCount && field != NULL; ++i, field = field->next) { char *fieldVal = row[i+offset]; char *link = NULL; if (vocab) { link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal)); } else if (fieldToUrl != NULL) { // (outside) links on vocab items char *urlPattern = hashFindVal(fieldToUrl, field->name); if (urlPattern != NULL) { char *url = replaceInUrl(urlPattern, fieldVal, cart, database, seqName, winStart, winEnd, NULL, TRUE, NULL); if (url != NULL) { struct dyString *ds = dyStringCreate("<a target='_blank' href='%s'>%s</a>\n", url, fieldVal); link = dyStringCannibalize(&ds); } } } webPrintLinkCell(link ? link : fieldVal); } char *table = row[2]; if (encodeStanford) { char *file = stringIn("ENCFF", table); if (!file) webPrintLinkCell(table); else { webPrintLinkCellStart(); printf("<A target='_blank'" "href='https://www.encodeproject.org/files/%s'>%s</A>", file, file); webPrintLinkCellEnd(); } } else printMetadataForTable(table); } } sqlFreeResult(&sr); freez(&vocabFile); dyStringFree(&query); } void doPeakClusterListItemsAssayed() /* Put up a page that shows all experiments associated with a cluster track. */ { struct trackDb *clusterTdb = tdbForTableArg(); cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel); struct sqlConnection *conn = hAllocConn(database); char *inputTableFieldDisplay = trackDbSetting(clusterTdb, "inputTableFieldDisplay"); webPrintLinkTableStart(); if (inputTableFieldDisplay) { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); printClusterTableHeader(fieldList, FALSE, FALSE, FALSE); char *inputTrackTable = trackDbRequiredSetting(clusterTdb, "inputTrackTable"); printPeakClusterInfo(clusterTdb, cart, conn, inputTrackTable, fieldList, NULL); } else errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", clusterTdb->track); webPrintLinkTableEnd(); hFreeConn(&conn); } void doPeakClusters(struct trackDb *tdb, char *item) /* Display detailed info about a cluster of DNase peaks from other tracks. */ { int start = cartInt(cart, "o"); char *table = tdb->table; int rowOffset = hOffsetPastBin(database, seqName, table); char query[256]; struct sqlResult *sr; char **row; struct bed *cluster = NULL; struct sqlConnection *conn = hAllocConn(database); cartWebStart(cart, database, "%s item details", tdb->shortLabel); sqlSafef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) cluster = bedLoadN(row+rowOffset, 5); sqlFreeResult(&sr); if (cluster != NULL) { /* Get list of subgroups to display */ char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); if (inputTableFieldDisplay != NULL) { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); + char queryTblSafe[1024]; + sqlSafef(queryTblSafe, sizeof queryTblSafe, "%s", inputTrackTable); /* Print out some information about the cluster overall. */ printf("<B>Items in Cluster:</B> %s of %d<BR>\n", cluster->name, - sqlRowCount(conn, sqlCheckIdentifier(inputTrackTable))); + sqlRowCount(conn, queryTblSafe)); printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score); printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, NULL); /* In a new section put up list of hits. */ webNewSection("List of Items in Cluster"); webPrintLinkTableStart(); printClusterTableHeader(fieldList, FALSE, FALSE, TRUE); printPeakClusterInfo(tdb, cart, conn, inputTrackTable, fieldList, cluster); } else errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", tdb->track); webPrintLinkTableEnd(); } printf("<A HREF=\"%s&g=htcListItemsAssayed&table=%s\" TARGET_blank>", hgcPathAndSettings(), tdb->track); printf("List all items assayed"); printf("</A><BR>\n"); webNewSection("Track Description"); printTrackHtml(tdb); hFreeConn(&conn); } void doClusterMotifDetails(struct sqlConnection *conn, struct trackDb *tdb, struct factorSource *cluster) /* Display details about TF binding motif(s) in cluster */ { char *motifTable = trackDbSetting(tdb, "motifTable"); // localizations char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable"); // PWM used to draw sequence logo char *motifMapTable = trackDbSetting(tdb, "motifMapTable"); // map target to motif struct slName *motifNames = NULL, *mn; // list of canonical motifs for the factor struct dnaMotif *motif = NULL; struct bed6FloatScore *hit = NULL, *maxHit = NULL; char **row; char query[256]; if (motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; int rowOffset; char where[256]; if (motifMapTable == NULL || !sqlTableExists(conn, motifMapTable)) { // Assume cluster name is motif name if there is no map table motifNames = slNameNew(cluster->name); } else { sqlSafef(query, sizeof(query), "select motif from %s where target = '%s'", motifMapTable, cluster->name); char *ret = sqlQuickString(conn, query); if (ret == NULL) { // missing target from table -- no canonical motif webNewEmptySection(); return; } motifNames = slNameListFromString(ret, ','); } for (mn = motifNames; mn != NULL; mn = mn->next) { - sqlSafefFrag(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name); + sqlSafef(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name); sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart, cluster->chromEnd, where, &rowOffset); if ((row = sqlNextRow(sr)) != NULL) { hit = bed6FloatScoreLoad(row + rowOffset); if (maxHit == NULL || maxHit->score < hit->score) maxHit = hit; } sqlFreeResult(&sr); } } if (maxHit == NULL) { // Maintain table layout webNewEmptySection(); return; } hit = maxHit; webNewSection("Canonical Motif in Cluster"); char posLink[1024]; safef(posLink, sizeof(posLink),"<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</a>", hgTracksPathAndSettings(), database, cluster->chrom, hit->chromStart+1, hit->chromEnd, cluster->chrom, hit->chromStart+1, hit->chromEnd); printf("<b>Motif Name:</b> %s<br>\n", hit->name); printf("<b>Motif Score"); printf(":</b> %.2f<br>\n", hit->score); printf("<b>Motif Position:</b> %s<br>\n", posLink); printf("<b>Motif Strand:</b> %c<br>\n", (int)hit->strand[0]); struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower); if (seq == NULL) return; if (hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable)) { motif = loadDnaMotif(hit->name, motifPwmTable); if (motif == NULL) return; motifLogoAndMatrix(&seq, 1, motif); } } void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start, int end) /* Display detailed info about a cluster of TFBS peaks from other tracks. */ { char extraWhere[256]; -safef(extraWhere, sizeof extraWhere, "name='%s'", item); +sqlSafef(extraWhere, sizeof extraWhere, "name='%s'", item); int rowOffset; struct sqlResult *sr = hRangeQuery(conn, tdb->table, seqName, start, end, extraWhere, &rowOffset); char **row = sqlNextRow(sr); struct factorSource *cluster = NULL; if (row != NULL) cluster = factorSourceLoad(row + rowOffset); sqlFreeResult(&sr); if (cluster == NULL) errAbort("Error loading cluster from track %s", tdb->track); char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); char *factorLink = cluster->name; char *vocab = trackDbSetting(tdb, "controlledVocabulary"); if (vocab != NULL) { char *file = cloneFirstWord(vocab); factorLink = wgEncodeVocabLink(file, "term", factorLink, factorLink, factorLink, ""); } printf("<B>Factor:</B> %s<BR>\n", factorLink); printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score); printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, item); /* Get list of tracks we'll look through for input. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); char query[256]; sqlSafef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", inputTrackTable, cluster->name); /* Next do the lists of hits and misses. We have the hits from the non-zero signals in * cluster->expScores. We need to figure out the sources actually assayed though * some other way. We'll do this by one of two techniques. */ char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); if (inputTableFieldDisplay != NULL) { char *fieldUrls = trackDbSetting(tdb, "inputTableFieldUrls"); struct hash *fieldToUrl = hashFromString(fieldUrls); struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); char *vocab = trackDbSetting(tdb, "controlledVocabulary"); /* In a new section put up list of hits. */ webNewSection("Assays for %s in Cluster", cluster->name); webPrintLinkTableStart(); printClusterTableHeader(fieldList, TRUE, FALSE, TRUE); printFactorSourceTableHits(cluster, conn, sourceTable, inputTrackTable, fieldList, FALSE, vocab, fieldToUrl); webPrintLinkTableEnd(); webNewSectionHeaderStart(); char sectionTitle[128]; safef(sectionTitle, sizeof(sectionTitle),"Assays for %s Without Hits in Cluster", cluster->name); jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellNoHits", sectionTitle, FALSE); webNewSectionHeaderEnd(); webPrintLinkTableStart(); printClusterTableHeader(fieldList, TRUE, FALSE, FALSE); printFactorSourceTableHits(cluster, conn, sourceTable, inputTrackTable, fieldList, TRUE, vocab, fieldToUrl); webPrintLinkTableEnd(); jsEndCollapsibleSection(); } else { errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", tdb->track); } webNewSectionHeaderStart(); jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellSources", "Cell Abbreviations", FALSE); webNewSectionHeaderEnd(); hPrintFactorSourceAbbrevTable(conn, tdb); jsEndCollapsibleSection(); doClusterMotifDetails(conn, tdb, cluster); }