44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/altSplice/affySplice/probeMatcher.c src/hg/altSplice/affySplice/probeMatcher.c index fd40bec..59b3192 100644 --- src/hg/altSplice/affySplice/probeMatcher.c +++ src/hg/altSplice/affySplice/probeMatcher.c @@ -1,316 +1,316 @@ /* probeMatcher - Program to sort out which probes belong to which splicing isoform. */ #include "common.h" #include "bed.h" #include "altGraphX.h" #include "hdb.h" #include "options.h" #include "affyProbe.h" void usage() { errAbort("probeMatcher - Program to associate probes with splicing isoforms.\n" "usage:\n" " probeMatcher <splices.agx> <db> <juntionTable> <nonJunctTable> <constBeds> <output.txt>\n"); } struct hash *constHash = NULL; struct agProbeSets /* Little info about the probe sets for altGraphX. */ { char *chrom; /* Chromosome. */ int chromStart; /* Chromosome Start. */ int chromEnd; /* Chromosome End. */ int type; /* Type of splicing event 2==cassette. */ char strand[3]; /* Genomic Strand. */ char *name; /* Name from altSplice clustering. */ int maxCounts; /* Maximum possible counts, used for * memory management. */ int contProbeCount; /* Number of constitutive probe sets. */ char **contProbeSets; /* Names of constitutive probe sets. */ int alt1ProbeCount; /* Number of alternate1 probe sets. */ char **alt1ProbeSets; /* Names of alternate1 probe sets. */ int alt2ProbeCount; /* Number of alternate2 probe sets. */ char **alt2ProbeSets; /* Names of alternate2 probe sets. */ int transcriptCount; /* Number of altMerge transcripts involved. */ char **transcriptNames; /* Names of altMerge transcripts. */ }; void occassionalDot() /* Write out a dot every 20 times this is called. */ { static int dotMod = 100; static int dot = 100; if (--dot <= 0) { putc('.', stdout); fflush(stdout); dot = dotMod; } } struct hash *makeBedHash(char *bedFile) { struct bed *bed=NULL, *bedNext = NULL, *bedList = NULL; struct bed *item = NULL; struct hash *bedHash = newHash(12); bedList = bedLoadAll(bedFile); for(bed = bedList; bed != NULL; bed = bedNext) { bedNext = bed->next; if(bed->score != 6) continue; item = hashFindVal(bedHash, bed->name); bed->next = NULL; if(item == NULL) { hashAdd(bedHash, bed->name, bed); } else { slAddTail(&item, bed); } } return bedHash; } struct agProbeSets *newAgP(struct altGraphX *ag) /* Allocated and fill in some initial values for agProbeSets. */ { struct agProbeSets *agp = NULL; AllocVar(agp); agp->chrom = cloneString(ag->tName); agp->chromStart = ag->tStart; agp->chromEnd = ag->tEnd; agp->type = ag->id; agp->strand[0] = ag->strand[0]; agp->name = cloneString(ag->name); agp->maxCounts = 10; AllocArray(agp->contProbeSets, agp->maxCounts); AllocArray(agp->alt1ProbeSets, agp->maxCounts); AllocArray(agp->alt2ProbeSets, agp->maxCounts); agp->transcriptCount = ag->mrnaRefCount; agp->transcriptNames = ag->mrnaRefs; return agp; } char ** expandAgp(struct agProbeSets *agp, char **probeSets ) { char **cT=agp->contProbeSets, **a1=agp->alt1ProbeSets, **a2 = agp->alt1ProbeSets; ExpandArray(agp->contProbeSets, agp->maxCounts, 2*agp->maxCounts); ExpandArray(agp->alt1ProbeSets, agp->maxCounts, 2*agp->maxCounts); ExpandArray(agp->alt2ProbeSets, agp->maxCounts, 2*agp->maxCounts); agp->maxCounts *= 2; if(probeSets == cT) return agp->contProbeSets; else if(probeSets == a1) return agp->alt1ProbeSets; else if(probeSets == a2) return agp->alt2ProbeSets; errAbort("Not sure which probeSet was being used."); return NULL; } void addProbeSets(struct altGraphX *ag, enum ggEdgeType type, int chromStart, int chromEnd, struct sqlConnection *conn, char *junctionTable, char *nonJunctionTable, struct agProbeSets *agp, int *count, char **probeSets) /* Add the relevant probe sets to probeSets for edge in ag. */ { char where[256], temp[256]; struct sqlResult *sr = NULL; char **row = NULL; int *vPos = ag->vPositions; int *starts = ag->edgeStarts; int *ends = ag->edgeEnds; char *table = NULL; int rowOffset = 0; boolean unique = TRUE; struct bed *bedList = NULL, *bed = NULL; int i=0; struct affyProbe *affyProbe=NULL, *affyProbeList=NULL; /* Junctions have to be handled a bit differently. The numbers we need are going to be inside the data type and difficult to select for. have to get everything and sort it out afterwards. */ if(type == ggSJ) { - sqlSafefFrag(where, sizeof(where), " chromStart >= %d and chromEnd <= %d and blockCount = 2 ", + sqlSafef(where, sizeof(where), " chromStart >= %d and chromEnd <= %d and blockCount = 2 ", chromStart-25, chromEnd+25); table = junctionTable; sr = hRangeQuery(conn, table, ag->tName, chromStart, chromEnd, where, &rowOffset); while((row = sqlNextRow(sr)) != NULL) { affyProbe = affyProbeLoad(row+rowOffset); slAddHead(&affyProbeList, affyProbe); } sqlFreeResult(&sr); for(affyProbe=affyProbeList; affyProbe != NULL; affyProbe = affyProbe->next) { /* If it is a juction it must line up with the starts and stops. */ if(type == ggSJ && ((affyProbe->chromStart+affyProbe->chromStarts[0]+affyProbe->blockSizes[0] != chromStart) || (affyProbe->chromStart+affyProbe->chromStarts[1] != chromEnd))) continue; safef(temp, sizeof(temp), "%s:%s", affyProbe->psName, affyProbe->seq); for(i=0; i<*count; i++) { if(sameString(probeSets[i], temp)) unique = FALSE; } if(unique) { if(*count+1 > agp->maxCounts) probeSets = expandAgp(agp, probeSets); probeSets[*count] = cloneString(temp); (*count)++; } } affyProbeFreeList(&affyProbe); } else { table = nonJunctionTable; sr = hRangeQuery(conn, table, ag->tName, chromStart, chromEnd, NULL, &rowOffset); while((row = sqlNextRow(sr)) != NULL) { affyProbe = affyProbeLoad(row+rowOffset); slAddHead(&affyProbeList, affyProbe); } sqlFreeResult(&sr); for(affyProbe=affyProbeList; affyProbe != NULL; affyProbe = affyProbe->next) { safef(temp, sizeof(temp), "%s:%s", affyProbe->psName, affyProbe->seq); for(i=0; i<*count; i++) { if(sameString(probeSets[i], temp)) unique = FALSE; } if(unique) { if(*count+1 > agp->maxCounts) probeSets = expandAgp(agp, probeSets); probeSets[*count] = cloneString(temp); (*count)++; } } affyProbeFreeList(&affyProbe); } } void agpTabOut(struct agProbeSets *agp, FILE *out) /*Write out tab delimited. */ { int i=0; fprintf(out, "%s\t%d\t%d\t%d\t%s\t%s\t%d\t%d\t", agp->chrom, agp->chromStart, agp->chromEnd, agp->type, agp->strand, agp->name, agp->maxCounts, agp->contProbeCount); for(i=0; i<agp->contProbeCount; i++) fprintf(out, "%s,", agp->contProbeSets[i]); fprintf(out, "\t"); fprintf(out, "%d\t", agp->alt1ProbeCount); for(i=0; i<agp->alt1ProbeCount; i++) fprintf(out, "%s,", agp->alt1ProbeSets[i]); fprintf(out, "\t"); fprintf(out, "%d\t", agp->alt2ProbeCount); for(i=0; i<agp->alt2ProbeCount; i++) fprintf(out, "%s,", agp->alt2ProbeSets[i]); fprintf(out, "\t"); fprintf(out, "%d\t", agp->transcriptCount); for(i=0; i<agp->transcriptCount; i++) fprintf(out, "%s,", agp->transcriptNames[i]); fprintf(out, "\n"); } void findProbesForAg(struct altGraphX *ag, struct sqlConnection *conn, char *junctionTable, char *nonJunctionTable, FILE *out) /* Dump out the altGraphX's constitutive, alternative and alternative2 probe sets. */ { struct agProbeSets *agp = newAgP(ag); int i; struct bed *bed=NULL, *bedList = NULL; int *starts = ag->edgeStarts; int *ends = ag->edgeEnds; int *vPos = ag->vPositions; for(i=0; i<ag->edgeCount; i++) { enum ggEdgeType type = getSpliceEdgeType(ag, i); switch (ag->edgeTypes[i]) { case 0 : addProbeSets(ag, type, vPos[starts[i]], vPos[ends[i]], conn, junctionTable, nonJunctionTable, agp, &agp->contProbeCount, agp->contProbeSets); break; case 1 : addProbeSets(ag, type, vPos[starts[i]], vPos[ends[i]], conn, junctionTable, nonJunctionTable, agp, &agp->alt1ProbeCount, agp->alt1ProbeSets); break; case 2 : addProbeSets(ag, type, vPos[starts[i]], vPos[ends[i]], conn, junctionTable, nonJunctionTable, agp, &agp->alt2ProbeCount, agp->alt2ProbeSets); break; default : errAbort("probeMatcher::findProbesForAg() - Don't recognize type %d", ag->id); } } bedList = hashFindVal(constHash, ag->name); for(bed = bedList; bed != NULL; bed = bed->next) { addProbeSets(ag, ggExon, bed->chromStart, bed->chromEnd, conn, junctionTable, nonJunctionTable, agp, &agp->contProbeCount, agp->contProbeSets); } agpTabOut(agp, out); } void matchProbes(struct altGraphX *agList, char *junctionTable, char *nonJunctionTable, FILE *out) /* Run through the agList and find the probes for the constitutive and alternative edges. */ { struct sqlConnection *conn = hAllocConn(); struct altGraphX *ag = NULL; for(ag=agList; ag != NULL; ag = ag->next) { occassionalDot(); findProbesForAg(ag, conn, junctionTable, nonJunctionTable, out); } hFreeConn(&conn); } int main(int argc, char *argv[]) { struct altGraphX *agList = NULL; FILE *out = NULL; char *bedName = NULL; optionHash(&argc, argv); if(argc != 6) usage(); hSetDb(argv[2]); bedName = optionVal("bedName", NULL); if(bedName != NULL) { warn("Creating bed hash."); constHash = makeBedHash(bedName); warn(""); } else constHash = newHash(2); warn("Loading altGraphX records."); agList = altGraphXLoadAll(argv[1]); out = mustOpen(argv[5], "w"); warn("Looking for probe sets."); matchProbes(agList, argv[3], argv[4], out); carefulClose(&out); warn("\nDone"); return 0; }