44ccfacbe3a3d4b300f80d48651c77837a4b571e galt Tue Apr 26 11:12:02 2022 -0700 SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error if the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrintf is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for correct SQL use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix. diff --git src/hg/protein/lib/pbUtil.c src/hg/protein/lib/pbUtil.c index f80976a..5c34fa3 100644 --- src/hg/protein/lib/pbUtil.c +++ src/hg/protein/lib/pbUtil.c @@ -1,1055 +1,1055 @@ /* pbUtil.c various utility functions for Proteome Browser */ /* Copyright (C) 2013 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hCommon.h" #include "string.h" #include "portable.h" #include "memalloc.h" #include "jksql.h" #include "memgfx.h" #include "vGfx.h" #include "htmshell.h" #include "cart.h" #include "hdb.h" #include "web.h" #include "hui.h" #include "cheapcgi.h" #include "hgColors.h" #include "pbStamp.h" #include "pbTracks.h" void hWrites(char *string) /* Write string with no '\n' if not suppressed. 
*/ { if (!suppressHtml) fputs(string, stdout); } void hButton(char *name, char *label) /* Write out button if not suppressed. */ { if (!suppressHtml) cgiMakeButton(name, label); } void aaPropertyInit(int *hasResFreq) /* initialize AA properties */ { int i, j, ia, iaCnt; struct sqlConnection *conn; char query[56]; struct sqlResult *sr; char **row; for (i=0; i<256; i++) { aa_attrib[i] = 0; aa_hydro[i] = 0; } aa_attrib['R'] = CHARGE_POS; aa_attrib['H'] = CHARGE_POS; aa_attrib['K'] = CHARGE_POS; aa_attrib['D'] = CHARGE_NEG; aa_attrib['E'] = CHARGE_NEG; aa_attrib['C'] = POLAR; aa_attrib['Q'] = POLAR; aa_attrib['S'] = POLAR; aa_attrib['Y'] = POLAR; aa_attrib['N'] = POLAR; aa_attrib['T'] = POLAR; aa_attrib['M'] = POLAR; aa_attrib['A'] = NEUTRAL; aa_attrib['W'] = NEUTRAL; aa_attrib['V'] = NEUTRAL; aa_attrib['F'] = NEUTRAL; aa_attrib['P'] = NEUTRAL; aa_attrib['I'] = NEUTRAL; aa_attrib['L'] = NEUTRAL; aa_attrib['G'] = NEUTRAL; /* Ala: 1.800 Arg: -4.500 Asn: -3.500 Asp: -3.500 Cys: 2.500 Gln: -3.500 */ aa_hydro['A'] = 1.800; aa_hydro['R'] = -4.500; aa_hydro['N'] = -3.500; aa_hydro['D'] = -3.500; aa_hydro['C'] = 2.500; aa_hydro['Q'] = -3.500; /* Glu: -3.500 Gly: -0.400 His: -3.200 Ile: 4.500 Leu: 3.800 Lys: -3.900 */ aa_hydro['E'] = -3.500; aa_hydro['G'] = -0.400; aa_hydro['H'] = -3.200; aa_hydro['I'] = 4.500; aa_hydro['L'] = 3.800; aa_hydro['K'] = -3.900; /* Met: 1.900 Phe: 2.800 Pro: -1.600 Ser: -0.800 Thr: -0.700 Trp: -0.900 */ aa_hydro['M'] = 1.900; aa_hydro['F'] = 2.800; aa_hydro['P'] = -1.600; aa_hydro['S'] = -0.800; aa_hydro['T'] = -0.700; aa_hydro['W'] = -0.900; /* Tyr: -1.300 Val: 4.200 Asx: -3.500 Glx: -3.500 Xaa: -0.490 */ aa_hydro['Y'] = -1.300; aa_hydro['V'] = 4.200; /* ?? Asx: -3.500 Glx: -3.500 Xaa: -0.490 ?? 
*/ /* get average frequency distribution for each AA residue */ conn= hAllocConn(database); if (!hTableExists(database, "pbResAvgStd")) { *hasResFreq = 0; return; } else { *hasResFreq = 1; } sqlSafef(query, sizeof(query), "select * from %s.pbResAvgStd", database); iaCnt = 0; sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { for (j=0; j<20; j++) { if (row[0][0] == aaAlphabet[j]) { iaCnt++; ia = j; aaChar[ia] = row[0][0]; avg[ia] = (double)(atof(row[1])); stddev[ia] = (double)(atof(row[2])); break; } } row = sqlNextRow(sr); } sqlFreeResult(&sr); if (iaCnt != 20) { errAbort("in doAnomalies(), not all 20 amino acide residues are accounted for."); } } char *getAA(char *pepAccession) { struct sqlConnection *conn; char query[256]; struct sqlResult *sr; char **row; char *chp; int i,len; char *seq; char *protDbDate; conn= hAllocConn(database); /* Figure out which is the appropriate DB to use, either spXXXXXX (for PB supported GB) so that we can handle TrEMBL-NEW entries or swissProt (to support global proteome The following convention needs to be followed when building protein DBs: spXXXXXX ---> proteinsXXXXXX swissProt points to the latest spXXXXXX proteins points to the latest proteinsXXXXXX */ if (strstr(protDbName, "proteins") == NULL) { sqlSafef(query, sizeof(query), "select val from %s.protein where acc='%s';", UNIPROT_DB_NAME, pepAccession); } else { protDbDate = strstr(protDbName, "proteins") + strlen("proteins"); sqlSafef(query, sizeof(query), "select val from sp%s.protein where acc='%s';", protDbDate, pepAccession); } sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { seq = cloneString(row[0]); len = strlen(seq); chp = seq; for (i=0; i<len; i++) { *chp = toupper(*chp); chp++; } } else { seq = NULL; } sqlFreeResult(&sr); hFreeConn(&conn); return(seq); } int chkAnomaly(double currentAvg, double pctLow, double pctHi) /* chkAnomaly() checks if the frequency of an AA residue in a protein is abnormally high 
(returns 1) or low (returns -1) */ { int result; if (currentAvg >= pctHi) { result = 1; } else { if (currentAvg <= pctLow) { result = -1; } else { result = 0; } } return(result); } void getExonInfo(char *proteinID, int *exonCount, char **chrom, char *strandChar) { char query[256]; struct sqlResult *sr; char **row; struct sqlConnection *conn; char *qNameStr; char *qSizeStr; char *qStartStr; char *qEndStr; char *tNameStr=NULL; char *tSizeStr; char *tStartStr; char *tEndStr; char *blockCountStr; char *blockSizesStr; char *qStartsStr; char *tStartsStr; char *chp; int exonStartPos; int exonEndPos; int exonGenomeStartPos, exonGenomeEndPos; char *exonStartStr = NULL; char *exonSizeStr = NULL; char *exonGenomeStartStr = NULL; char *strand = NULL; int blockCount=0; int exonIndex; int i, isize; int done = 0; int alignDiff, alignDiffShortest; char *answer; int hggStart = 0; int hggEnd = 0; char *hggGene = NULL; char *hggChrom = NULL; conn= hAllocConn(database); /* NOTE: the query below may not always return single answer, */ /* and kgProtMap and knownGene alignments may not be identical, so pick the closest one. */ sqlSafef(query,sizeof(query), "select qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, blockCount, blockSizes, qStarts, tStarts, strand from %s.%s where qName='%s';", database, kgProtMapTableName, proteinID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) { errAbort("<BLOCKQUOTE>Sorry, cannot display Proteome Browser for %s." 
"<BR>No entry is found in kgProtMap table for this protein.</BLOCKQUOTE>", proteinID); } answer = cloneString(cartOptionalString(cart, "hgg_gene")); if (answer != NULL) hggGene = cloneString(answer); answer = cloneString(cartOptionalString(cart, "hgg_start")); if (answer != NULL) hggStart = atoi(answer); answer = cloneString(cartOptionalString(cart, "hgg_end")); if (answer != NULL) hggEnd = atoi(answer); answer = cloneString(cartOptionalString(cart, "hgg_chrom")); if (answer != NULL) hggChrom = cloneString(answer); alignDiffShortest = 2000000000; /* initialize it with a very large number */ while (row != NULL) { qNameStr = cloneString(row[0]); qSizeStr = cloneString(row[1]); qStartStr = cloneString(row[2]); qEndStr = cloneString(row[3]); tNameStr = cloneString(row[4]); tSizeStr = cloneString(row[5]); tStartStr = cloneString(row[6]); tEndStr = cloneString(row[7]); blockCountStr = cloneString(row[8]); blockSizesStr = cloneString(row[9]); qStartsStr = cloneString(row[10]); tStartsStr = cloneString(row[11]); strand = cloneString(row[12]); if (!((strand[0] == '+') || (strand[0] == '-')) || (strand[1] != '\0') ) errAbort("wrong strand '%s' data encountered in getExonInfo(), aborting ...", strand); alignDiff = abs(atoi(tStartStr) - hggStart) + abs(atoi(tEndStr) - prevGBEndPos); if (alignDiff < alignDiffShortest) { alignDiffShortest = alignDiff; *strandChar = strand[0]; blockCount = atoi(blockCountStr); exonStartStr = qStartsStr; exonGenomeStartStr = tStartsStr; exonSizeStr = blockSizesStr; } row = sqlNextRow(sr); } sqlFreeResult(&sr); hFreeConn(&conn); exonIndex = 0; while (!done) { /* get protein side exon position */ chp = strstr(exonStartStr, ","); *chp = '\0'; exonStartPos = atoi(exonStartStr); blockStart[exonIndex] = exonStartPos; aaStart[exonIndex] = exonStartPos/3; chp++; exonStartStr = chp; /* get Genome side exon position */ chp = strstr(exonGenomeStartStr, ","); *chp = '\0'; exonGenomeStartPos = atoi(exonGenomeStartStr); blockGenomeStart[exonIndex] = 
exonGenomeStartPos; chp++; exonGenomeStartStr = chp; chp = strstr(exonSizeStr, ","); *chp = '\0'; isize = atoi(exonSizeStr); blockSize[exonIndex] = isize; exonEndPos = exonStartPos + isize - 1; blockEnd[exonIndex] = exonEndPos; aaEnd[exonIndex] = exonEndPos/3; exonGenomeEndPos = exonGenomeStartPos + isize - 1; blockGenomeEnd[exonIndex] = exonGenomeEndPos; chp++; exonSizeStr = chp; exonIndex++; if (exonIndex == blockCount) done = 1; } /* reverse the negative strand block size sequence to positive direction */ for (i=0; i<blockCount; i++) { if (*strandChar == '-') { blockSizePositive[i] = blockSize[blockCount - i - 1]; blockStartPositive[i] = protSeqLen*3 - blockEnd[blockCount - i - 1] - 1; blockEndPositive[i] = protSeqLen*3 - blockStart[blockCount - i - 1] - 1; blockGenomeStartPositive[i] = blockGenomeStart[blockCount - i - 1]; blockGenomeEndPositive[i] = blockGenomeEnd[blockCount - i - 1]; } else { blockSizePositive[i] = blockSize[i]; blockStartPositive[i] = blockStart[i]; blockEndPositive[i] = blockEnd[i]; blockGenomeStartPositive[i] = blockGenomeStart[i]; blockGenomeEndPositive[i] = blockGenomeEnd[i]; } } *exonCount = blockCount; assert(*exonCount > 0); *chrom = tNameStr; } void printFASTA(char *proteinID, char *aa) /* print the FASTA format protein sequence */ { int i, l; char *chp; l =strlen(aa); hPrintf("<B>Total amino acids:</B> %d\n", strlen(aa)); hPrintf("\n"); hPrintf("<P><B>FASTA record:</B>\n"); hPrintf("<pre>\n"); if (hIsGsidServer()) hPrintf(">%s", proteinID); else hPrintf(">%s|%s|%s", proteinID, protDisplayID, description); chp = aa; for (i=0; i<l; i++) { if ((i%50) == 0) hPrintf("\n"); hPrintf("%c", *chp); chp++; } hPrintf("</pre>"); fflush(stdout); } /* more sophisticated processing can be done using genome coordinates */ void printExonAA(char *proteinID, char *aa, int exonNum) { int i, j, k, jj; int l; int il; int istart, iend; int ilast; char *chp; l =strlen(aa); ilast = 0; hPrintf("<pre>"); if (exonNum == -1) { hPrintf(">%s", proteinID); chp = 
aa; for (i=0; i<l; i++) { if ((i%50) == 0) hPrintf("\n"); hPrintf("%c", *chp); chp++; } hPrintf("\n\n"); } j=0; il = 0; if (exonNum == -1) { hPrintf("Total amino acids: %d\n", strlen(aa)); istart = 0; iend = l-1; j = 0; } else { hPrintf("AA Start position:%4d\n", aaStart[exonNum-1]+1); hPrintf("AA End position: %4d\n", aaEnd[exonNum-1]+1); hPrintf("AA Length: %4d<br>\n", aaEnd[exonNum-1]-aaStart[exonNum-1]+1); istart = aaStart[exonNum-1]; iend = aaEnd[exonNum-1]; j = exonNum-1; } for (i=istart; i<=iend; i++) { if (((i%50) == 0) && (exonNum == -1)) { hPrintf("\n"); hPrintf("<span style='color:black;'>"); for (jj=0; jj<5; jj++) { if ((i+(jj+1)*10) <= (iend+1)) { hPrintf("%11d", ilast + (jj+1)*10); } } hPrintf("<br>"); hPrintf("</span>"); ilast = ilast + 50; } if (i == aaStart[j]) { j++; k=j%2; if (k) { hPrintf("<font color = blue>"); } else { hPrintf("<font color = green>"); } } if ((i%10) == 0) hPrintf(" "); hPrintf("%c", aa[i]); if (i == aaEnd[j-1]) hPrintf("</font>"); il++; if (il == 50) { il = 0; } } hPrintf("</pre>"); /* Force black color at the end */ hPrintf("<font color = black>"); } void doGenomeBrowserLink(char *spAcc, char *mrnaID, char *hgsidStr) { hPrintf("\n<LI>Genome Browser - "); if (mrnaID != NULL) { hPrintf("<A HREF=\"../cgi-bin/hgTracks?position=%s&db=%s%s\"", mrnaID, database, hgsidStr); hPrintf(" TARGET=_BLANK>%s</A></LI>\n", mrnaID); } else { hPrintf("<A HREF=\"../cgi-bin/hgTracks?position=%s&db=%s%s\"", spAcc, database, hgsidStr); hPrintf(" TARGET=_BLANK>%s</A></LI>\n", spAcc); } } void doGeneSorterLink(char *spAcc, char *mrnaID, char *hgsidStr) { hPrintf("\n<LI>Gene Sorter - "); if (mrnaID != NULL) { /* hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&hgsid=%s\"", mrnaID, hgsid); */ hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&db=%s&org=%s%s\"", mrnaID, database, organism, hgsidStr); hPrintf(" TARGET=_BLANK>%s</A> </LI>\n", mrnaID); } else { hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&db=%s&org=%s%s\"", spAcc, database, 
organism, hgsidStr); hPrintf(" TARGET=_BLANK>%s</A> </LI>\n", spAcc); } hPrintf("\n"); } void doGeneDetailsLink(char *spAcc, char *mrnaID, char *hgsidStr) { char cond_str[128]; char *hggChrom, *hggStart, *hggEnd; char *displayId; -sqlSafefFrag(cond_str, sizeof(cond_str), "kgId='%s' and spID='%s'", mrnaID, spAcc); +sqlSafef(cond_str, sizeof(cond_str), "kgId='%s' and spID='%s'", mrnaID, spAcc); displayId = sqlGetField(database, "kgXref", "spDisplayID", cond_str); /* Feed hgGene with chrom, txStart, and txEnd data, otherwise it would use whatever are in the cart */ -sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", mrnaID); +sqlSafef(cond_str, sizeof(cond_str), "name='%s'", mrnaID); hggChrom = sqlGetField(database, "knownGene", "chrom", cond_str); hggStart = sqlGetField(database, "knownGene", "txStart", cond_str); hggEnd = sqlGetField(database, "knownGene", "txEnd", cond_str); if (mrnaID != NULL) { hPrintf("\n<LI>Gene Details Page - "); hPrintf("<A HREF=\"../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_prot=%s&hgg_chrom=%s" "&hgg_start=%s&hgg_end=%s\"", database, mrnaID, displayId, hggChrom, hggStart, hggEnd); hPrintf(" TARGET=_BLANK>%s</A></LI>\n", mrnaID); } } void doBlatLink(char *db, char *sciName, char *commonName, char *aaSeq) { hPrintf("\n<LI>BLAT - "); hPrintf("<A HREF=\"../cgi-bin/hgBlat?db=%s&type=protein&userSeq=%s\"", db, aaSeq); hPrintf(" TARGET=_BLANK>%s", sciName); if (commonName != NULL) hPrintf(" (%s)", commonName); hPrintf("</A></LI>\n"); } void doPathwayLinks(char *spAcc, char *mrnaName) /* Show pathway links */ /* spAcc is a place holder for future extension */ { struct sqlConnection *conn = hAllocConn(database); struct sqlConnection *conn2 = hAllocConn(database); struct sqlResult *sr; char **row; char query[256]; char cond_str[128]; char *mapID, *locusID, *mapDescription; char *geneID; char *geneSymbol; char *cgapID, *biocMapID; boolean hasPathway; if (hTableExists(database, "kgXref")) { - sqlSafefFrag(cond_str, sizeof(cond_str), "kgID='%s'", 
mrnaName); + sqlSafef(cond_str, sizeof(cond_str), "kgID='%s'", mrnaName); geneSymbol = sqlGetField(database, "kgXref", "geneSymbol", cond_str); if (geneSymbol == NULL) { geneSymbol = mrnaName; } } else { geneSymbol = mrnaName; } /* Show Pathway links if any exist */ hasPathway = FALSE; cgapID = NULL; /*Process BioCarta Pathway link data */ if (sqlTableExists(conn, "cgapBiocPathway")) { - sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s'", geneSymbol); + sqlSafef(cond_str, sizeof(cond_str), "alias='%s'", geneSymbol); cgapID = sqlGetField(database, "cgapAlias", "cgapID", cond_str); if (cgapID != NULL) { sqlSafef(query, sizeof(query), "select mapID from %s.cgapBiocPathway where cgapID = '%s'", database, cgapID); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { if (!hasPathway) { hPrintf("<B>Pathways:</B>\n<UL>"); hasPathway = TRUE; } } while (row != NULL) { biocMapID = row[0]; hPrintf("<LI>BioCarta -  "); - sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', biocMapID, '\''); + sqlSafef(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', biocMapID, '\''); mapDescription = sqlGetField(database, "cgapBiocDesc", "description",cond_str); hPrintf("<A HREF = \""); hPrintf("http://cgap.nci.nih.gov/Pathways/BioCarta/%s", biocMapID); hPrintf("\" TARGET=_blank>%s</A> - %s <BR>\n", biocMapID, mapDescription); row = sqlNextRow(sr); } sqlFreeResult(&sr); } } /* Process KEGG Pathway link data */ if (sqlTableExists(conn, "keggPathway")) { sqlSafef(query, sizeof(query), "select * from %s.keggPathway where kgID = '%s'", database, mrnaName); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { if (!hasPathway) { hPrintf("<B>Pathways:</B>\n<UL>"); hasPathway = TRUE; } while (row != NULL) { locusID = row[1]; mapID = row[2]; hPrintf("<LI>KEGG -  "); - sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\''); + sqlSafef(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\''); mapDescription = 
sqlGetField(database, "keggMapDesc", "description", cond_str); hPrintf("<A HREF = \""); hPrintf("http://www.genome.ad.jp/dbget-bin/show_pathway?%s+%s", mapID, locusID); hPrintf("\" TARGET=_blank>%s</A> - %s <BR>\n",mapID, mapDescription); row = sqlNextRow(sr); } } sqlFreeResult(&sr); } /* Process SRI BioCyc link data */ if (sqlTableExists(conn, "bioCycPathway")) { sqlSafef(query, sizeof(query), "select * from %s.bioCycPathway where kgID = '%s'", database, mrnaName); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { if (!hasPathway) { hPrintf("<BR><B>Pathways:</B>\n<UL>"); hasPathway = TRUE; } while (row != NULL) { geneID = row[1]; mapID = row[2]; hPrintf("<LI>BioCyc -  "); - sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\''); + sqlSafef(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\''); mapDescription = sqlGetField(database, "bioCycMapDesc", "description", cond_str); hPrintf("<A HREF = \""); hPrintf("http://biocyc.org/HUMAN/new-image?type=PATHWAY&object=%s&detail-level=2", mapID); hPrintf("\" TARGET=_blank>%s</A> %s <BR>\n",mapID, mapDescription); row = sqlNextRow(sr); } } sqlFreeResult(&sr); } if (hasPathway) { hPrintf("</UL>\n"); } hFreeConn(&conn); hFreeConn(&conn2); } char *hDbOrganism(char *database) /* Function to get organism from the genome db */ { struct sqlConnection *connCentral = hConnectCentral(); char buf[128]; char query[256]; char *res; sqlSafef(query, sizeof(query), "select organism from dbDb where name = '%s'", database); res = strdup(sqlQuickQuery(connCentral, query, buf, sizeof(buf))); hDisconnectCentral(&connCentral); return res; } int searchProteinsInSupportedGenomes(char *queryID, char **database) /* search existing genome databases to see if they contain the protein Input: queryID return: number of proteins found in existing genome databases output: the last genome database is stored at *database */ { int pbProteinCnt = {0}; char *gDatabase; char *org = NULL; char cond_str[255]; 
struct sqlConnection *conn; struct sqlConnection *connCentral; char queryCentral[256]; struct sqlResult *srCentral; char **row3; char *answer; /* get all genome DBs that support PB */ connCentral = hConnectCentral(); sqlSafef(queryCentral, sizeof(queryCentral), "select defaultDb.name, dbDb.organism from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name"); srCentral = sqlMustGetResult(connCentral, queryCentral); row3 = sqlNextRow(srCentral); /* go through each valid genome database that has PB */ while (row3 != NULL) { gDatabase = row3[0]; org = row3[1]; conn = sqlConnect(gDatabase); - sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s'", queryID); + sqlSafef(cond_str, sizeof(cond_str), "alias='%s'", queryID); answer = sqlGetField(gDatabase, "kgSpAlias", "count(distinct spID)", cond_str); sqlDisconnect(&conn); if ((answer != NULL) && (!sameWord(answer, "0"))) { /* increase the count only by one, because new addition of splice variants to kgSpAlias would give a count of 2 for both the parent and the variant, which caused the problem when rescale button is pressed */ if (atoi(answer) > 0) pbProteinCnt++; *database = strdup(gDatabase); } row3 = sqlNextRow(srCentral); } sqlFreeResult(&srCentral); hDisconnectCentral(&connCentral); return(pbProteinCnt); } void presentProteinSelections(char *queryID, int protCntInSwissByGene, int protCntInSupportedGenomeDb) /* Fuction to present a web page with proteins of different organisms */ { char *gDatabase; char *org = NULL; char *spID, *displayID, *desc; char cond_str[255]; struct sqlConnection *conn, *conn3; char query[256], query3[512]; struct sqlResult *sr, *sr3; char **row, **row3; struct sqlConnection *connCentral, *proteinsConn; char queryCentral[256]; struct sqlResult *srCentral; char *answer; char *taxonId, *protAcc, *protDisp, *protOrg, *protDesc; char *oldOrg, *orgSciName; char *pbOrgSciName[MAX_PB_ORG]; boolean pbOrgPresented[MAX_PB_ORG]; boolean skipIt; int i, maxPbOrg; int otherCnt; connCentral = 
hConnectCentral(); hPrintf("<TABLE WIDTH=\"100%%\" BGCOLOR=\"#"HG_COL_HOTLINKS"\" BORDER=\"0\" CELLSPACING=\"0\""); hPrintf("CELLPADDING=\"2\"><TR>\n"); hPrintf("<TD ALIGN=LEFT><A HREF=\"../index.html\">%s</A></TD>", wrapWhiteFont("Home")); hPrintf("<TD style='text-align:center; color:#FFFFFF; font-size:medium;'>%s</TD>", "UCSC Proteome Browser"); if (proteinInSupportedGenome) { hPrintf("<TD ALIGN=Right><A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml\""); } else { hPrintf("<TD ALIGN=Right><A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml\""); } hPrintf("TARGET=_blank>%s</A></TD>", wrapWhiteFont("Help")); hPrintf("</TR></TABLE>"); hPrintf("<FONT SIZE=4><BR><B>Please select one of the following proteins:<BR><BR></B></FONT>\n"); /* remmember a list of scientific names for the genomes that supports PB */ sqlSafef(queryCentral, sizeof(queryCentral), "select distinct dbDb.scientificName from dbDb where hgPbOk=1"); srCentral = sqlMustGetResult(connCentral, queryCentral); row3 = sqlNextRow(srCentral); i=0; while (row3 != NULL) { pbOrgSciName[i] = strdup(row3[0]); pbOrgPresented[i] = FALSE; i++; row3 = sqlNextRow(srCentral); } maxPbOrg = i; /* go through each genome DB that supports PB */ sqlSafef(queryCentral, sizeof(queryCentral), "select defaultDb.name, dbDb.organism, dbDb.scientificName from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name"); srCentral = sqlMustGetResult(connCentral, queryCentral); row3 = sqlNextRow(srCentral); while (row3 != NULL) { gDatabase = row3[0]; org = row3[1]; orgSciName= row3[2]; protDbName = hPdbFromGdb(gDatabase); proteinsConn = sqlConnect(protDbName); conn = sqlConnect(gDatabase); - sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s' and spID != ''", queryID); + sqlSafef(cond_str, sizeof(cond_str), "alias='%s' and spID != ''", queryID); answer = sqlGetField(gDatabase, "kgSpAlias", "count(distinct spID)", cond_str); if ((answer != NULL) && (!sameWord(answer, "0"))) { /* display organism name */ 
hPrintf("<FONT SIZE=4><B>"); hPrintf("<A href=\"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&name=%s&lvl=0&srchmode=1\" TARGET=_blank>%s</A>", cgiEncode(orgSciName), orgSciName); hPrintf(" (%s):</B></FONT>\n", org); hPrintf("<UL>"); sqlSafef(query, sizeof(query), "select distinct spID from %s.kgSpAlias where alias='%s' " "and spID != ''", gDatabase, queryID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { spID = row[0]; - sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", spID); + sqlSafef(cond_str, sizeof(cond_str), "accession='%s'", spID); displayID = sqlGetField(protDbName, "spXref3", "displayID", cond_str); - sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", spID); + sqlSafef(cond_str, sizeof(cond_str), "accession='%s'", spID); desc = sqlGetField(protDbName, "spXref3", "description", cond_str); /* display a protein */ hPrintf( "<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s&db=%s\">", displayID, gDatabase); if (sameWord(spID, displayID) || (strstr(displayID, spID) != NULL)) { hPrintf("%s</A> %s\n", spID, desc); } else { hPrintf("%s</A> (aka %s) %s\n", spID, displayID, desc); } /* remember the fact that a protein is shown under this PB supported genome */ for (i=0; i<maxPbOrg; i++) { if (sameWord(orgSciName, pbOrgSciName[i])) { pbOrgPresented[i] = TRUE; } } row = sqlNextRow(sr); } hPrintf("</UL>");fflush(stdout); sqlFreeResult(&sr); } sqlDisconnect(&proteinsConn); row3 = sqlNextRow(srCentral); } sqlFreeResult(&srCentral); hDisconnectCentral(&connCentral); sqlDisconnect(&conn); if (protCntInSwissByGene > protCntInSupportedGenomeDb) { otherCnt = -1; if (protCntInSupportedGenomeDb >0) { otherCnt = 0; hPrintf("<FONT SIZE=4><B>Other Organisms:</B></FONT>\n"); hPrintf("<UL>"); } else { hPrintf("<UL>"); } oldOrg = strdup(""); conn3 = sqlConnect(UNIPROT_DB_NAME); sqlSafef(query3, sizeof(query3), "select taxon.id, gene.acc, displayId.val, binomial, description.val " "from gene, displayId, 
accToTaxon,taxon, description " "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id " "and accToTaxon.acc=gene.acc and description.acc=gene.acc order by binomial", queryID); sr3 = sqlMustGetResult(conn3, query3); row3 = sqlNextRow(sr3); /* go through each protein */ while (row3 != NULL) { taxonId = row3[0]; protAcc = row3[1]; protDisp = row3[2]; protOrg = row3[3]; protDesc = row3[4]; /* decide if this entry should be skipped */ skipIt = FALSE; for (i=0; i<maxPbOrg; i++) { if (sameWord(pbOrgSciName[i], protOrg) && pbOrgPresented[i]) { skipIt = TRUE; } } /* print organism name if organism changed */ if (!sameWord(protOrg, oldOrg)) { if (!sameWord(oldOrg, "")) { hPrintf("</UL>\n"); } if (!skipIt) { - sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s and nameType='genbank common name'", taxonId); + sqlSafef(cond_str, sizeof(cond_str), "id=%s and nameType='genbank common name'", taxonId); answer = sqlGetField(PROTEOME_DB_NAME, "taxonNames", "name", cond_str); hPrintf("<FONT SIZE=3><B>"); hPrintf("<A href=\"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&name=%s&lvl=0&srchmode=1\" TARGET=_blank>%s</A>", cgiEncode(protOrg), protOrg); if (answer != NULL) { hPrintf(" (%s)", answer); } hPrintf(":</B></FONT>\n"); } hPrintf("<UL>\n"); } /* print protein entry, if it is not already displayed in the PB supported genome list */ if (!skipIt) { otherCnt++; if (sameWord(protAcc, protDisp)) { hPrintf("<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s\">", protAcc); hPrintf("%s</A> %s\n", protAcc, protDesc); } else { hPrintf("<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s\">", protAcc); if (strstr(protDisp, protAcc) != NULL) { hPrintf("%s</A> %s\n", protAcc, protDesc); } else { hPrintf("%s</A> (aka %s) %s\n", protAcc, protDisp, protDesc); } } } oldOrg = strdup(protOrg); row3 = sqlNextRow(sr3); } if (otherCnt == 0) hPrintf("</UL>None");fflush(stdout); sqlFreeResult(&sr3); sqlDisconnect(&conn3); } } int searchProteinsInSwissProtByGene(char 
*queryGeneID) /* search Swiss-Prot database to see if it contains the protein Input: queryGeneID return: number of proteins found in Swiss-Prot */ { int proteinCnt; struct sqlConnection *conn; char query[256]; struct sqlResult *sr; char **row; conn = sqlConnect(UNIPROT_DB_NAME); sqlSafef(query, sizeof(query), "select count(*) from gene, displayId, accToTaxon,taxon " "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id " "and accToTaxon.acc=gene.acc order by taxon.id", queryGeneID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) { errAbort("Error occured during mySQL query: %s\n", query); } proteinCnt = atoi(row[0]); sqlFreeResult(&sr); sqlDisconnect(&conn); return(proteinCnt); }