a53b9958fa734f73aeffb9ddfe2fbad1ca65f90c galt Mon Jan 30 16:18:41 2017 -0800 Check-in of CSP2 Content-Security-Policy work. All C-language CGIs should now support CSP2 in browser to stop major forms of XSS javascript injection. Javascript on pages is gathered together, and then emitted in a single script block at the end with a nonce that tells the browser, this is js that we generated instead of being injected by a hacker. Both inline script from script blocks and inline js event handlers had to be pulled out and separated. You will not see js sprinkled throughout the page now. Older browsers that support CSP1 or that do not understand CSP at all will still work, just without protection. External js libraries loaded at runtime need to be added to the CSP policy header in src/lib/htmshell.c. diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c index 6ce2716..f3524be 100644 --- src/hg/hgBlat/hgBlat.c +++ src/hg/hgBlat/hgBlat.c @@ -1,1065 +1,1068 @@ /* hgBlat - CGI-script to manage fast human genome sequence searching. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "errAbort.h" #include "hCommon.h" #include "jksql.h" #include "portable.h" #include "linefile.h" #include "dnautil.h" #include "fa.h" #include "psl.h" #include "genoFind.h" #include "cheapcgi.h" #include "htmshell.h" #include "hdb.h" #include "hui.h" #include "cart.h" #include "dbDb.h" #include "blatServers.h" #include "web.h" #include "hash.h" #include "botDelay.h" #include "trashDir.h" #include "trackHub.h" #include "hgConfig.h" struct cart *cart; /* The user's ui state. */ struct hash *oldVars = NULL; boolean orgChange = FALSE; boolean dbChange = FALSE; struct serverTable /* Information on a server. */ { char *db; /* Database name. */ char *genome; /* Genome name. */ boolean isTrans; /* Is translated to protein? */ char *host; /* Name of machine hosting server. 
*/
    char *port;		/* Port that hosts server. */
    char *nibDir;	/* Directory of sequence files. */
    };

/* Query type labels (presumably the choices offered on the search form --
 * confirm against the form code, which is not visible here). */
char *typeList[] = {"BLAT's guess", "DNA", "protein", "translated RNA", "translated DNA"};
/* Result ordering names; showAliPlaces uses sortList[0] when the cart has no "sort". */
char *sortList[] = {"query,score", "query,start", "chrom,score", "chrom,start", "score"};
/* Result format names; showAliPlaces uses outputList[0] when the cart has no "output". */
char *outputList[] = {"hyperlink", "psl", "psl no header"};

/* Minimum psl->match for a hit to be kept by showAliPlaces when the query is
 * plain DNA or RNA (other query types use a threshold of 0 there).
 * LOWELAB builds use a lower value. */
#ifdef LOWELAB
int minMatchShown = 14;
#else
int minMatchShown = 20;
#endif

static struct serverTable *trackHubServerTable(char *db, boolean isTrans)
/* Find out if database is a track hub with a blat server.
 * Returns NULL when trackHubGetBlatParams() reports no parameters for
 * db/isTrans.  Otherwise returns a freshly AllocVar'd serverTable: db and
 * genome are cloned, host and port are the pointers handed back by
 * trackHubGetBlatParams(), and nibDir is a clone of the hub genome's
 * twoBitPath cut at its last '/'. */
{
char *host, *port;

if (!trackHubGetBlatParams(db, isTrans, &host, &port))
    return NULL;

struct serverTable *st;

AllocVar(st);

st->db = cloneString(db);
st->genome = cloneString(hGenome(db));
st->isTrans = isTrans;
st->host = host;
st->port = port;
/* NOTE(review): genome and genome->twoBitPath are used without a NULL check;
 * presumably both are guaranteed once blat params exist -- confirm. */
struct trackHubGenome *genome = trackHubGetGenome(db);
st->nibDir = cloneString(genome->twoBitPath);
char *ptr = strrchr(st->nibDir, '/'); // we only want the directory name
if (ptr != NULL)
    *ptr = 0;
return st;
}

struct serverTable *findServer(char *db, boolean isTrans)
/* Return server for given database.  Db can either be
 * database name or description.
 * For a track hub database the result comes from trackHubServerTable(), and
 * the function errAborts if that yields nothing.  For any other database the
 * result is the function-static struct 'st' below, so a later non-hub call
 * overwrites what an earlier one returned. */
{
if (trackHubDatabase(db))
    {
    struct serverTable *hubSt = trackHubServerTable(db, isTrans);
    if (hubSt != NULL)
	return hubSt;
    errAbort("Cannot get blat server parameters for track hub with database %s\n", db);
    }

static struct serverTable st;
struct sqlConnection *conn = hConnectCentral();
char query[256];
struct sqlResult *sr;
char **row;
char dbActualName[32];	/* receives the dbDb name when db was a description */

/* If necessary convert database description to name. */
sqlSafef(query, sizeof(query), "select name from dbDb where name = '%s'", db);
if (!sqlExists(conn, query))
    {
    sqlSafef(query, sizeof(query), "select name from dbDb where description = '%s'", db);
    /* db is only redirected when the description lookup finds a row. */
    if (sqlQuickQuery(conn, query, dbActualName, sizeof(dbActualName)) != NULL)
	db = dbActualName;
    }

/* Do a little join to get data to fit into the serverTable. 
*/ sqlSafef(query, sizeof(query), "select dbDb.name,dbDb.description,blatServers.isTrans" ",blatServers.host,blatServers.port,dbDb.nibPath " "from dbDb,blatServers where blatServers.isTrans = %d and " "dbDb.name = '%s' and dbDb.name = blatServers.db", isTrans, db); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) == NULL) { errAbort("Can't find a server for %s database %s. Click " "here " "to reset to default database.", (isTrans ? "translated" : "DNA"), db, cartSidUrlString(cart), hDefaultDb()); } st.db = cloneString(row[0]); st.genome = cloneString(row[1]); st.isTrans = atoi(row[2]); st.host = cloneString(row[3]); st.port = cloneString(row[4]); st.nibDir = hReplaceGbdbSeqDir(row[5], st.db); sqlFreeResult(&sr); hDisconnectCentral(&conn); return &st; } void findClosestServer(char **pDb, char **pOrg) /* If db doesn't have a blat server, look for the closest db (or org) that has one, * as hgPcr does. */ { char *db = *pDb, *org = *pOrg; if (trackHubDatabase(db) && (trackHubServerTable(db, FALSE) != NULL)) { *pDb = db; *pOrg = hGenome(db); return; } struct sqlConnection *conn = hConnectCentral(); char query[256]; sqlSafef(query, sizeof(query), "select db from blatServers where db = '%s'", db); if (!sqlExists(conn, query)) { sqlSafef(query, sizeof(query), "select blatServers.db from blatServers,dbDb " "where blatServers.db = dbDb.name and dbDb.genome = '%s'", org); char *db = sqlQuickString(conn, query); if (db == NULL) { sqlSafef(query, sizeof(query), "select blatServers.db from blatServers,dbDb " "where blatServers.db = dbDb.name order by dbDb.orderKey,dbDb.name desc"); char *db = sqlQuickString(conn, query); if (db == NULL) errAbort("central database tables blatServers and dbDb are disjoint/empty"); else { *pDb = db; *pOrg = hGenome(db); } } else { *pDb = db; *pOrg = hGenome(db); } } hDisconnectCentral(&conn); } void usage() /* Explain usage and exit. 
*/ { errAbort( "hgBlat - CGI-script to manage fast human genome sequence searching\n"); } int countSameNonDigit(char *a, char *b) /* Return count of characters in a,b that are the same * up until first digit in either one. */ { char cA, cB; int same = 0; for (;;) { cA = *a++; cB = *b++; if (cA != cB) break; if (cA == 0 || cB == 0) break; if (isdigit(cA) || isdigit(cB)) break; ++same; } return same; } boolean allDigits(char *s) /* Return TRUE if s is all digits */ { char c; while ((c = *s++) != 0) if (!isdigit(c)) return FALSE; return TRUE; } int cmpChrom(char *a, char *b) /* Compare two chromosomes. */ { return cmpStringsWithEmbeddedNumbers(a, b); } int pslCmpTargetScore(const void *va, const void *vb) /* Compare to sort based on target then score. */ { const struct psl *a = *((struct psl **)va); const struct psl *b = *((struct psl **)vb); int diff = cmpChrom(a->tName, b->tName); if (diff == 0) diff = pslScore(b) - pslScore(a); return diff; } int pslCmpTargetStart(const void *va, const void *vb) /* Compare to sort based on target start. */ { const struct psl *a = *((struct psl **)va); const struct psl *b = *((struct psl **)vb); int diff = cmpChrom(a->tName, b->tName); if (diff == 0) diff = a->tStart - b->tStart; return diff; } void printLuckyRedirect(char *browserUrl, struct psl *psl, char *database, char *pslName, char *faName, char *uiState, char *unhideTrack) /* Print out a very short page that redirects us. */ { char url[1024]; safef(url, sizeof(url), "%s?position=%s:%d-%d&db=%s&ss=%s+%s&%s%s", browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, pslName, faName, uiState, unhideTrack); /* htmlStart("Redirecting"); */ /* Odd it appears that we've already printed the Content-Typ:text/html line but I can't figure out where... 
*/ htmStart(stdout, "Redirecting"); -printf("", url); +char javascript[1024]; +safef(javascript, sizeof javascript, + "location.replace('%s');", url); +jsInline(javascript); printf("", url); htmlEnd(); } static char *replaceSuffix(char *input, char *newSuffix) /* Given a filename with a suffix, replace existing suffix with a new suffix. */ { char buffer[4096]; safecpy(buffer, sizeof buffer, input); char *dot = strrchr(buffer, '.'); safecpy(dot+1, sizeof buffer - 1 - (dot - buffer), newSuffix); return cloneString(buffer); } static void makeBigPsl(char *pslName, char *faName, char *db, char *outputBigBed) /* Make a bigPsl with the blat results. */ { char *bigPslFile = replaceSuffix(outputBigBed, "bigPsl"); char cmdBuffer[4096]; safef(cmdBuffer, sizeof(cmdBuffer), "loader/pslToBigPsl %s -fa=%s stdout | sort -k1,1 -k2,2n > %s", pslName, faName, bigPslFile); system(cmdBuffer); char buf[4096]; char *twoBitDir; if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); twoBitDir = genome->twoBitPath; } else { safef(buf, sizeof(buf), "/gbdb/%s", db); twoBitDir = hReplaceGbdbSeqDir(buf, db); safef(buf, sizeof(buf), "%s%s.2bit", twoBitDir, db); twoBitDir = buf; } safef(cmdBuffer, sizeof(cmdBuffer), "loader/bedToBigBed -verbose=0 -udcDir=%s -extraIndex=name -sizesIs2Bit -tab -as=loader/bigPsl.as -type=bed9+16 %s %s %s", udcDefaultDir(), bigPslFile, twoBitDir, outputBigBed); system(cmdBuffer); unlink(bigPslFile); } void showAliPlaces(char *pslName, char *faName, char *customText, char *database, enum gfType qType, enum gfType tType, char *organism, boolean feelingLucky) /* Show all the places that align. 
*/
{
struct lineFile *lf = pslFileOpen(pslName);
struct psl *pslList = NULL, *psl;
char *browserUrl = hgTracksName();
char *hgcUrl = hgcName();
char uiState[64];	/* "<sessionVar>=<sessionId>" fragment for generated links */
char *vis;
char unhideTrack[64];	/* "&hgUserPsl=dense" or empty */
char *sort = cartUsualString(cart, "sort", sortList[0]);
char *output = cartUsualString(cart, "output", outputList[0]);
boolean pslOut = startsWith("psl", output);
boolean isStraightNuc = (qType == gftRna || qType == gftDna);
/* Only plain DNA/RNA queries are filtered by minMatchShown; every other
 * query type keeps all alignments. */
int minThreshold = (isStraightNuc ? minMatchShown : 0);

/* Bounded formatting: the unbounded sprintf() used here before could write
 * past the 64-byte buffer if the session variable name plus id grew. */
safef(uiState, sizeof(uiState), "%s=%s", cartSessionVarName(), cartSessionId(cart));

/* If user has hidden BLAT track, add a setting that will unhide the 
   track if user clicks on a browser link. */
vis = cartOptionalString(cart, "hgUserPsl");
if (vis != NULL && sameString(vis, "hide"))
    snprintf(unhideTrack, sizeof(unhideTrack), "&hgUserPsl=dense");
else
    unhideTrack[0] = 0;

/* Read every alignment from the psl file, keeping those at or above the
 * threshold.  slAddHead() prepends, so pslList ends up in reverse file order. */
while ((psl = pslNext(lf)) != NULL)
    {
    if (psl->match >= minThreshold)
	slAddHead(&pslList, psl);
    }
lineFileClose(&lf);
if (pslList == NULL)
    {
    puts("
Sorry, no matches found |
"); if (!sameString(output, "psl no header")) pslxWriteHead(stdout, qType, tType); for (psl = pslList; psl != NULL; psl = psl->next) pslTabOut(psl, stdout); printf(""); } else { printf("
Go back to %s on the Genome Browser.
\n", browserUrl, posStr); printf(""); printf(" ACTIONS QUERY SCORE START END QSIZE IDENTITY CHRO STRAND START END SPAN\n"); printf("---------------------------------------------------------------------------------------------------\n"); for (psl = pslList; psl != NULL; psl = psl->next) { if (customText) printf("", browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, customText, uiState, unhideTrack); else printf("", browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, pslName, faName, uiState, unhideTrack); printf("browser "); printf("", hgcUrl, psl->tStart, pslName, cgiEncode(faName), psl->qName, psl->tName, psl->tStart, psl->tEnd, database, uiState); printf("details "); printf("%-14s %5d %5d %5d %5d %5.1f%% %4s %2s %9d %9d %6d\n", psl->qName, pslScore(psl), psl->qStart+1, psl->qEnd, psl->qSize, 100.0 - pslCalcMilliBad(psl, TRUE) * 0.1, skipChr(psl->tName), psl->strand, psl->tStart+1, psl->tEnd, psl->tEnd - psl->tStart); } printf("\n"); puts("\n"); puts("
Sorry, no matches found |
BLAT on DNA is designed to\n" "quickly find sequences of 95%% and greater similarity of length 25 bases or\n" "more. It may miss more divergent or shorter sequence alignments. It will find\n" "perfect sequence matches of 20 bases.\n" "BLAT on proteins finds sequences of 80%% and greater similarity of length 20 amino\n" "acids or more. In practice DNA BLAT works well on primates, and protein\n" "blat on land vertebrates." ); printf("%s", "\n
BLAT is not BLAST. DNA BLAT works by keeping an index of the entire genome\n" "in memory. The index consists of all overlapping 11-mers stepping by 5 except for\n" "those heavily involved in repeats. The index takes up about\n" "2 gigabytes of RAM. RAM can be further reduced to less than 1 GB by increasing step size to 11.\n" "The genome itself is not kept in memory, allowing\n" "BLAT to deliver high performance on a reasonably priced Linux box.\n" "The index is used to find areas of probable homology, which are then\n" "loaded into memory for a detailed alignment. Protein BLAT works in a similar\n" "manner, except with 4-mers rather than 11-mers. The protein index takes a little\n" "more than 2 gigabytes.
\n" "BLAT was written by Jim Kent.\n" "Like most of Jim's software, interactive use on this web server is free to all.\n" "Sources and executables to run batch jobs on your own server are available free\n" "for academic, personal, and non-profit purposes. Non-exclusive commercial\n" "licenses are also available. See the \n" "Kent Informatics\n" "website for details.
\n" "\n" "For more information on the graphical version of BLAT, click the Help \n" "button on the top menu bar"); if (hIsGsidServer()) printf(".
\n"); else printf(" or see the Genome Browser FAQ. \n"); } void doMiddle(struct cart *theCart) /* Write header and body of html page. */ { char *userSeq; char *db, *organism; boolean clearUserSeq = cgiBoolean("Clear"); cart = theCart; dnaUtilOpen(); orgChange = sameOk(cgiOptionalString("changeInfo"),"orgChange"); if (orgChange) { cgiVarSet("db", hDefaultDbForGenome(cgiOptionalString("org"))); } getDbAndGenome(cart, &db, &organism, oldVars); char *oldDb = cloneString(db); findClosestServer(&db, &organism); /* Get sequence - from userSeq variable, or if * that is empty from a file. */ if (clearUserSeq) { cartSetString(cart, "userSeq", ""); cartSetString(cart, "seqFile", ""); } userSeq = cartUsualString(cart, "userSeq", ""); if (isEmpty(userSeq)) { userSeq = cartOptionalString(cart, "seqFile"); } if (isEmpty(userSeq) || orgChange) { cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism)); if (differentString(oldDb, db)) printf("Note: BLAT search is not available for %s %s; " "defaulting to %s %s