9a71e97d8a11e25bc6e48e8e95fa0c9cc7da4801 chmalee Tue Apr 4 11:59:28 2023 -0700 hgPcr saves hits instead of overwriting the trash file with each new pcr search, refs #30925 diff --git src/hg/hgPcr/hgPcr.c src/hg/hgPcr/hgPcr.c index c91e5b2..b19e076 100644 --- src/hg/hgPcr/hgPcr.c +++ src/hg/hgPcr/hgPcr.c @@ -1,773 +1,786 @@ /* hgPcr - In-silico PCR CGI for UCSC. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hash.h" #include "errAbort.h" #include "errCatch.h" #include "hCommon.h" #include "hgConfig.h" #include "dystring.h" #include "jksql.h" #include "linefile.h" #include "dnautil.h" #include "fa.h" #include "psl.h" #include "genoFind.h" #include "gfPcrLib.h" #include "cheapcgi.h" #include "htmshell.h" #include "hdb.h" #include "hui.h" #include "cart.h" #include "dbDb.h" #include "blatServers.h" #include "targetDb.h" #include "pcrResult.h" #include "trashDir.h" #include "web.h" #include "botDelay.h" #include "oligoTm.h" #include "trackHub.h" #include "hubConnect.h" struct cart *cart; /* The user's ui state. */ struct hash *oldVars = NULL; /* for earlyBotCheck() function at the beginning of main() */ #define delayFraction 1.0 /* standard penalty for most CGIs */ static boolean issueBotWarning = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "hgPcr - In-silico PCR CGI for UCSC\n" "usage:\n" " hgPcr XXX\n" "options:\n" " -xxx=XXX\n" ); } struct pcrServer /* Information on a server running on genomic assembly sequence. */ { struct pcrServer *next; /* Next in list. */ char *db; /* Database name. */ char *genome; /* Genome name. */ char *description; /* Assembly description */ char *host; /* Name of machine hosting server. */ char *port; /* Port that hosts server. */ char *seqDir; /* Directory of sequence files. */ boolean isDynamic; /* is a dynamic server */ char* genomeDataDir; /* genome name for dynamic gfServer */ }; struct targetPcrServer /* Information on a server running on non-genomic sequence, e.g. mRNA, * that has been aligned to a particular genomic assembly. */ { struct targetPcrServer *next; /* Next in list. */ char *host; /* Name of machine hosting server. */ char *port; /* Port that hosts server. */ struct targetDb *targetDb; /* All of the info about the target. */ }; struct pcrServer *getTrackHubServers() /* Get the list of track hubs that have PCR services. */ { struct pcrServer *serverList = NULL, *server; struct dbDb *dbDbList = trackHubGetPcrServers(); for(; dbDbList; dbDbList = dbDbList->next) { AllocVar(server); server->db = dbDbList->name; server->genome = dbDbList->organism; server->description = dbDbList->description; trackHubGetPcrParams(server->db, &server->host, &server->port, &server->genomeDataDir); struct trackHubGenome *genome = trackHubGetGenome(server->db); server->seqDir = cloneString(genome->twoBitPath); char *ptr = strrchr(server->seqDir, '/'); // we only want the directory name if (ptr != NULL) *ptr = 0; slAddHead(&serverList, server); } return serverList; } struct pcrServer *getServerList() /* Get list of available servers. */ { struct pcrServer *serverList = NULL, *server; struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr; char **row; serverList = getTrackHubServers(); /* Do a little join to get data to fit into the pcrServer. Check for newer * dynamic flag and allow with or without it. For debugging, one set the * variable blatServersTbl to some db.table to pick up settings from somewhere * other than dbDb.blatServers. */ char *blatServersTbl = cfgOptionDefault("blatServersTbl", "blatServers"); boolean haveDynamic = sqlColumnExists(conn, blatServersTbl, "dynamic"); char query[512]; sqlSafef(query, sizeof(query), "select dbDb.name,dbDb.genome,dbDb.description,blatServers.host," "blatServers.port,dbDb.nibPath, %s " "from dbDb, %s blatServers where " "dbDb.name = blatServers.db " "and blatServers.canPcr = 1 order by dbDb.orderKey", (haveDynamic ? "blatServers.dynamic" : "0"), blatServersTbl); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(server); server->db = cloneString(row[0]); server->genome = cloneString(row[1]); server->description = cloneString(row[2]); server->host = cloneString(row[3]); server->port = cloneString(row[4]); server->seqDir = hReplaceGbdbSeqDir(row[5], server->db); if (atoi(row[6])) { server->isDynamic = TRUE; server->genomeDataDir = cloneString(server->db); // directories by database name for database genomes } slAddHead(&serverList, server); } sqlFreeResult(&sr); hDisconnectCentral(&conn); if (serverList == NULL) errAbort("Sorry, no PCR servers are available"); slReverse(&serverList); return serverList; } struct pcrServer *findServer(char *db, struct pcrServer *serverList) /* Return server for given database. Db can either be * database name or description. */ { struct pcrServer *server; for (server = serverList; server != NULL; server = server->next) { if (sameString(db, server->db)) return server; } errAbort("Can't find a server for PCR database %s\n", db); return NULL; } struct targetPcrServer *getTargetServerList(char *db, char *name) /* Get list of available non-genomic-assembly target pcr servers associated * with db (and name, if not NULL). There may be none -- that's fine. */ { if (trackHubDatabase(db)) return NULL; struct targetPcrServer *serverList = NULL, *server; struct sqlConnection *conn = hConnectCentral(); struct sqlConnection *conn2 = hAllocConn(db); struct sqlResult *sr; char **row; struct dyString *dy = dyStringNew(0); sqlDyStringPrintf(dy, "select b.host, b.port, t.* from targetDb as t, blatServers as b " "where b.db = t.name and t.db = '%s' and b.canPcr = 1 ", db); if (isNotEmpty(name)) sqlDyStringPrintf(dy, "and t.name = '%s' ", name); sqlDyStringPrintf(dy, "order by t.priority"); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { /* Keep this server only if its timestamp is newer than the tables * and file on which it depends. */ struct targetDb *target = targetDbMaybeLoad(conn2, row+2); if (target != NULL) { AllocVar(server); server->host = cloneString(row[0]); server->port = cloneString(row[1]); server->targetDb = target; slAddHead(&serverList, server); } } dyStringFree(&dy); sqlFreeResult(&sr); hDisconnectCentral(&conn); hFreeConn(&conn2); slReverse(&serverList); return serverList; } void doHelp() /* Print up help page */ { puts( "In-Silico PCR searches a sequence database with a pair of\n" "PCR primers, using an indexing strategy for fast performance.\n" "See an example\n" "video\n" "on our YouTube channel.\n" "\n" "
\n" ">chr22:31000551+31001000 TAACAGATTGATGATGCATGAAATGGG CCCATGAGTGGCTCCTAAAGCAGCTGC\n" "TtACAGATTGATGATGCATGAAATGGGgggtggccaggggtggggggtga\n" "gactgcagagaaaggcagggctggttcataacaagctttgtgcgtcccaa\n" "tatgacagctgaagttttccaggggctgatggtgagccagtgagggtaag\n" "tacacagaacatcctagagaaaccctcattccttaaagattaaaaataaa\n" "gacttgctgtctgtaagggattggattatcctatttgagaaattctgtta\n" "tccagaatggcttaccccacaatgctgaaaagtgtgtaccgtaatctcaa\n" "agcaagctcctcctcagacagagaaacaccagccgtcacaggaagcaaag\n" "aaattggcttcacttttaaggtgaatccagaacccagatgtcagagctcc\n" "aagcactttgctctcagctccacGCAGCTGCTTTAGGAGCCACTCATGaG\n" "\n" "The + between the coordinates in the fasta header indicates \n" "this is on the positive strand. \n" ); } #define ORGFORM_KEEP_ORG "document.orgForm.org.value = " \ " document.mainForm.org.options[document.mainForm.org.selectedIndex].value; " #define ORGFORM_KEEP_DB " document.orgForm.db.value = " \ " document.mainForm.db.options[document.mainForm.db.selectedIndex].value; " #define ORGFORM_KEEP_PARAMS \ " document.orgForm.wp_f.value = document.mainForm.wp_f.value; " \ " document.orgForm.wp_r.value = document.mainForm.wp_r.value; " \ " document.orgForm.wp_size.value = document.mainForm.wp_size.value; " \ " document.orgForm.wp_perfect.value = document.mainForm.wp_perfect.value; " \ " document.orgForm.wp_good.value = document.mainForm.wp_good.value; " #define ORGFORM_RESET_DB " document.orgForm.db.value = 0; " #define ORGFORM_RESET_TARGET " document.orgForm.wp_target.value = \"\"; " #define ORGFORM_SUBMIT " document.orgForm.submit();" void showGenomes(char *genome, struct pcrServer *serverList) /* Put up drop-down list with genomes on it. */ { struct hash *uniqHash = hashNew(8); struct pcrServer *server; char *onChangeText = ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG ORGFORM_RESET_DB ORGFORM_RESET_TARGET ORGFORM_SUBMIT ; printf("\n"); hashFree(&uniqHash); } void showAssemblies(char *genome, char *db, struct pcrServer *serverList, boolean submitOnClick) /* Put up drop-down list with assemblies on it. */ { struct pcrServer *server; char *onChangeText = ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG ORGFORM_KEEP_DB ORGFORM_RESET_TARGET ORGFORM_SUBMIT ; printf("\n"); } void showTargets(char *target, struct targetPcrServer *serverList) /* Put up drop-down list with targets on it. */ { struct targetPcrServer *server; printf("\n"); } void redoDbAndOrgIfNoServer(struct pcrServer *serverList, char **pDb, char **pOrg) /* Check that database and organism are on our serverList. If not, then update * them to first thing that is. */ { struct pcrServer *server, *orgServer = NULL; char *organism = *pOrg; char *db = *pDb; boolean gotDb = FALSE; /* Find first server for our organism */ for (server = serverList; server != NULL; server = server->next) { if (sameString(server->genome, organism)) { orgServer = server; break; } } /* If no server, change our organism to the one of the first server in list. */ if (orgServer == NULL) { orgServer = serverList; *pOrg = organism = orgServer->genome; } /* Search for our database. */ for (server = serverList; server != NULL; server = server->next) { if (sameString(db, server->db)) { gotDb = TRUE; break; } } /* If no server for db, change db. */ if (!gotDb) { if (differentString(db, orgServer->db)) printf("
Note: In-Silico PCR is not available for %s %s; " "defaulting to %s %s
In-Silico PCR was written by " "Jim Kent.\n" "Interactive use on this web server is free to all.\n" "Sources and executables to run batch jobs on your own server are available free\n" "for academic, personal, and non-profit purposes. Non-exclusive commercial\n" "licenses are also available. Contact Jim for details.
\n"); } void writePrimers(struct gfPcrOutput *gpo, char *fileName) /* Write primer sequences to file. Look at only the first gpo because there * is only one set of primers in the input form. */ { if (gpo == NULL) return; -FILE *f = mustOpen(fileName, "w"); +FILE *f = mustOpen(fileName, "a"); fprintf(f, "%s\t%s\n", gpo->fPrimer, gpo->rPrimer); carefulClose(&f); } void writePcrResultTrack(struct gfPcrOutput *gpoList, char *db, char *target) /* Write trash files and store their name in a cart variable. */ { char *cartVar = pcrResultCartVar(db); struct tempName bedTn, primerTn; char buf[2048]; +char *pslFile, *txtFile, *cartResult; +if ( (cartResult = cartOptionalString(cart, cartVar)) != NULL) + { + char *pcrFiles[3]; + chopByWhite(cloneString(cartResult), pcrFiles, 3); + pslFile = pcrFiles[0]; + txtFile = pcrFiles[1]; + gfPcrOutputWriteAll(gpoList, "psl", NULL, pslFile); + writePrimers(gpoList, txtFile); + } +else + { trashDirFile(&bedTn, "hgPcr", "hgPcr", ".psl"); trashDirFile(&primerTn, "hgPcr", "hgPcr", ".txt"); gfPcrOutputWriteAll(gpoList, "psl", NULL, bedTn.forCgi); writePrimers(gpoList, primerTn.forCgi); if (isNotEmpty(target)) safef(buf, sizeof(buf), "%s %s %s", bedTn.forCgi, primerTn.forCgi, target); else safef(buf, sizeof(buf), "%s %s", bedTn.forCgi, primerTn.forCgi); cartSetString(cart, cartVar, buf); } +} static void printHelpLinks(struct gfPcrOutput *gpoList) { /* print links to our docs for special chromosome names */ // if you modify this, also modify hgBlat.c:showAliPlaces, which implements a similar feature, for hgBlat boolean isAlt = FALSE; boolean isFix = FALSE; boolean isRandom = FALSE; boolean isChrUn = FALSE; if (gpoList != NULL) { struct gfPcrOutput *gpo; for (gpo = gpoList; gpo != NULL; gpo = gpo->next) { char *seq = gpo->seqName; if (endsWith(seq, "_fix")) isFix = TRUE; else if (endsWith(seq, "_alt")) isAlt = TRUE; else if (endsWith(seq, "_random")) isRandom = TRUE; else if (startsWith(seq, "chrUn")) isChrUn = TRUE; } } if (isFix || isRandom || isAlt || isChrUn) webNewSection("Notes on the results above"); if (isFix) printf("What is chrom_fix?"); gfPcrOutputWriteAll(gpoList, "fa", urlFormat, "stdout"); printf(""); printHelpLinks(gpoList); writePcrResultTrack(gpoList, server->db, NULL); } else { printf("No matches to %s %s in %s %s", gpi->fPrimer, gpi->rPrimer, server->genome, server->description); } gfDisconnect(&conn); } void doTargetQuery(struct targetPcrServer *server, struct gfPcrInput *gpi, int maxSize, int minPerfect, int minGood) /* Send a query to a non-genomic target PCR server and print the results. */ { struct gfConnection *conn = gfConnect(server->host, server->port, NULL, NULL); struct gfPcrOutput *gpoList; char seqDir[PATH_LEN]; splitPath(server->targetDb->seqFile, seqDir, NULL, NULL); if (endsWith("/", seqDir)) seqDir[strlen(seqDir) - 1] = '\0'; gpoList = gfPcrViaNet(conn, seqDir, gpi, maxSize, minPerfect, minGood); if (gpoList != NULL) { struct gfPcrOutput *gpo; char urlFormat[2048]; printf("The sequences and coordinates shown below are from %s, " "not from the genome assembly. The links lead to the " "Genome Browser at the position of the entire target " "sequence.
"); for (gpo = gpoList; gpo != NULL; gpo = gpo->next) { /* Not used as a format here; we modify the name used for position: */ safef(urlFormat, sizeof(urlFormat), "%s?%s&db=%s&position=%s" "&hgPcrResult=pack", hgTracksName(), cartSidUrlString(cart), server->targetDb->db, pcrResultItemAccession(gpo->seqName)); if (gpo->strand == '-') printf("Warning: this amplification is on the reverse-" "complement of %s.\n", gpo->seqName); gfPcrOutputWriteOne(gpo, "fa", urlFormat, stdout); printf("\n"); } printf(""); writePcrResultTrack(gpoList, server->targetDb->db, server->targetDb->name); } else { printf("No matches to %s %s in %s", gpi->fPrimer, gpi->rPrimer, server->targetDb->description); } gfDisconnect(&conn); } boolean doPcr(struct pcrServer *server, struct targetPcrServer *targetServer, char *fPrimer, char *rPrimer, int maxSize, int minPerfect, int minGood, boolean flipReverse) /* Do the PCR, and show results. */ { struct errCatch *errCatch = errCatchNew(); boolean ok = FALSE; if (issueBotWarning) { char *ip = getenv("REMOTE_ADDR"); botDelayMessage(ip, botDelayMillis); } if (flipReverse) reverseComplement(rPrimer, strlen(rPrimer)); if (errCatchStart(errCatch)) { struct gfPcrInput *gpi; AllocVar(gpi); gpi->fPrimer = fPrimer; gpi->rPrimer = rPrimer; if (server != NULL) doQuery(server, gpi, maxSize, minPerfect, minGood); if (targetServer != NULL) doTargetQuery(targetServer, gpi, maxSize, minPerfect, minGood); ok = TRUE; } errCatchEnd(errCatch); if (errCatch->gotError) warn("%s", errCatch->message->string); errCatchFree(&errCatch); if (flipReverse) reverseComplement(rPrimer, strlen(rPrimer)); webNewSection("Primer Melting Temperatures"); printf(""); printf("Forward: %4.1f C %s