void usage()
/* Report the command-line usage summary through errAbort and exit. */
{
static const char *usageText =
    "hgPcr - In-silico PCR CGI for UCSC\n"
    "usage:\n"
    " hgPcr XXX\n"
    "options:\n"
    " -xxx=XXX\n";

/* The text contains no conversion specifiers, so passing it through "%s"
 * emits exactly the same message as using it as the format itself. */
errAbort("%s", usageText);
}
*/ }; +struct pcrServer *getTrackHubServers() +/* Get the list of track hubs that have PCR services. */ +{ +struct pcrServer *serverList = NULL, *server; + +struct dbDb *dbDbList = trackHubGetPcrServers(); + +for(; dbDbList; dbDbList = dbDbList->next) + { + AllocVar(server); + server->db = dbDbList->name; + server->genome = dbDbList->organism; + server->description = dbDbList->description; + trackHubGetPcrParams(server->db, &server->host, &server->port); + struct trackHubGenome *genome = trackHubGetGenome(server->db); + server->seqDir = cloneString(genome->twoBitPath); + char *ptr = strrchr(server->seqDir, '/'); + // we only want the directory name + if (ptr != NULL) + *ptr = 0; + slAddHead(&serverList, server); + } + +return serverList; +} + struct pcrServer *getServerList() /* Get list of available servers. */ { struct pcrServer *serverList = NULL, *server; struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr; char **row; +serverList = getTrackHubServers(); + /* Do a little join to get data to fit into the pcrServer. */ sr = sqlGetResult(conn, NOSQLINJ "select dbDb.name,dbDb.genome,dbDb.description,blatServers.host," "blatServers.port,dbDb.nibPath " "from dbDb,blatServers where " "dbDb.name = blatServers.db " "and blatServers.canPcr = 1 order by dbDb.orderKey" ); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(server); server->db = cloneString(row[0]); server->genome = cloneString(row[1]); server->description = cloneString(row[2]); server->host = cloneString(row[3]); server->port = cloneString(row[4]); server->seqDir = hReplaceGbdbSeqDir(row[5], server->db); slAddHead(&serverList, server); } sqlFreeResult(&sr); hDisconnectCentral(&conn); if (serverList == NULL) errAbort("Sorry, no PCR servers are available"); slReverse(&serverList); return serverList; } struct pcrServer *findServer(char *db, struct pcrServer *serverList) /* Return server for given database. Db can either be * database name or description. 
*/ { struct pcrServer *server; for (server = serverList; server != NULL; server = server->next) { if (sameString(db, server->db)) return server; } errAbort("Can't find a server for PCR database %s\n", db); return NULL; } struct targetPcrServer *getTargetServerList(char *db, char *name) /* Get list of available non-genomic-assembly target pcr servers associated * with db (and name, if not NULL). There may be none -- that's fine. */ { +if (trackHubDatabase(db)) + return NULL; struct targetPcrServer *serverList = NULL, *server; struct sqlConnection *conn = hConnectCentral(); struct sqlConnection *conn2 = hAllocConn(db); struct sqlResult *sr; char **row; struct dyString *dy = dyStringNew(0); sqlDyStringPrintf(dy, "select b.host, b.port, t.* from targetDb as t, blatServers as b " "where b.db = t.name and t.db = '%s' and b.canPcr = 1 ", db); if (isNotEmpty(name)) sqlDyStringPrintf(dy, "and t.name = '%s' ", name); dyStringAppend(dy, "order by t.priority"); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { /* Keep this server only if its timestamp is newer than the tables * and file on which it depends. */ struct targetDb *target = targetDbMaybeLoad(conn2, row+2); if (target != NULL) { AllocVar(server); server->host = cloneString(row[0]); server->port = cloneString(row[1]); server->targetDb = target; slAddHead(&serverList, server); } } dyStringFree(&dy); sqlFreeResult(&sr); hDisconnectCentral(&conn); hFreeConn(&conn2); slReverse(&serverList); return serverList; } void doHelp() /* Print up help page */ { puts( "In-Silico PCR searches a sequence database with a pair of\n" "PCR primers, using an indexing strategy for fast performance.\n" "See an example\n" "video\n" "on our YouTube channel.\n" "\n" "
\n" ">chr22:31000551+31001000 TAACAGATTGATGATGCATGAAATGGG CCCATGAGTGGCTCCTAAAGCAGCTGC\n" "TtACAGATTGATGATGCATGAAATGGGgggtggccaggggtggggggtga\n" "gactgcagagaaaggcagggctggttcataacaagctttgtgcgtcccaa\n" "tatgacagctgaagttttccaggggctgatggtgagccagtgagggtaag\n" "tacacagaacatcctagagaaaccctcattccttaaagattaaaaataaa\n" "gacttgctgtctgtaagggattggattatcctatttgagaaattctgtta\n" "tccagaatggcttaccccacaatgctgaaaagtgtgtaccgtaatctcaa\n" "agcaagctcctcctcagacagagaaacaccagccgtcacaggaagcaaag\n" "aaattggcttcacttttaaggtgaatccagaacccagatgtcagagctcc\n" "aagcactttgctctcagctccacGCAGCTGCTTTAGGAGCCACTCATGaG\n" "\n" "The + between the coordinates in the fasta header indicates \n" "this is on the positive strand. \n" ); } #define ORGFORM_KEEP_ORG "document.orgForm.org.value = " \ " document.mainForm.org.options[document.mainForm.org.selectedIndex].value; " #define ORGFORM_KEEP_DB " document.orgForm.db.value = " \ " document.mainForm.db.options[document.mainForm.db.selectedIndex].value; " #define ORGFORM_KEEP_PARAMS \ " document.orgForm.wp_f.value = document.mainForm.wp_f.value; " \ " document.orgForm.wp_r.value = document.mainForm.wp_r.value; " \ " document.orgForm.wp_size.value = document.mainForm.wp_size.value; " \ " document.orgForm.wp_perfect.value = document.mainForm.wp_perfect.value; " \ " document.orgForm.wp_good.value = document.mainForm.wp_good.value; " #define ORGFORM_RESET_DB " document.orgForm.db.value = 0; " #define ORGFORM_RESET_TARGET " document.orgForm.wp_target.value = \"\"; " #define ORGFORM_SUBMIT " document.orgForm.submit();" void showGenomes(char *genome, struct pcrServer *serverList) /* Put up drop-down list with genomes on it. 
*/ { struct hash *uniqHash = hashNew(8); struct pcrServer *server; char *onChangeText = ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG ORGFORM_RESET_DB ORGFORM_RESET_TARGET ORGFORM_SUBMIT ; printf("\n"); hashFree(&uniqHash); } void showAssemblies(char *genome, char *db, struct pcrServer *serverList, boolean submitOnClick) /* Put up drop-down list with assemblies on it. */ { struct pcrServer *server; char *onChangeText = ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG ORGFORM_KEEP_DB ORGFORM_RESET_TARGET ORGFORM_SUBMIT ; printf("\n"); } void showTargets(char *target, struct targetPcrServer *serverList) /* Put up drop-down list with targets on it. */ { struct targetPcrServer *server; printf("\n"); } void redoDbAndOrgIfNoServer(struct pcrServer *serverList, char **pDb, char **pOrg) /* Check that database and organism are on our serverList. If not, then update * them to first thing that is. */ { struct pcrServer *server, *orgServer = NULL; char *organism = *pOrg; char *db = *pDb; boolean gotDb = FALSE; /* Find first server for our organism */ for (server = serverList; server != NULL; server = server->next) { if (sameString(server->genome, organism)) { orgServer = server; break; } } /* If no server, change our organism to the one of the first server in list. */ if (orgServer == NULL) { orgServer = serverList; *pOrg = organism = orgServer->genome; } /* Search for our database. */ for (server = serverList; server != NULL; server = server->next) { if (sameString(db, server->db)) { gotDb = TRUE; break; } } /* If no server for db, change db. */ if (!gotDb) { if (differentString(db, orgServer->db)) printf("
Note: In-Silico PCR is not available for %s %s; " "defaulting to %s %s
In-Silico PCR was written by " "Jim Kent.\n" "Interactive use on this web server is free to all.\n" "Sources and executables to run batch jobs on your own server are available free\n" "for academic, personal, and non-profit purposes. Non-exclusive commercial\n" "licenses are also available. Contact Jim for details.
\n"); } void writePrimers(struct gfPcrOutput *gpo, char *fileName) /* Write primer sequences to file. Look at only the first gpo because there * is only one set of primers in the input form. */ { if (gpo == NULL) return; FILE *f = mustOpen(fileName, "w"); fprintf(f, "%s\t%s\n", gpo->fPrimer, gpo->rPrimer); carefulClose(&f); } void writePcrResultTrack(struct gfPcrOutput *gpoList, char *db, char *target) /* Write trash files and store their name in a cart variable. */ { char *cartVar = pcrResultCartVar(db); struct tempName bedTn, primerTn; char buf[2048]; trashDirFile(&bedTn, "hgPcr", "hgPcr", ".psl"); trashDirFile(&primerTn, "hgPcr", "hgPcr", ".txt"); gfPcrOutputWriteAll(gpoList, "psl", NULL, bedTn.forCgi); writePrimers(gpoList, primerTn.forCgi); if (isNotEmpty(target)) safef(buf, sizeof(buf), "%s %s %s", bedTn.forCgi, primerTn.forCgi, target); else safef(buf, sizeof(buf), "%s %s", bedTn.forCgi, primerTn.forCgi); cartSetString(cart, cartVar, buf); } void doQuery(struct pcrServer *server, struct gfPcrInput *gpi, int maxSize, int minPerfect, int minGood) /* Send a query to a genomic assembly PCR server and print the results. */ { struct gfPcrOutput *gpoList = gfPcrViaNet(server->host, server->port, server->seqDir, gpi, maxSize, minPerfect, minGood); if (gpoList != NULL) { char urlFormat[2048]; safef(urlFormat, sizeof(urlFormat), "%s?%s&db=%s&position=%%s:%%d-%%d" "&hgPcrResult=pack", hgTracksName(), cartSidUrlString(cart), server->db); printf(""); gfPcrOutputWriteAll(gpoList, "fa", urlFormat, "stdout"); printf(""); writePcrResultTrack(gpoList, server->db, NULL); } else { printf("No matches to %s %s in %s %s", gpi->fPrimer, gpi->rPrimer, server->genome, server->description); } } void doTargetQuery(struct targetPcrServer *server, struct gfPcrInput *gpi, int maxSize, int minPerfect, int minGood) /* Send a query to a non-genomic target PCR server and print the results. 
*/ { struct gfPcrOutput *gpoList; char seqDir[PATH_LEN]; splitPath(server->targetDb->seqFile, seqDir, NULL, NULL); if (endsWith("/", seqDir)) seqDir[strlen(seqDir) - 1] = '\0'; gpoList = gfPcrViaNet(server->host, server->port, seqDir, gpi, maxSize, minPerfect, minGood); if (gpoList != NULL) { struct gfPcrOutput *gpo; char urlFormat[2048]; printf("The sequences and coordinates shown below are from %s, " "not from the genome assembly. The links lead to the " "Genome Browser at the position of the entire target " "sequence.
"); for (gpo = gpoList; gpo != NULL; gpo = gpo->next) { /* Not used as a format here; we modify the name used for position: */ safef(urlFormat, sizeof(urlFormat), "%s?%s&db=%s&position=%s" "&hgPcrResult=pack", hgTracksName(), cartSidUrlString(cart), server->targetDb->db, pcrResultItemAccession(gpo->seqName)); if (gpo->strand == '-') printf("Warning: this amplification is on the reverse-" "complement of %s.\n", gpo->seqName); gfPcrOutputWriteOne(gpo, "fa", urlFormat, stdout); printf("\n"); } printf(""); writePcrResultTrack(gpoList, server->targetDb->db, server->targetDb->name); } else { printf("No matches to %s %s in %s", gpi->fPrimer, gpi->rPrimer, server->targetDb->description); } } boolean doPcr(struct pcrServer *server, struct targetPcrServer *targetServer, char *fPrimer, char *rPrimer, int maxSize, int minPerfect, int minGood, boolean flipReverse) /* Do the PCR, and show results. */ { struct errCatch *errCatch = errCatchNew(); boolean ok = FALSE; hgBotDelay(); if (flipReverse) reverseComplement(rPrimer, strlen(rPrimer)); if (errCatchStart(errCatch)) { struct gfPcrInput *gpi; AllocVar(gpi); gpi->fPrimer = fPrimer; gpi->rPrimer = rPrimer; if (server != NULL) doQuery(server, gpi, maxSize, minPerfect, minGood); if (targetServer != NULL) doTargetQuery(targetServer, gpi, maxSize, minPerfect, minGood); ok = TRUE; } errCatchEnd(errCatch); if (errCatch->gotError) warn("%s", errCatch->message->string); errCatchFree(&errCatch); if (flipReverse) reverseComplement(rPrimer, strlen(rPrimer)); webNewSection("Primer Melting Temperatures"); printf(""); printf("Forward: %4.1f C %s