src/webBlat/webBlat.c 1.10
1.10 2009/08/05 20:51:57 galt
fixed comment mistake
Index: src/webBlat/webBlat.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/webBlat/webBlat.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 1000000 -r1.9 -r1.10
--- src/webBlat/webBlat.c 5 Aug 2009 20:33:50 -0000 1.9
+++ src/webBlat/webBlat.c 5 Aug 2009 20:51:57 -0000 1.10
@@ -1,548 +1,548 @@
/* webBlat - CGI Applet for Blat. */
/* Copyright 2004 Jim Kent. All rights reserved. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "errabort.h"
#include "errCatch.h"
#include "portable.h"
#include "htmshell.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "fa.h"
#include "nib.h"
#include "twoBit.h"
#include "psl.h"
#include "fuzzyFind.h"
#include "cheapcgi.h"
#include "genoFind.h"
#include "gfPcrLib.h"
#include "gfWebLib.h"
void usage()
/* Explain usage and exit. */
{
errAbort(
"webBlat - CGI Applet for Blat\n"
"This program is not generally meant to be run from the command line.\n"
);
}
struct gfWebConfig *cfg; /* Our configuration. */
struct gfServerAt *findServer(boolean txServer)
/* Find gfServer. */
{
if (txServer)
return gfWebFindServer(cfg->transServerList, "wb_db");
else
return gfWebFindServer(cfg->serverList, "wb_db");
}
void doHelp()
/* Put up help page. */
{
uglyf("I'm just not very helpful");
}
char *protQueryMenu[] = {"Protein", "Translated DNA", "Translated RNA"};
char *nucQueryMenu[] = {"DNA", "RNA", };
char *bothQueryMenu[] = {"BLAT's Guess", "DNA", "RNA",
"Protein", "Translated DNA", "Translated RNA"};
char *sortMenu[] = {"query,score", "query,start", "chrom,score", "chrom,start", "score"};
char *outputMenu[] = {"hyperlink", "psl", "psl no header"};
boolean isTxType(char *type)
/* Return TRUE if it's a query requiring a translated server type */
{
int i;
for (i=0; i<ArraySize(protQueryMenu); ++i)
{
if (sameWord(type, protQueryMenu[i]))
return TRUE;
}
return FALSE;
}
int cmpSeqName(char *a, char *b)
/* Compare two sequence names likely to be of form prefix followed by a number. */
{
char cA, cB;
int cSame = countSame(a, b);
a += cSame;
b += cSame;
cA = *a;
cB = *b;
if (isdigit(cA))
{
if (isdigit(cB))
return atoi(a) - atoi(b);
else
return -1;
}
else if (isdigit(cB))
return 1;
else
return strcmp(a, b);
}
int pslCmpTargetScore(const void *va, const void *vb)
/* Compare to sort based on target then score. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int diff = cmpSeqName(a->tName, b->tName);
if (diff == 0)
diff = pslScore(b) - pslScore(a);
return diff;
}
int pslCmpTargetStart(const void *va, const void *vb)
/* Compare to sort based on target start. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int diff = cmpSeqName(a->tName, b->tName);
if (diff == 0)
diff = a->tStart - b->tStart;
return diff;
}
char *skipFile(char *fileSeq)
/* Skip over file: */
{
char *s = strchr(fileSeq, ':');
if (s != NULL)
return s+1;
else
{
internalErr();
return fileSeq;
}
}
void parseFileSeq(char *spec, char **retFile, char **retSeq)
/* Parse out file:seq into file and seq. */
{
char *seq = skipFile(spec);
*retSeq = cloneString(seq);
*retFile = cloneStringZ(spec, seq - spec - 1);
}
void aliLines(char *pslName, char *faName, char *database, char *type)
/* Show all the places that align. */
{
char *url = "../cgi-bin/webBlat";
char *sort = cgiUsualString("wb_sort", sortMenu[0]);
char *output = cgiUsualString("wb_output", outputMenu[0]);
boolean pslOut = startsWith("psl", output);
struct lineFile *lf = pslFileOpen(pslName);
struct psl *pslList = NULL, *psl, **pslArray;
int i, pslCount = 0;
while ((psl = pslNext(lf)) != NULL)
{
slAddHead(&pslList, psl);
++pslCount;
}
lineFileClose(&lf);
slReverse(&pslList);
if (pslList == NULL)
errAbort("Sorry, no matches found");
/* Keep an array in unsorted order */
AllocArray(pslArray, pslCount);
for (psl = pslList, i=0; psl != NULL; psl = psl->next, ++i)
pslArray[i] = psl;
if (sameString(sort, "query,start"))
{
slSort(&pslList, pslCmpQuery);
}
else if (sameString(sort, "query,score"))
{
slSort(&pslList, pslCmpQueryScore);
}
else if (sameString(sort, "score"))
{
slSort(&pslList, pslCmpScore);
}
else if (sameString(sort, "chrom,start"))
{
slSort(&pslList, pslCmpTargetStart);
}
else if (sameString(sort, "chrom,score"))
{
slSort(&pslList, pslCmpTargetScore);
}
else
{
slSort(&pslList, pslCmpQueryScore);
}
if (pslOut)
{
printf("<TT><PRE>");
if (!sameString(output, "psl no header"))
pslWriteHead(stdout);
for (psl = pslList; psl != NULL; psl = psl->next)
pslTabOut(psl, stdout);
}
else
{
int lineIx = 0;
printf("<H2>Web BLAT Search Results</H2>");
printf("<TT><PRE>");
printf("QUERY SCORE START END QSIZE IDENTITY CHRO STRAND START END SPAN\n");
printf("---------------------------------------------------------------------------------------------------\n");
for (psl = pslList; psl != NULL; psl = psl->next)
{
printf("<A HREF=\"%s?wb_qType=%s&wb_psl=%s&wb_fa=%s&wb_doDetailLine=%d&wb_db=%s\">",
url, type, pslName, faName, ptArrayIx(psl, pslArray, pslCount),
database);
printf("%-14s %5d %5d %5d %5d %5.1f%% %4s %2s %9d %9d %6d",
psl->qName, pslScore(psl), psl->qStart+1, psl->qEnd, psl->qSize,
100.0 - pslCalcMilliBad(psl, TRUE) * 0.1,
skipFile(psl->tName), psl->strand, psl->tStart+1, psl->tEnd,
psl->tEnd - psl->tStart);
printf("</A>\n");
++lineIx;
}
}
pslFreeList(&pslList);
freez(&pslArray);
printf("</TT></PRE>");
}
struct dnaSeq *faReadNamedSeq(char *fileName, char *name, boolean isProt)
/* Return DNA sequence corresponding to named fasta record. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
DNA *dna;
int dnaSize;
char *dnaName;
struct dnaSeq *seq = NULL;
while (faSomeSpeedReadNext(lf, &dna, &dnaSize, &dnaName, !isProt))
{
if (sameString(name, dnaName))
{
AllocVar(seq);
seq->name = cloneString(dnaName);
seq->size = dnaSize;
seq->dna = cloneStringZ(dna, dnaSize);
break;
}
}
lineFileClose(&lf);
return seq;
}
struct dnaSeq *readSeqFrag(char *seqDir, char *fileName, char *seqName, int start, int end)
/* Read in fragment of sequence. */
{
char path[PATH_LEN];
safef(path, sizeof(path), "%s/%s", seqDir, fileName);
if (nibIsFile(path))
{
return nibLoadPart(path, start, end-start);
}
else
{
struct twoBitFile *tbf = twoBitOpen(path);
struct dnaSeq *seq = twoBitReadSeqFragLower(tbf, seqName, start, end);
twoBitClose(&tbf);
return seq;
}
}
void doDetailLine()
/* Show alignment details - creating a html frame with
* two pages: index and body. */
{
char *pslFileName = cgiString("wb_psl");
char *faFileName = cgiString("wb_fa");
char *type = cgiString("wb_qType");
boolean isTx = isTxType(type);
struct gfServerAt *server = findServer(isTx);
int pslLineIx = cgiInt("wb_doDetailLine");
int i;
struct lineFile *lf = pslFileOpen(pslFileName);
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char *tFileName, *tSeqName;
int blockCount;
int tStart, tEnd;
boolean protQuery = sameWord(type, "Protein");
char *bodyName = cloneString(rTempName(cfg->tempDir, "body", ".html"));
char *indexName = cloneString(rTempName(cfg->tempDir, "index", ".html"));
FILE *f;
/* Read in psl file and find the alignment line we're looking for. */
struct psl *psl = NULL;
for (i=0; i<=pslLineIx; ++i)
{
psl = pslNext(lf);
if (psl == NULL)
errAbort("Expecting at least %d lines, got %d in %s", pslLineIx+1, i+1, pslFileName);
}
lineFileClose(&lf);
/* Read in fa file and find the query sequence. */
qSeq = faReadNamedSeq(faFileName, psl->qName, protQuery);
if (qSeq == NULL)
errAbort("Couldn't find %s in %s", psl->qName, faFileName);
/* Parse out file:seq into file and seq, and load needed part of target seq. */
parseFileSeq(psl->tName, &tFileName, &tSeqName);
int lineWidth = protQuery ? 60 : 50;
-int extraContext = 2 * lineWidth; /* one line before and one line after */
+int extraContext = 2 * lineWidth; /* two lines before ... */
tStart = psl->tStart - extraContext;
if (tStart < 0) tStart = 0;
-tEnd = psl->tEnd + extraContext;
+tEnd = psl->tEnd + extraContext; /* and two lines after */
if (tEnd > psl->tSize)
tEnd = psl->tSize;
tSeq = readSeqFrag(server->seqDir, tFileName, tSeqName, tStart, tEnd);
/* Write out body. */
f = mustOpen(bodyName, "w");
htmStart(f, psl->qName);
blockCount = pslShowAlignment(psl, protQuery, psl->qName, qSeq, 0, qSeq->size,
tSeqName, tSeq, tStart, tEnd, f);
htmEnd(f);
carefulClose(&f);
chmod(bodyName, 0666);
/* Write out index. */
f = mustOpen(indexName, "w");
htmStart(f, psl->qName);
fprintf(f, "<H3>Alignment of %s</H3>", psl->qName);
fprintf(f, "<A HREF=\"%s#cDNA\" TARGET=\"body\">%s</A><BR>\n", bodyName, psl->qName);
fprintf(f, "<A HREF=\"%s#genomic\" TARGET=\"body\">%s</A><BR>\n", bodyName, tSeqName);
for (i=1; i<=blockCount; ++i)
{
fprintf(f, "<A HREF=\"%s#%d\" TARGET=\"body\">block%d</A><BR>\n",
bodyName, i, i);
}
fprintf(f, "<A HREF=\"%s#ali\" TARGET=\"body\">together</A><BR>\n", bodyName);
carefulClose(&f);
chmod(indexName, 0666);
/* Write (to stdout) the main html page containing just the frame info. */
puts("<FRAMESET COLS = \"13%,87% \" >");
printf(" <FRAME SRC=\"%s\" NAME=\"index\">\n", indexName);
printf(" <FRAME SRC=\"%s\" NAME=\"body\">\n", bodyName);
puts("<NOFRAMES><BODY></BODY></NOFRAMES>");
puts("</FRAMESET>");
puts("</HTML>\n");
}
void doBlat()
/* Do actual blatting */
{
char *seqText = cgiString("wb_seq");
bioSeq *seqList, *seq;
char *type = NULL;
boolean txServer = FALSE, protQuery = FALSE;
struct gfServerAt *server;
int conn;
FILE *f;
struct gfOutput *gvo;
struct hash *tFileCache = gfFileCacheNew();
char *pslName = cloneString(rTempName(cfg->tempDir, "wb", ".psl"));
char *faName = cloneString(rTempName(cfg->tempDir, "wb", ".fa"));
/* Figure out type and if it is a protein or a DNA based query */
type = cgiUsualString("wb_qType", bothQueryMenu[0]);
/* Get sequence from control into memory and also saved as fasta file. */
if (sameWord(type, bothQueryMenu[0]))
{
seqList = faSeqListFromMemText(seqText, FALSE);
if (seqList == NULL)
errAbort("Please go back and paste in a sequence");
if (seqIsDna(seqList))
{
for (seq = seqList; seq != NULL; seq = seq->next)
{
toLowerN(seq->dna, seq->size);
dnaFilterToN(seq->dna, seq->dna);
}
}
else
{
protQuery = TRUE;
type = "Protein";
}
}
else
{
protQuery = sameWord(type, "Protein");
seqList = faSeqListFromMemText(seqText, !protQuery);
if (seqList == NULL)
errAbort("Please go back and paste in a sequence");
}
if (seqList->name == NULL || seqList->name[0] == 0)
{
freez(&seqList->name);
seqList->name = cloneString("query");
}
faWriteAll(faName, seqList);
/* Set up output for blat. */
f = mustOpen(pslName, "w");
gvo = gfOutputPsl(0, protQuery, FALSE, f, FALSE, TRUE);
gvo->includeTargetFile = TRUE;
txServer = isTxType(type);
server = findServer(txServer);
/* Loop through sequences doing alignments and saving to file. */
for (seq = seqList; seq != NULL; seq = seq->next)
{
conn = gfConnect(server->host, server->port);
if (txServer)
{
gvo->reportTargetStrand = TRUE;
if (protQuery)
{
gfAlignTrans(&conn, server->seqDir, seq, 5, tFileCache, gvo);
}
else
{
boolean isRna = sameWord(type, "RNA");
gfAlignTransTrans(&conn, server->seqDir, seq, FALSE, 5,
tFileCache, gvo, isRna);
if (!isRna)
{
reverseComplement(seq->dna, seq->size);
conn = gfConnect(server->host, server->port);
gfAlignTransTrans(&conn, server->seqDir, seq, TRUE, 5,
tFileCache, gvo, FALSE);
}
}
}
else
{
gfAlignStrand(&conn, server->seqDir, seq, FALSE, 16, tFileCache, gvo);
reverseComplement(seq->dna, seq->size);
conn = gfConnect(server->host, server->port);
gfAlignStrand(&conn, server->seqDir, seq, TRUE, 16, tFileCache, gvo);
}
gfOutputQuery(gvo, f);
}
carefulClose(&f);
/* Display alignment results. */
aliLines(pslName, faName, server->name, type);
}
void doGetSeq()
/* Put up form that asks them to submit sequence. */
{
char *qType = NULL;
char **queryMenu = NULL;
int queryMenuSize = 0;
struct gfServerAt *serverList = NULL, *server;
printf("<H1>%s Web BLAT</H1>", cfg->company);
printf("<FORM ACTION=\"../cgi-bin/webBlat\" METHOD=\"POST\">\n");
/* Figure out whether we do nucleotide, translated, or both. */
if (cfg->serverList != NULL && cfg->transServerList != NULL)
{
queryMenu = bothQueryMenu;
queryMenuSize = ArraySize(bothQueryMenu);
serverList = cfg->serverList;
}
else if (cfg->serverList != NULL)
{
queryMenu = nucQueryMenu;
queryMenuSize = ArraySize(nucQueryMenu);
serverList = cfg->serverList;
}
else if (cfg->transServerList != NULL)
{
queryMenu = protQueryMenu;
queryMenuSize = ArraySize(protQueryMenu);
serverList = cfg->transServerList;
}
else
{
errAbort("No servers configured!");
}
/* Put up database control */
printf("Database: ");
printf("<SELECT NAME=\"wb_db\">\n");
printf(" <OPTION SELECTED>%s</OPTION>\n", serverList->name);
for (server = serverList->next; server != NULL; server = server->next)
printf(" <OPTION>%s</OPTION>\n", server->name);
printf("</SELECT>\n");
/* Put up query type control. */
qType = cgiUsualString("wb_qType", queryMenu[0]);
printf("Query Type: ");
cgiMakeDropList("wb_qType", queryMenu, queryMenuSize, qType);
printf("<BR>\n");
/* Put up sort and output type controls. */
printf("Sort By: ");
cgiMakeDropList("wb_sort", sortMenu, ArraySize(sortMenu), sortMenu[0]);
printf("Output: ");
cgiMakeDropList("wb_output", outputMenu, ArraySize(outputMenu), outputMenu[0]);
cgiMakeButton("Submit", "Submit");
printf("<BR>\n");
cgiMakeTextArea("wb_seq", "", 10, 60);
printf("<BR>\n");
printf("Please paste in some sequence and press submit. You can submit multiple "
"sequences at once if they are in fasta format (where each sequence has "
"a header line that starts with > and contains the name of the sequence)");
printf("</FORM>");
}
void webBlat()
/* Parse out CGI variables and decide which page to put up. */
{
if (cgiVarExists("wb_help"))
htmShell("Web BLAT Help", doHelp, NULL);
else if (cgiVarExists("wb_seq"))
htmShell("Web BLAT Results", doBlat, NULL);
else if (cgiVarExists("wb_doDetailLine"))
{
puts("Content-Type:text/html");
puts("\n");
doDetailLine();
}
else
htmShell("Web BLAT", doGetSeq, NULL);
}
int main(int argc, char *argv[])
/* Process command line. */
{
boolean isFromWeb = cgiIsOnWeb();
htmlPushEarlyHandlers();
dnaUtilOpen();
if (!isFromWeb && !cgiSpoof(&argc, argv))
usage();
cfg = gfWebConfigRead("webBlat.cfg");
if (cfg->tempDir == NULL)
errAbort("No tempDir set in webBlat.cfg");
if (cfg->background != NULL)
htmlSetBackground(cfg->background);
webBlat();
return 0;
}