e9492dbfa8a68aee133893d4b02475b0aec27bb9
markd
Sun Dec 6 23:48:42 2020 -0800
webblat and hgBlat working
diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c
index 2254fe8..013a9ee3 100644
--- src/hg/hgBlat/hgBlat.c
+++ src/hg/hgBlat/hgBlat.c
@@ -61,31 +61,30 @@
given by caller rather than returned by gfServer. */
};
struct genomeHits
/* Information about hits on a genome assembly */
{
struct genomeHits *next;
char *host; /* Host. */
char *port; /* Port. */
char *db; /* Database name. */
char *genome; /* Genome name. */
int seqNumber; /* Submission order */
char *faName; /* fasta name */
char *dna; /* query dna */
int dnaSize; /* query dna size */
- struct gfConnection *conn; /* Connection */
char *type; /* query type = query, protQuery, transQuery */
char *xType; /* query type = dna, prot, rnax, dnax */
boolean queryRC; /* is the query reverse-complemented */
boolean complex; /* is the query complex */
boolean isProt; /* is the protein query */
boolean isDynamic; /* is a dynamic server */
char *genomeDataDir; /* dynamic server root-relative directory */
int maxGeneHits; /* Highest gene hit-count */
char *maxGeneChrom; /* Target Chrom for gene with max gene hits */
int maxGeneChromSize; /* Target Chrom Size for only prot, rnax, dnax */
int maxGeneTStart; /* Target Start Coordinate for gene with max hits */
int maxGeneTEnd; /* Target End Coordinate for gene with max hits*/
int maxGeneExons; /* Number of Exons in gene with max hits */
char maxGeneStrand[3]; /* + or - or ++ +- -+ -- Strand for gene with max hits */
@@ -866,31 +865,31 @@
safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
safef(description, sizeof description, "blat on %d queries (%s, %s)", count, names->name, names->next->name);
}
else
{
safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name);
}
*pName = makeNameUnique(shortName, database, cart);
*pDescription = cloneString(description);
}
void queryServer(char *host, char *port, char *db, struct dnaSeq *seq, char *type, char *xType,
boolean complex, boolean isProt, boolean queryRC, int seqNumber, char *genomeDataDir)
-/* Send simple query to server and report results.
+/* Send simple query to server and report results. (no, it doesn't do this)
* queryRC is true when the query has been reverse-complemented */
{
struct genomeHits *gH;
AllocVar(gH);
gH->host=cloneString(host);
gH->port=cloneString(port);
gH->db = cloneString(db);
gH->genome = cloneString(hGenome(db));
gH->seqNumber = seqNumber;
gH->faName = cloneString(seq->name);
gH->dna = cloneString(seq->dna);
gH->dnaSize = seq->size;
gH->type = cloneString(type);
gH->xType = cloneString(xType);
@@ -993,81 +992,72 @@
return;
int qFactor = 3;
int tFactor = 3;
if (gH->isProt)
qFactor = 1;
struct gfResult *gfR = NULL;
for(gfR=gH->gfList; gfR; gfR=gfR->next)
{
gfR->qStart = gfR->qStart * qFactor + gfR->qFrame;
gfR->qEnd = gfR->qEnd * qFactor + gfR->qFrame;
gfR->tStart = gfR->tStart * tFactor + gfR->tFrame;
gfR->tEnd = gfR->tEnd * tFactor + gfR->tFrame;
}
}
-struct gfConnection *gfConnectEx(char *host, char *port, boolean isDynamic)
-/* Try to connect to gfServer */
-{
-if (allGenomes)
- return gfMayConnect(host, port, isDynamic); // returns NULL on failure
-else
- return gfConnect(host, port, isDynamic); // errAborts on failure.
-}
-
-
void queryServerFinish(struct genomeHits *gH)
/* Report results from gfServer. */
{
char buf[256];
int matchCount = 0;
-gH->conn = gfConnectEx(gH->host, gH->port, gH->isDynamic);
-if (gH->conn == NULL)
+struct gfConnection *conn = gfMayConnect(gH->host, gH->port, gH->isDynamic);
+if (conn == NULL)
{
gH->error = TRUE;
gH->networkErrMsg = "Connection to gfServer failed.";
return;
}
dyStringPrintf(gH->dbg,"query strand %s qsize %d
\n", gH->queryRC ? "-" : "+", gH->dnaSize);
/* Put together query command. */
if (gH->isDynamic)
safef(buf, sizeof buf, "%s%s %s %s %d", gfSignature(), gH->type,
dynServerGenome(gH->db), gH->genomeDataDir, gH->dnaSize);
else
safef(buf, sizeof buf, "%s%s %d", gfSignature(), gH->type, gH->dnaSize);
-mustWriteFd(gH->conn->fd, buf, strlen(buf));
+gfBeginRequest(conn);
+mustWriteFd(conn->fd, buf, strlen(buf));
-if (read(gH->conn->fd, buf, 1) < 0)
+if (read(conn->fd, buf, 1) < 0)
errAbort("queryServerFinish: read failed: %s", strerror(errno));
if (buf[0] != 'Y')
errAbort("Expecting 'Y' from server, got %c", buf[0]);
-mustWriteFd(gH->conn->fd, gH->dna, gH->dnaSize); // Cannot shifted earlier for speed. must wait for Y confirmation.
+mustWriteFd(conn->fd, gH->dna, gH->dnaSize); // Cannot shifted earlier for speed. must wait for Y confirmation.
if (gH->complex)
{
- char *s = netRecieveString(gH->conn->fd, buf);
+ char *s = netRecieveString(conn->fd, buf);
if (!s)
errAbort("expected response from gfServer with tileSize");
dyStringPrintf(gH->dbg,"%s
\n", s); // from server: tileSize 4
}
for (;;)
{
- if (netGetString(gH->conn->fd, buf) == NULL)
+ if (netGetString(conn->fd, buf) == NULL)
break;
if (sameString(buf, "end"))
{
dyStringPrintf(gH->dbg,"%d matches
\n", matchCount);
break;
}
else if (startsWith("Error:", buf))
{
errAbort("%s", buf);
break;
}
else
{
dyStringPrintf(gH->dbg,"%s
\n", buf);
// chop the line into words
@@ -1136,31 +1126,31 @@
{
gfR->tStrand = word[6][0]; // e.g. + or -
gfR->tFrame = sqlUnsigned(word[7]); // e.g. 0,1,2
if (!gH->isProt)
{
gfR->qFrame = sqlUnsigned(word[8]); // e.g. 0,1,2
}
}
else
{
gfR->tStrand = '+'; // dna search only on + target strand
}
if (gH->complex)
{
- char *s = netGetLongString(gH->conn->fd);
+ char *s = netGetLongString(conn->fd);
if (s == NULL)
break;
dyStringPrintf(gH->dbg,"%s
\n", s); //dumps out qstart1 tstart1 qstart2 tstart2 ...
freeMem(s);
}
slAddHead(&gH->gfList, gfR);
}
++matchCount;
}
slReverse(&gH->gfList);
unTranslateCoordinates(gH); // convert back to untranslated coordinates
slSort(&gH->gfList, gfResultsCmp); // sort by tStrand, chrom, tStart
@@ -1258,32 +1248,31 @@
// Adjust tEnd
if (gH->maxGeneTEnd < r[qFrame].maxGeneTEnd)
{
gH->maxGeneTEnd = r[qFrame].maxGeneTEnd;
}
}
gH->maxGeneHits /= 3; // average over 3 frames.
char qStrand = (gH->queryRC ? '-' : '+');
safef(gH->maxGeneStrand, sizeof gH->maxGeneStrand, "%c%c", qStrand, gH->maxGeneTStrand);
}
-
-gfDisconnect(&gH->conn);
+gfDisconnect(&conn);
}
int findMinMatch(long genomeSize, boolean isProt)
// Return default minMatch for genomeSize,
// the expected number of occurrences of string length k
// in random genome of size N = N/(4^k)
{
int alphaBetSize;
if (isProt)
{
alphaBetSize = 20;
genomeSize = genomeSize / 3;
}
else
{
@@ -1324,41 +1313,40 @@
genome->tbf = twoBitOpen(genome->twoBitPath);
long genomeSize = 0;
struct twoBitIndex *index;
for (index = genome->tbf->indexList; index != NULL; index = index->next)
{
genomeSize += twoBitSeqSize(genome->tbf, index->name);
}
twoBitClose(&genome->tbf);
return genomeSize;
}
-int findGenomeParams(struct serverTable *serve)
+int findGenomeParams(struct gfConnection *conn, struct serverTable *serve)
/* Send status message to server and report result.
* Get tileSize stepSize and minMatch.
*/
{
char buf[256];
int ret = 0;
/* Put together command. */
-// FIXME: need to keep open
-struct gfConnection *conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
+gfBeginRequest(conn);
if (serve->isDynamic)
sprintf(buf, "%s%s %s %s", gfSignature(), (serve->isTrans ? "transInfo" : "untransInfo"),
dynServerGenome(serve->db), serve->genomeDataDir);
else
sprintf(buf, "%sstatus", gfSignature());
mustWriteFd(conn->fd, buf, strlen(buf));
for (;;)
{
if (netGetString(conn->fd, buf) == NULL)
{
warn("Error reading status information from %s:%s",serve->host, serve->port);
ret = -1;
break;
}
@@ -1368,31 +1356,31 @@
{
if (startsWith("tileSize ", buf))
{
serve->tileSize = atoi(buf+strlen("tileSize "));
}
if (startsWith("stepSize ", buf))
{
serve->stepSize = atoi(buf+strlen("stepSize "));
}
if (startsWith("minMatch ", buf))
{
serve->minMatch = atoi(buf+strlen("minMatch "));
}
}
}
-gfDisconnect(&conn);
+gfEndRequest(conn);
return(ret);
}
void blatSeq(char *userSeq, char *organism, char *database, int dbCount)
/* Blat sequence user pasted in. */
{
FILE *f;
struct dnaSeq *seqList = NULL, *seq;
struct tempName pslTn, faTn;
int maxSingleSize, maxTotalSize, maxSeqCount;
char *genome, *db;
char *type = cgiString("type");
char *seqLetters = cloneString(userSeq);
struct serverTable *serve;
struct gfConnection *conn = NULL;
@@ -1572,32 +1560,34 @@
#else
// read genome size
if (trackHubDatabase(database))
{
genomeSize = findGenomeSizeFromHub(database);
}
else
{
genomeSize = findGenomeSize(database);
}
minMatchShown = findMinMatch(genomeSize, qType == gftProt);
#endif
if (allResults)
minMatchShown = 0;
+ conn = gfConnect(serve->host, serve->port, serve->isDynamic);
+
// read tileSize stepSize minMatch from server status
- findGenomeParams(serve);
+ findGenomeParams(conn, serve);
int minLucky = (serve->minMatch * serve->stepSize + (serve->tileSize - serve->stepSize)) * xlat;
minSuggested = max(minMatchShown,minLucky);
}
int seqNumber = 0;
/* Loop through each sequence. */
for (seq = seqList; seq != NULL; seq = seq->next)
{
printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */
oneSize = realSeqSize(seq, !isTx);
// Impose half the usual bot delay per sequence
if (dbCount == 0)
@@ -1630,79 +1620,74 @@
warn("Sequence %s would take us over the %d letter limit, stopping here.",
seq->name, maxTotalSize);
break;
}
if (isTx)
{
gvo->reportTargetStrand = TRUE;
if (isTxTx)
{
if (allGenomes)
queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber,
serve->genomeDataDir);
else
{
- conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
gfAlignTransTrans(conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth,
dynServerGenome(serve->db), serve->genomeDataDir);
}
if (txTxBoth)
{
reverseComplement(seq->dna, seq->size);
if (allGenomes)
queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber,
serve->genomeDataDir);
else
{
- conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
gfAlignTransTrans(conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE,
dynServerGenome(serve->db), serve->genomeDataDir);
}
}
}
else
{
if (allGenomes)
queryServer(serve->host, serve->port, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber,
serve->genomeDataDir);
else
{
- conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
gfAlignTrans(conn, serve->nibDir, seq, 5, tFileCache, gvo,
dynServerGenome(serve->db), serve->genomeDataDir);
}
}
}
else
{
if (allGenomes)
queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber,
serve->genomeDataDir);
else
{
- conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
gfAlignStrand(conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo,
dynServerGenome(serve->db), serve->genomeDataDir);
}
reverseComplement(seq->dna, seq->size);
if (allGenomes)
queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber,
serve->genomeDataDir);
else
{
- conn = gfConnectEx(serve->host, serve->port, serve->isDynamic);
gfAlignStrand(conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo,
dynServerGenome(serve->db), serve->genomeDataDir);
}
}
gfOutputQuery(gvo, f);
++seqNumber;
}
carefulClose(&f);
if (!allGenomes)
{
showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType,
organism, feelingLucky);
}