f50fee44b129d516772f00d7a1d00be2f3953e6f
markd
Thu Jul 2 11:50:46 2020 -0700
hgBlat appears to be working
diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c
index cef6e0c..1188981 100644
--- src/hg/hgBlat/hgBlat.c
+++ src/hg/hgBlat/hgBlat.c
@@ -65,30 +65,31 @@
/* Information about hits on a genome assembly */
{
struct genomeHits *next;
char *db; /* Database name. */
char *genome; /* Genome name. */
int seqNumber; /* Submission order */
char *faName; /* fasta name */
char *dna; /* query dna */
int dnaSize; /* query dna size */
int sd; /* Connection */
char *type; /* query type = query, protQuery, transQuery */
char *xType; /* query type = dna, prot, rnax, dnax */
boolean queryRC; /* is the query reverse-complemented */
boolean complex; /* is the query complex */
boolean isProt; /* is the protein query */
+ char *dynGenome; /* genome for dynamic server */
int maxGeneHits; /* Highest gene hit-count */
char *maxGeneChrom; /* Target Chrom for gene with max gene hits */
int maxGeneChromSize; /* Target Chrom Size for only prot, rnax, dnax */
int maxGeneTStart; /* Target Start Coordinate for gene with max hits */
int maxGeneTEnd; /* Target End Coordinate for gene with max hits*/
int maxGeneExons; /* Number of Exons in gene with max hits */
char maxGeneStrand[3]; /* + or - or ++ +- -+ -- Strand for gene with max hits */
char maxGeneTStrand;/* + or - TStrand for gene with max hits */
boolean done; /* Did the job get to finish */
boolean error; /* Some error happened */
char *networkErrMsg; /* Network layer error message */
struct dyString *dbg; /* Output debugging info */
struct gfResult *gfList; /* List of gfResult records */
boolean hide; /* To not show both strands, suppress the weaker-scoring one */
@@ -837,48 +838,49 @@
{
safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
safef(description, sizeof description, "blat on %d queries (%s, %s)", count, names->name, names->next->name);
}
else
{
safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name);
}
*pName = makeNameUnique(shortName, database, cart);
*pDescription = cloneString(description);
}
void queryServerSetup(int conn, char *db, struct dnaSeq *seq, char *type, char *xType,
- boolean complex, boolean isProt, boolean queryRC, int seqNumber)
+ boolean complex, boolean isProt, boolean queryRC, int seqNumber, char *dynGenome)
/* Setup query to server: record everything needed to run one query later
* in a new genomeHits entry and push that entry onto the head of pfdList.
* conn is the already-opened connection descriptor; -1 marks the entry as failed.
* db, type, xType and the name and dna of seq are copied with cloneString.
* queryRC is true when the query has been reverse-complemented.
* seqNumber is stored as the submission order of the query.
* dynGenome is stored as passed in (pointer only, no copy).
* No data is written to the connection here. */
{
struct genomeHits *gH;
AllocVar(gH);
/* Identity of the target assembly. */
gH->db = cloneString(db);
gH->genome = cloneString(hGenome(db));
gH->seqNumber = seqNumber;
/* Private copies of the query, so the entry does not depend on seq afterwards. */
gH->faName = cloneString(seq->name);
gH->dna = cloneString(seq->dna);
gH->dnaSize = seq->size;
gH->type = cloneString(type);
gH->xType = cloneString(xType);
gH->queryRC = queryRC;
gH->complex = complex;
gH->isProt = isProt;
/* NOTE(review): unlike the strings above, dynGenome is not cloned, so the
* caller's string must stay valid for as long as this entry is used. */
+gH->dynGenome = dynGenome;
gH->sd = conn;
/* A descriptor of -1 is treated as a failed connection; the entry is still
* queued, carrying the error flag and a message. */
if (gH->sd == -1)
{
gH->error = TRUE;
gH->networkErrMsg = "Connection to gfServer failed.";
}
/* Buffer for per-query debugging output. */
gH->dbg = dyStringNew(256);
slAddHead(&pfdList, gH);
}
void findBestGene(struct genomeHits *gH, int queryFrame)
/* Find best gene-like object with multiple linked-features.
* Remember chrom start end of best gene found and total hits in the gene.
* Should sort the gfResults by tStrand, chrom, tStart.
* Filters on queryFrame */
@@ -976,31 +978,34 @@
gfR->qEnd = gfR->qEnd * qFactor + gfR->qFrame;
gfR->tStart = gfR->tStart * tFactor + gfR->tFrame;
gfR->tEnd = gfR->tEnd * tFactor + gfR->tFrame;
}
}
void queryServerFinish(struct genomeHits *gH)
/* Report results from gfServer. */
{
char buf[256];
int matchCount = 0;
dyStringPrintf(gH->dbg,"query strand %s qsize %d<br>\n", gH->queryRC ? "-" : "+", gH->dnaSize);
/* Put together query command. */
+if (gH->dynGenome == NULL)
safef(buf, sizeof buf, "%s%s %d", gfSignature(), gH->type, gH->dnaSize);
+else
+ safef(buf, sizeof buf, "%s%s %d %s", gfSignature(), gH->type, gH->dnaSize, gH->dynGenome);
mustWriteFd(gH->sd, buf, strlen(buf));
if (read(gH->sd, buf, 1) < 0)
errAbort("queryServerFinish: read failed: %s", strerror(errno));
if (buf[0] != 'Y')
errAbort("Expecting 'Y' from server, got %c", buf[0]);
mustWriteFd(gH->sd, gH->dna, gH->dnaSize);
if (gH->complex)
{
char *s = netRecieveString(gH->sd, buf);
if (!s)
errAbort("expected response from gfServer with tileSize");
dyStringPrintf(gH->dbg,"%s<br>\n", s); // from server: tileSize 4
}
@@ -1299,31 +1304,35 @@
}
int findGenomeParams(struct serverTable *serve)
/* Send status message to server and report result.
* Get tileSize stepSize and minMatch.
* Opens its own connection to serve->host / serve->port. When serve->dynGenome
* is NULL the "status" command is sent; otherwise "transInfo" or "untransInfo"
* (chosen by serve->isTrans) is sent followed by the dynGenome string.
* Response strings are read until "end"; values found are stored in serve.
* Returns 0 normally, -1 if reading a response string fails (after a warning).
*/
{
char buf[256];
int sd = 0;
int ret = 0;
/* Put together command. */
/* NOTE(review): the descriptor returned by gfConnectEx is used without a
* check - confirm how it reports a failed connection. */
sd = gfConnectEx(serve->host, serve->port);
+if (serve->dynGenome == NULL)
sprintf(buf, "%sstatus", gfSignature());
+else
/* NOTE(review): sprintf does not bound the write, so a long dynGenome string
* could overflow the 256-byte buf; a size-checked formatter would be safer. */
+ sprintf(buf, "%s%s %s", gfSignature(), (serve->isTrans ? "transInfo" : "untransInfo"),
+ serve->dynGenome);
mustWriteFd(sd, buf, strlen(buf));
/* Read one response string per iteration; buf is reused for the replies. */
for (;;)
{
if (netGetString(sd, buf) == NULL)
{
warn("Error reading status information from %s:%s",serve->host, serve->port);
ret = -1;
break;
}
/* "end" terminates the response. */
if (sameString(buf, "end"))
break;
else
{
/* Each recognized line is "<name> <value>"; the value is parsed with atoi. */
if (startsWith("tileSize ", buf))
@@ -1332,64 +1341,30 @@
}
if (startsWith("stepSize ", buf))
{
serve->stepSize = atoi(buf+strlen("stepSize "));
}
if (startsWith("minMatch ", buf))
{
serve->minMatch = atoi(buf+strlen("minMatch "));
}
}
}
/* The connection is closed on both the normal and the error path. */
close(sd);
return(ret);
}
-int findGenomeParamsDynamic(struct serverTable *serve)
-/* read genome params from dynamic server, which follow "end" in response trailer.
- */
-{
-char buf[256];
-for (;;)
- {
- if (netGetString(STDIN_FILENO, buf) == NULL)
- {
- warn("Error reading status information from dynamic server");
- return -1;
- }
- if (sameString(buf, "trailerEnd"))
- return 0;
- else
- {
- if (startsWith("tileSize ", buf))
- {
- serve->tileSize = atoi(buf+strlen("tileSize "));
- }
- if (startsWith("stepSize ", buf))
- {
- serve->stepSize = atoi(buf+strlen("stepSize "));
- }
- if (startsWith("minMatch ", buf))
- {
- serve->minMatch = atoi(buf+strlen("minMatch "));
- }
- }
- }
-}
-
-
-
void blatSeq(char *userSeq, char *organism, char *database, int dbCount)
/* Blat sequence user pasted in. */
{
FILE *f;
struct dnaSeq *seqList = NULL, *seq;
struct tempName pslTn, faTn;
int maxSingleSize, maxTotalSize, maxSeqCount;
char *genome, *db;
char *type = cgiString("type");
char *seqLetters = cloneString(userSeq);
struct serverTable *serve;
int conn;
int oneSize, totalSize = 0, seqCount = 0;
boolean isTx = FALSE;
boolean isTxTx = FALSE;
@@ -1564,33 +1539,30 @@
// read genome size
if (trackHubDatabase(database))
{
genomeSize = findGenomeSizeFromHub(database);
}
else
{
genomeSize = findGenomeSize(database);
}
minMatchShown = findMinMatch(genomeSize, qType == gftProt);
#endif
if (allResults)
minMatchShown = 0;
// read tileSize stepSize minMatch from server status
- if (serve->dynGenome != NULL)
- findGenomeParamsDynamic(serve);
- else
findGenomeParams(serve);
int minLucky = (serve->minMatch * serve->stepSize + (serve->tileSize - serve->stepSize)) * xlat;
minSuggested = max(minMatchShown,minLucky);
}
int seqNumber = 0;
/* Loop through each sequence. */
for (seq = seqList; seq != NULL; seq = seq->next)
{
printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */
oneSize = realSeqSize(seq, !isTx);
// Impose half the usual bot delay per sequence
@@ -1622,65 +1594,70 @@
if (totalSize > maxTotalSize)
{
warn("Sequence %s would take us over the %d letter limit, stopping here.",
seq->name, maxTotalSize);
break;
}
conn = gfConnectEx(serve->host, serve->port);
if (isTx)
{
gvo->reportTargetStrand = TRUE;
if (isTxTx)
{
if (allGenomes)
- queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber);
+ queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber,
+ serve->dynGenome);
else
gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth,
serve->dynGenome);
if (txTxBoth)
{
reverseComplement(seq->dna, seq->size);
conn = gfConnectEx(serve->host, serve->port);
if (allGenomes)
- queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber);
+ queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber,
+ serve->dynGenome);
else
gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE,
serve->dynGenome);
}
}
else
{
if (allGenomes)
- queryServerSetup(conn, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber);
+ queryServerSetup(conn, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber,
+ serve->dynGenome);
else
gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo,
serve->dynGenome);
}
}
else
{
if (allGenomes)
- queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber);
+ queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber,
+ serve->dynGenome);
else
gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo,
serve->dynGenome);
reverseComplement(seq->dna, seq->size);
conn = gfConnectEx(serve->host, serve->port);
if (allGenomes)
- queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber);
+ queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber,
+ serve->dynGenome);
else
gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo,
serve->dynGenome);
}
gfOutputQuery(gvo, f);
++seqNumber;
}
carefulClose(&f);
if (!allGenomes)
{
showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType,
organism, feelingLucky);
}