e9492dbfa8a68aee133893d4b02475b0aec27bb9 markd Sun Dec 6 23:48:42 2020 -0800 webblat and hgBlat working diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c index 2254fe8..013a9ee3 100644 --- src/hg/hgBlat/hgBlat.c +++ src/hg/hgBlat/hgBlat.c @@ -61,31 +61,30 @@ given by caller rather than returned by gfServer. */ }; struct genomeHits /* Information about hits on a genome assembly */ { struct genomeHits *next; char *host; /* Host. */ char *port; /* Port. */ char *db; /* Database name. */ char *genome; /* Genome name. */ int seqNumber; /* Submission order */ char *faName; /* fasta name */ char *dna; /* query dna */ int dnaSize; /* query dna size */ - struct gfConnection *conn; /* Connection */ char *type; /* query type = query, protQuery, transQuery */ char *xType; /* query type = dna, prot, rnax, dnax */ boolean queryRC; /* is the query reverse-complemented */ boolean complex; /* is the query complex */ boolean isProt; /* is the protein query */ boolean isDynamic; /* is a dynamic server */ char *genomeDataDir; /* dynamic server root-relative directory */ int maxGeneHits; /* Highest gene hit-count */ char *maxGeneChrom; /* Target Chrom for gene with max gene hits */ int maxGeneChromSize; /* Target Chrom Size for only prot, rnax, dnax */ int maxGeneTStart; /* Target Start Coordinate for gene with max hits */ int maxGeneTEnd; /* Target End Coordinate for gene with max hits*/ int maxGeneExons; /* Number of Exons in gene with max hits */ char maxGeneStrand[3]; /* + or - or ++ +- -+ -- Strand for gene with max hits */ @@ -866,31 +865,31 @@ safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1); safef(description, sizeof description, "blat on %d queries (%s, %s)", count, names->name, names->next->name); } else { safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1); safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name); } *pName = makeNameUnique(shortName, database, cart); *pDescription = cloneString(description); } void queryServer(char *host, char *port, char *db, struct dnaSeq *seq, char *type, char *xType, boolean complex, boolean isProt, boolean queryRC, int seqNumber, char *genomeDataDir) -/* Send simple query to server and report results. +/* Send simple query to server and report results. (no, it doesn't do this) * queryRC is true when the query has been reverse-complemented */ { struct genomeHits *gH; AllocVar(gH); gH->host=cloneString(host); gH->port=cloneString(port); gH->db = cloneString(db); gH->genome = cloneString(hGenome(db)); gH->seqNumber = seqNumber; gH->faName = cloneString(seq->name); gH->dna = cloneString(seq->dna); gH->dnaSize = seq->size; gH->type = cloneString(type); gH->xType = cloneString(xType); @@ -993,81 +992,72 @@ return; int qFactor = 3; int tFactor = 3; if (gH->isProt) qFactor = 1; struct gfResult *gfR = NULL; for(gfR=gH->gfList; gfR; gfR=gfR->next) { gfR->qStart = gfR->qStart * qFactor + gfR->qFrame; gfR->qEnd = gfR->qEnd * qFactor + gfR->qFrame; gfR->tStart = gfR->tStart * tFactor + gfR->tFrame; gfR->tEnd = gfR->tEnd * tFactor + gfR->tFrame; } } -struct gfConnection *gfConnectEx(char *host, char *port, boolean isDynamic) -/* Try to connect to gfServer */ -{ -if (allGenomes) - return gfMayConnect(host, port, isDynamic); // returns NULL on failure -else - return gfConnect(host, port, isDynamic); // errAborts on failure. -} - - void queryServerFinish(struct genomeHits *gH) /* Report results from gfServer. */ { char buf[256]; int matchCount = 0; -gH->conn = gfConnectEx(gH->host, gH->port, gH->isDynamic); -if (gH->conn == NULL) +struct gfConnection *conn = gfMayConnect(gH->host, gH->port, gH->isDynamic); +if (conn == NULL) { gH->error = TRUE; gH->networkErrMsg = "Connection to gfServer failed."; return; } dyStringPrintf(gH->dbg,"query strand %s qsize %d
\n", gH->queryRC ? "-" : "+", gH->dnaSize); /* Put together query command. */ if (gH->isDynamic) safef(buf, sizeof buf, "%s%s %s %s %d", gfSignature(), gH->type, dynServerGenome(gH->db), gH->genomeDataDir, gH->dnaSize); else safef(buf, sizeof buf, "%s%s %d", gfSignature(), gH->type, gH->dnaSize); -mustWriteFd(gH->conn->fd, buf, strlen(buf)); +gfBeginRequest(conn); +mustWriteFd(conn->fd, buf, strlen(buf)); -if (read(gH->conn->fd, buf, 1) < 0) +if (read(conn->fd, buf, 1) < 0) errAbort("queryServerFinish: read failed: %s", strerror(errno)); if (buf[0] != 'Y') errAbort("Expecting 'Y' from server, got %c", buf[0]); -mustWriteFd(gH->conn->fd, gH->dna, gH->dnaSize); // Cannot shifted earlier for speed. must wait for Y confirmation. +mustWriteFd(conn->fd, gH->dna, gH->dnaSize); // Cannot shifted earlier for speed. must wait for Y confirmation. if (gH->complex) { - char *s = netRecieveString(gH->conn->fd, buf); + char *s = netRecieveString(conn->fd, buf); if (!s) errAbort("expected response from gfServer with tileSize"); dyStringPrintf(gH->dbg,"%s
\n", s); // from server: tileSize 4 } for (;;) { - if (netGetString(gH->conn->fd, buf) == NULL) + if (netGetString(conn->fd, buf) == NULL) break; if (sameString(buf, "end")) { dyStringPrintf(gH->dbg,"%d matches
\n", matchCount); break; } else if (startsWith("Error:", buf)) { errAbort("%s", buf); break; } else { dyStringPrintf(gH->dbg,"%s
\n", buf); // chop the line into words @@ -1136,31 +1126,31 @@ { gfR->tStrand = word[6][0]; // e.g. + or - gfR->tFrame = sqlUnsigned(word[7]); // e.g. 0,1,2 if (!gH->isProt) { gfR->qFrame = sqlUnsigned(word[8]); // e.g. 0,1,2 } } else { gfR->tStrand = '+'; // dna search only on + target strand } if (gH->complex) { - char *s = netGetLongString(gH->conn->fd); + char *s = netGetLongString(conn->fd); if (s == NULL) break; dyStringPrintf(gH->dbg,"%s
\n", s); //dumps out qstart1 tstart1 qstart2 tstart2 ... freeMem(s); } slAddHead(&gH->gfList, gfR); } ++matchCount; } slReverse(&gH->gfList); unTranslateCoordinates(gH); // convert back to untranslated coordinates slSort(&gH->gfList, gfResultsCmp); // sort by tStrand, chrom, tStart @@ -1258,32 +1248,31 @@ // Adjust tEnd if (gH->maxGeneTEnd < r[qFrame].maxGeneTEnd) { gH->maxGeneTEnd = r[qFrame].maxGeneTEnd; } } gH->maxGeneHits /= 3; // average over 3 frames. char qStrand = (gH->queryRC ? '-' : '+'); safef(gH->maxGeneStrand, sizeof gH->maxGeneStrand, "%c%c", qStrand, gH->maxGeneTStrand); } - -gfDisconnect(&gH->conn); +gfDisconnect(&conn); } int findMinMatch(long genomeSize, boolean isProt) // Return default minMatch for genomeSize, // the expected number of occurrences of string length k // in random genome of size N = N/(4^k) { int alphaBetSize; if (isProt) { alphaBetSize = 20; genomeSize = genomeSize / 3; } else { @@ -1324,41 +1313,40 @@ genome->tbf = twoBitOpen(genome->twoBitPath); long genomeSize = 0; struct twoBitIndex *index; for (index = genome->tbf->indexList; index != NULL; index = index->next) { genomeSize += twoBitSeqSize(genome->tbf, index->name); } twoBitClose(&genome->tbf); return genomeSize; } -int findGenomeParams(struct serverTable *serve) +int findGenomeParams(struct gfConnection *conn, struct serverTable *serve) /* Send status message to server arnd report result. * Get tileSize stepSize and minMatch. */ { char buf[256]; int ret = 0; /* Put together command. */ -// FIXME: need to keep open -struct gfConnection *conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); +gfBeginRequest(conn); if (serve->isDynamic) sprintf(buf, "%s%s %s %s", gfSignature(), (serve->isTrans ? "transInfo" : "untransInfo"), dynServerGenome(serve->db), serve->genomeDataDir); else sprintf(buf, "%sstatus", gfSignature()); mustWriteFd(conn->fd, buf, strlen(buf)); for (;;) { if (netGetString(conn->fd, buf) == NULL) { warn("Error reading status information from %s:%s",serve->host, serve->port); ret = -1; break; } @@ -1368,31 +1356,31 @@ { if (startsWith("tileSize ", buf)) { serve->tileSize = atoi(buf+strlen("tileSize ")); } if (startsWith("stepSize ", buf)) { serve->stepSize = atoi(buf+strlen("stepSize ")); } if (startsWith("minMatch ", buf)) { serve->minMatch = atoi(buf+strlen("minMatch ")); } } } -gfDisconnect(&conn); +gfEndRequest(conn); return(ret); } void blatSeq(char *userSeq, char *organism, char *database, int dbCount) /* Blat sequence user pasted in. */ { FILE *f; struct dnaSeq *seqList = NULL, *seq; struct tempName pslTn, faTn; int maxSingleSize, maxTotalSize, maxSeqCount; char *genome, *db; char *type = cgiString("type"); char *seqLetters = cloneString(userSeq); struct serverTable *serve; struct gfConnection *conn = NULL; @@ -1572,32 +1560,34 @@ #else // read genome size if (trackHubDatabase(database)) { genomeSize = findGenomeSizeFromHub(database); } else { genomeSize = findGenomeSize(database); } minMatchShown = findMinMatch(genomeSize, qType == gftProt); #endif if (allResults) minMatchShown = 0; + conn = gfConnect(serve->host, serve->port, serve->isDynamic); + // read tileSize stepSize minMatch from server status - findGenomeParams(serve); + findGenomeParams(conn, serve); int minLucky = (serve->minMatch * serve->stepSize + (serve->tileSize - serve->stepSize)) * xlat; minSuggested = max(minMatchShown,minLucky); } int seqNumber = 0; /* Loop through each sequence. */ for (seq = seqList; seq != NULL; seq = seq->next) { printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */ oneSize = realSeqSize(seq, !isTx); // Impose half the usual bot delay per sequence if (dbCount == 0) @@ -1630,79 +1620,74 @@ warn("Sequence %s would take us over the %d letter limit, stopping here.", seq->name, maxTotalSize); break; } if (isTx) { gvo->reportTargetStrand = TRUE; if (isTxTx) { if (allGenomes) queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber, serve->genomeDataDir); else { - conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); gfAlignTransTrans(conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth, dynServerGenome(serve->db), serve->genomeDataDir); } if (txTxBoth) { reverseComplement(seq->dna, seq->size); if (allGenomes) queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber, serve->genomeDataDir); else { - conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); gfAlignTransTrans(conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE, dynServerGenome(serve->db), serve->genomeDataDir); } } } else { if (allGenomes) queryServer(serve->host, serve->port, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber, serve->genomeDataDir); else { - conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); gfAlignTrans(conn, serve->nibDir, seq, 5, tFileCache, gvo, dynServerGenome(serve->db), serve->genomeDataDir); } } } else { if (allGenomes) queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber, serve->genomeDataDir); else { - conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); gfAlignStrand(conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo, dynServerGenome(serve->db), serve->genomeDataDir); } reverseComplement(seq->dna, seq->size); if (allGenomes) queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber, serve->genomeDataDir); else { - conn = gfConnectEx(serve->host, serve->port, serve->isDynamic); gfAlignStrand(conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo, dynServerGenome(serve->db), serve->genomeDataDir); } } gfOutputQuery(gvo, f); ++seqNumber; } carefulClose(&f); if (!allGenomes) { showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType, organism, feelingLucky); }