9186d6410c3266a7069c81e642c9ea22dc981191 Merge parents bff6869 98c0054 markd Sun Oct 25 20:12:47 2020 -0700 merged master diff --cc src/hg/hgBlat/hgBlat.c index c216861,2251b88..ae4d56c --- src/hg/hgBlat/hgBlat.c +++ src/hg/hgBlat/hgBlat.c @@@ -53,44 -53,44 +53,46 @@@ int tStart; /* Target Start Coordinate */ int tEnd; /* Target End Coordinate */ int numHits; /* number of tile hits, minimum 2 for dna */ char tStrand; /* + or - Target Strand used with prot, rnax, dnax */ int tFrame; /* Target Frame 0,1,2 (mostly ignorable?) used with prot, rnax, dnax */ int qFrame; /* Query Frame 0,1,2 (mostly ignorable?) used with rnax, dnax*/ char qStrand; /* + or - Query Strand used with prot, rnax, dnax, given by caller rather than returned by gfServer. */ }; struct genomeHits /* Information about hits on a genome assembly */ { struct genomeHits *next; + char *host; /* Host. */ + char *port; /* Port. */ char *db; /* Database name. */ char *genome; /* Genome name. */ int seqNumber; /* Submission order */ char *faName; /* fasta name */ char *dna; /* query dna */ int dnaSize; /* query dna size */ int sd; /* Connection */ char *type; /* query type = query, protQuery, transQuery */ char *xType; /* query type = dna, prot, rnax, dnax */ boolean queryRC; /* is the query reverse-complemented */ boolean complex; /* is the query complex */ boolean isProt; /* is the protein query */ + boolean isDynamic; /* is a dynamic server */ + char *genomeDataDir; /* dynamic server root-relative directory */ int maxGeneHits; /* Highest gene hit-count */ char *maxGeneChrom; /* Target Chrom for gene with max gene hits */ int maxGeneChromSize; /* Target Chrom Size for only prot, rnax, dnax */ int maxGeneTStart; /* Target Start Coordinate for gene with max hits */ int maxGeneTEnd; /* Target End Coordinate for gene with max hits*/ int maxGeneExons; /* Number of Exons in gene with max hits */ char maxGeneStrand[3]; /* + or - or ++ +- -+ -- Strand for gene with max hits */ char maxGeneTStrand;/* + or - TStrand for gene with max hits */ boolean done; /* Did the job get to finish */ boolean error; /* Some error happened */ char *networkErrMsg; /* Network layer error message */ struct dyString *dbg; /* Output debugging info */ struct gfResult *gfList; /* List of gfResult records */ boolean hide; /* To not show both strands, suppress the weaker-scoring one */ @@@ -862,57 -834,52 +864,53 @@@ else if (count == 2) { safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1); safef(description, sizeof description, "blat on %d queries (%s, %s)", count, names->name, names->next->name); } else { safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1); safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name); } *pName = makeNameUnique(shortName, database, cart); *pDescription = cloneString(description); } - void queryServerSetup(int conn, char *db, struct dnaSeq *seq, char *type, char *xType, + void queryServer(char *host, char *port, char *db, struct dnaSeq *seq, char *type, char *xType, - boolean complex, boolean isProt, boolean queryRC, int seqNumber) + boolean complex, boolean isProt, boolean queryRC, int seqNumber, char *genomeDataDir) - /* Setup query to server. + /* Send simple query to server and report results. * queryRC is true when the query has been reverse-complemented */ { struct genomeHits *gH; AllocVar(gH); + gH->host=cloneString(host); + gH->port=cloneString(port); gH->db = cloneString(db); gH->genome = cloneString(hGenome(db)); gH->seqNumber = seqNumber; gH->faName = cloneString(seq->name); gH->dna = cloneString(seq->dna); gH->dnaSize = seq->size; gH->type = cloneString(type); gH->xType = cloneString(xType); gH->queryRC = queryRC; gH->complex = complex; gH->isProt = isProt; - +gH->isDynamic = (genomeDataDir != NULL); +gH->genomeDataDir = genomeDataDir; - gH->sd = conn; - if (gH->sd == -1) - { - gH->error = TRUE; - gH->networkErrMsg = "Connection to gfServer failed."; - } gH->dbg = dyStringNew(256); slAddHead(&pfdList, gH); } void findBestGene(struct genomeHits *gH, int queryFrame) /* Find best gene-like object with multiple linked-features. * Remember chrom start end of best gene found and total hits in the gene. * Should sort the gfResults by tStrand, chrom, tStart. * Filters on queryFrame */ { char *bestChrom = NULL; int bestHits = 0; int bestTStart = 0; int bestTEnd = 0; int bestExons = 0; @@@ -995,51 -962,67 +993,71 @@@ return; int qFactor = 3; int tFactor = 3; if (gH->isProt) qFactor = 1; struct gfResult *gfR = NULL; for(gfR=gH->gfList; gfR; gfR=gfR->next) { gfR->qStart = gfR->qStart * qFactor + gfR->qFrame; gfR->qEnd = gfR->qEnd * qFactor + gfR->qFrame; gfR->tStart = gfR->tStart * tFactor + gfR->tFrame; gfR->tEnd = gfR->tEnd * tFactor + gfR->tFrame; } } + int gfConnectEx(char *host, char *port) + /* Try to connect to gfServer */ + { + int conn = -1; + if (allGenomes) + conn = gfMayConnect(host, port); // returns -1 on failure + else + conn = gfConnect(host, port); // errAborts on failure. + return conn; + } + + void queryServerFinish(struct genomeHits *gH) /* Report results from gfServer. */ { char buf[256]; int matchCount = 0; + gH->sd = gfConnectEx(gH->host, gH->port); + if (gH->sd == -1) + { + gH->error = TRUE; + gH->networkErrMsg = "Connection to gfServer failed."; + return; + } + dyStringPrintf(gH->dbg,"query strand %s qsize %d
\n", gH->queryRC ? "-" : "+", gH->dnaSize); /* Put together query command. */ +if (gH->isDynamic) + safef(buf, sizeof buf, "%s%s %d %s %s", gfSignature(), gH->type, gH->dnaSize, + dynServerGenome(gH->db), gH->genomeDataDir); +else safef(buf, sizeof buf, "%s%s %d", gfSignature(), gH->type, gH->dnaSize); mustWriteFd(gH->sd, buf, strlen(buf)); if (read(gH->sd, buf, 1) < 0) errAbort("queryServerFinish: read failed: %s", strerror(errno)); if (buf[0] != 'Y') errAbort("Expecting 'Y' from server, got %c", buf[0]); - mustWriteFd(gH->sd, gH->dna, gH->dnaSize); + mustWriteFd(gH->sd, gH->dna, gH->dnaSize); // Cannot shifted earlier for speed. must wait for Y confirmation. if (gH->complex) { char *s = netRecieveString(gH->sd, buf); if (!s) errAbort("expected response from gfServer with tileSize"); dyStringPrintf(gH->dbg,"%s
\n", s); // from server: tileSize 4 } for (;;) { if (netGetString(gH->sd, buf) == NULL) break; if (sameString(buf, "end")) { @@@ -1612,82 -1584,83 +1622,93 @@@ seq->name, oneSize, minSuggested); // we could use "continue;" here to actually enforce skipping, // but let's give the short sequence a chance, it might work. // minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein. if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0 continue; } totalSize += oneSize; if (totalSize > maxTotalSize) { warn("Sequence %s would take us over the %d letter limit, stopping here.", seq->name, maxTotalSize); break; } - conn = gfConnectEx(serve->host, serve->port); - if (isTx) { gvo->reportTargetStrand = TRUE; if (isTxTx) { if (allGenomes) - queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber, - queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber); ++ queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber, + serve->genomeDataDir); else + { + conn = gfConnectEx(serve->host, serve->port); - gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth); + gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth, + dynServerGenome(serve->db), serve->genomeDataDir); + } if (txTxBoth) { reverseComplement(seq->dna, seq->size); - conn = gfConnectEx(serve->host, serve->port); if (allGenomes) - queryServerSetup(conn, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber, - queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber); ++ queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber, + serve->genomeDataDir); else + { + conn = gfConnectEx(serve->host, serve->port); - gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE); + gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE, + dynServerGenome(serve->db), serve->genomeDataDir); } } + } else { if (allGenomes) - queryServerSetup(conn, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber, - queryServer(serve->host, serve->port, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber); ++ queryServer(serve->host, serve->port, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber, + serve->genomeDataDir); else + { + conn = gfConnectEx(serve->host, serve->port); - gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo); + gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo, + dynServerGenome(serve->db), serve->genomeDataDir); } } + } else { if (allGenomes) - queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber, - queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber); ++ queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber, + serve->genomeDataDir); else + { + conn = gfConnectEx(serve->host, serve->port); - gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo); + gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo, + dynServerGenome(serve->db), serve->genomeDataDir); + } reverseComplement(seq->dna, seq->size); - conn = gfConnectEx(serve->host, serve->port); if (allGenomes) - queryServerSetup(conn, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber, - queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber); ++ queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber, + serve->genomeDataDir); else + { + conn = gfConnectEx(serve->host, serve->port); - gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo); + gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo, + dynServerGenome(serve->db), serve->genomeDataDir); } + } gfOutputQuery(gvo, f); ++seqNumber; } carefulClose(&f); if (!allGenomes) { showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType, organism, feelingLucky); } if(!feelingLucky && !allGenomes) cartWebEnd(); gfFileCacheFree(&tFileCache);