f8058232ef369389e7bca4eac46901e8eca76c9a galt Fri Mar 12 13:29:57 2021 -0800 Improvements to hgBlat to tolerate and explain limitations of using dynamic blat servers with the Search All feature. refs #26658 diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c index ea57b9d..e0f258b 100644 --- src/hg/hgBlat/hgBlat.c +++ src/hg/hgBlat/hgBlat.c @@ -268,36 +268,43 @@ for (pfd = pfdRunning; pfd; pfd = pfd->next) { // unfinished query char temp[1024]; safef(temp, sizeof temp, "Timeout %d milliseconds exceeded processing %s %s", maxTimeInMilliseconds, pfd->genome, pfd->db); pfd->networkErrMsg = cloneString(temp); pfd->error = TRUE; ++errCount; } for (pfd = pfdDone; pfd; pfd = pfd->next) { // some done queries may have errors if (pfd->error) ++errCount; } +slCat(pfdDone, pfdRunning); +pfdRunning = NULL; +slCat(pfdDone, pfdNeverStarted); +pfdNeverStarted = NULL; pthread_mutex_unlock( &pfdMutex ); return errCount; } // ================== +int nonHubDynamicBlatServerCount = 0; +int hubDynamicBlatServerCount = 0; + struct serverTable /* Information on a server. */ { char *db; /* Database name. */ char *genome; /* Genome name. */ boolean isTrans; /* Is tranlated to protein? */ char *host; /* Name of machine hosting server. */ char *port; /* Port that hosts server. */ char *nibDir; /* Directory of sequence files. */ int tileSize; /* gfServer -tileSize */ int stepSize; /* gfServer -stepSize */ int minMatch; /* gfServer -minMatch */ boolean isDynamic; /* is a dynamic server */ char* genomeDataDir; /* genome name for dynamic gfServer */ }; @@ -348,30 +355,31 @@ "here " "to reset to default database.", (isTrans ? 
"translated" : "DNA"), db, cartSidUrlString(cart), hDefaultDb()); } st->db = cloneString(row[0]); st->genome = cloneString(row[1]); st->isTrans = atoi(row[2]); st->host = cloneString(row[3]); st->port = cloneString(row[4]); st->nibDir = hReplaceGbdbSeqDir(row[5], st->db); if (atoi(row[6])) { st->isDynamic = TRUE; st->genomeDataDir = cloneString(st->db); // directories by database name for database genomes + ++nonHubDynamicBlatServerCount; } sqlFreeResult(&sr); hDisconnectCentral(&conn); return st; } static struct serverTable *trackHubServerTable(char *db, boolean isTrans) /* Load blat table for a hub */ { char *host, *port; char *genomeDataDir; if (!trackHubGetBlatParams(db, isTrans, &host, &port, &genomeDataDir)) errAbort("Cannot get blat server parameters for track hub with database %s", db); @@ -382,30 +390,31 @@ st->db = cloneString(db); st->genome = cloneString(hGenome(db)); st->isTrans = isTrans; st->host = host; st->port = port; struct trackHubGenome *genome = trackHubGetGenome(db); st->nibDir = cloneString(genome->twoBitPath); char *ptr = strrchr(st->nibDir, '/'); // we only want the directory name if (ptr != NULL) *ptr = 0; if (genomeDataDir != NULL) { st->isDynamic = TRUE; st->genomeDataDir = cloneString(genomeDataDir); + ++hubDynamicBlatServerCount; } return st; } struct serverTable *findServer(char *db, boolean isTrans) /* Return server for given database. Db can either be * database name or description. 
*/ { if (trackHubDatabase(db)) return trackHubServerTable(db, isTrans); else return databaseServerTable(db, isTrans); } void findClosestServer(char **pDb, char **pOrg) @@ -887,59 +896,67 @@ else { safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1); safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name); } *pName = makeNameUnique(shortName, database, cart); *pDescription = cloneString(description); } void queryServer(char *host, char *port, char *db, struct dnaSeq *seq, char *type, char *xType, boolean complex, boolean isProt, boolean queryRC, int seqNumber, char *genomeDataDir) /* Send simple query to server and report results. (no, it doesn't do this) * queryRC is true when the query has been reverse-complemented */ { -/* - * xinetd throttles by refusing more connections, which causes queries to fail - * when the configured limit is reached. Rather than trying to throttle in the - * client, dynamic servers are excluded. See issue #26658. - */ -if (genomeDataDir != NULL) - return; struct genomeHits *gH; AllocVar(gH); gH->host=cloneString(host); gH->port=cloneString(port); gH->db = cloneString(db); gH->genome = cloneString(hGenome(db)); gH->seqNumber = seqNumber; gH->faName = cloneString(seq->name); gH->dna = cloneString(seq->dna); gH->dnaSize = seq->size; gH->type = cloneString(type); gH->xType = cloneString(xType); gH->queryRC = queryRC; gH->complex = complex; gH->isProt = isProt; gH->isDynamic = (genomeDataDir != NULL); gH->genomeDataDir = genomeDataDir; gH->dbg = dyStringNew(256); + +/* SKIP DYNAMIC SERVERS + * xinetd throttles by refusing more connections, which causes queries to fail + * when the configured limit is reached. Rather than trying to throttle in the + * client, dynamic servers are excluded. See issue #26658. 
+ */ +if (gH->isDynamic) + { + gH->error = TRUE; + gH->networkErrMsg = cloneString("Skipped Dynamic Server"); + slAddHead(&pfdDone, gH); + } +else + { slAddHead(&pfdList, gH); } +} void findBestGene(struct genomeHits *gH, int queryFrame) /* Find best gene-like object with multiple linked-features. * Remember chrom start end of best gene found and total hits in the gene. * Should sort the gfResults by tStrand, chrom, tStart. * Filters on queryFrame */ { char *bestChrom = NULL; int bestHits = 0; int bestTStart = 0; int bestTEnd = 0; int bestExons = 0; char bestTStrand = ' '; char bestQStrand = ' '; @@ -1736,31 +1753,31 @@ "document.mainForm.submit();"; char *userSeq = NULL; char *type = NULL; printf( "
\n"); @@ -1916,31 +1936,31 @@ struct genomeHits* gH2 = NULL; for (;gH1; gH1 = gH2->next) { gH2 = gH1->next; if (!gH2) errAbort("Hiding weaker of pairs found one without sibling."); if (!((gH1->seqNumber == gH2->seqNumber) && sameString(gH1->db, gH2->db) && (gH1->queryRC != gH2->queryRC))) errAbort("Error matching pairs, sibling does not match seqNumber and db."); // check if one or the other had an error if (gH1->error && gH2->error) gH2->hide = TRUE; // arbitrarily else if (gH1->error && !gH2->error) gH1->hide = TRUE; else if (!gH1->error && gH2->error) gH2->hide = TRUE; - else // keep the best scoring or the pair, hide the other + else // keep the best scoring of the pair, hide the other { if (gH2->maxGeneHits > gH1->maxGeneHits) gH1->hide = TRUE; else gH2->hide = TRUE; } } } void changeMaxGenePositionToPositiveStrandCoords(struct genomeHits *gH) /* convert negative strand coordinates to positive strand coordinates if TStrand=='-' */ { for (;gH; gH = gH->next) { if (gH->hide) @@ -2095,30 +2115,33 @@ cartWebEnd(); } else { if (allGenomes) { cartWebStart(cart, db, "All Genomes BLAT Results"); struct dbDb *dbList = hGetBlatIndexedDatabases(); struct dbDb *this = NULL; char *saveDb = db; char *saveOrg = organism; struct sqlConnection *conn = hConnectCentral(); int dbCount = 0; + nonHubDynamicBlatServerCount = 0; + hubDynamicBlatServerCount = 0; + for(this = dbList; this; this = this->next) { db = this->name; organism = hGenome(db); if (!trackHubDatabase(db)) // if not hub db, make sure it is the default assembly. { char query[256]; sqlSafef(query, sizeof query, "select name from defaultDb where genome='%s'", organism); char *defaultDb = sqlQuickString(conn, query); if (!sameOk(defaultDb, db)) continue; // skip non-default dbs } blatSeq(skipLeadingSpaces(userSeq), organism, db, dbCount); @@ -2264,35 +2287,48 @@ } printf("\n"); jsOnEventByIdF("click", id, "document.mainForm.org.value=\"%s\";" // some have single-quotes in their value. 
"document.mainForm.db.value='%s';" "document.mainForm.submit();" "return false;" // cancel the default link url , gH->genome, gH->db ); idCount++; } printf("\n"); } - printf("