7d0af5fc6dbc224c4131e6d6a43b09a8f661502b
galt
  Thu Oct 8 23:09:56 2020 -0700
Improving hgBlat by moving the gfServer connect from being done up front to being done in the per-thread routine. This means it should use fewer connections in worst-case scenarios, and also means that gfServer will not have to wait as long.

diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c
index 1d14f95..2251b88 100644
--- src/hg/hgBlat/hgBlat.c
+++ src/hg/hgBlat/hgBlat.c
@@ -53,30 +53,32 @@
     int tStart;    /* Target Start Coordinate */  
     int tEnd;      /* Target End Coordinate */
     int numHits;   /* number of tile hits, minimum  2 for dna */ 
     char tStrand;  /* + or - Target Strand used with prot, rnax, dnax */ 
     int tFrame;    /* Target Frame 0,1,2 (mostly ignorable?) used with prot, rnax, dnax */ 
     int qFrame;    /* Query  Frame 0,1,2 (mostly ignorable?) used with rnax, dnax*/ 
    
     char qStrand;  /* + or - Query Strand used with prot, rnax, dnax, 
 		      given by caller rather than returned by gfServer. */ 
     };
 
 struct genomeHits
 /* Information about hits on a genome assembly */
     {
     struct genomeHits *next;
+    char *host;		/* Host. */
+    char *port;	        /* Port. */
     char *db;		/* Database name. */
     char *genome;	/* Genome name. */
     int seqNumber;      /* Submission order */
     char *faName;       /* fasta name */
     char *dna;          /* query dna */
     int dnaSize;        /* query dna size */
     int sd;             /* Connection */
     char *type;         /* query type = query, protQuery, transQuery */
     char *xType;        /* query type = dna, prot, rnax, dnax */
     boolean queryRC;    /* is the query reverse-complemented */
     boolean complex;    /* is the query complex */
     boolean isProt;     /* is the protein query */
    
     int maxGeneHits;    /* Highest gene hit-count */
     char *maxGeneChrom; /* Target Chrom for gene with max gene hits */
@@ -832,55 +834,52 @@
 else if (count == 2)
     {
     safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
     safef(description, sizeof description, "blat on %d queries (%s, %s)", count, names->name, names->next->name);
     }
 else
     {
     safef(shortName, sizeof shortName, "blat %s+%d", names->name, count - 1);
     safef(description, sizeof description, "blat on %d queries (%s, %s, ...)", count, names->name, names->next->name);
     }
 
 *pName = makeNameUnique(shortName, database, cart);
 *pDescription = cloneString(description);
 }
 
-void queryServer(int conn, char *db, struct dnaSeq *seq, char *type, char *xType,
+void queryServer(char *host, char *port, char *db, struct dnaSeq *seq, char *type, char *xType,
     boolean complex, boolean isProt, boolean queryRC, int seqNumber)
 /* Send simple query to server and report results.
  * queryRC is true when the query has been reverse-complemented */
 {
 struct genomeHits *gH;
 AllocVar(gH);
 
+gH->host=cloneString(host);
+gH->port=cloneString(port);
 gH->db = cloneString(db);
 gH->genome = cloneString(hGenome(db));
 gH->seqNumber = seqNumber;
 gH->faName = cloneString(seq->name);
 gH->dna = cloneString(seq->dna);
 gH->dnaSize = seq->size;
 gH->type = cloneString(type);
 gH->xType = cloneString(xType);
 gH->queryRC = queryRC;
 gH->complex = complex;
 gH->isProt = isProt;
-gH->sd = conn;
-if (gH->sd == -1)
-    {
-    gH->error = TRUE;
-    gH->networkErrMsg = "Connection to gfServer failed.";
-    }
+
 gH->dbg = dyStringNew(256);
 slAddHead(&pfdList, gH);
 }
 
 void findBestGene(struct genomeHits *gH, int queryFrame)
 /* Find best gene-like object with multiple linked-features.
  * Remember chrom start end of best gene found and total hits in the gene. 
  * Should sort the gfResults by tStrand, chrom, tStart.
  * Filters on queryFrame */
 {
 char *bestChrom = NULL;
 int bestHits   = 0;
 int bestTStart = 0;
 int bestTEnd   = 0;
 int bestExons  = 0;
@@ -963,47 +962,67 @@
     return;
 int qFactor = 3;
 int tFactor = 3;
 if (gH->isProt)
     qFactor = 1;
 struct gfResult *gfR = NULL;
 for(gfR=gH->gfList; gfR; gfR=gfR->next)
     {
     gfR->qStart = gfR->qStart * qFactor + gfR->qFrame;
     gfR->qEnd   = gfR->qEnd   * qFactor + gfR->qFrame;
     gfR->tStart = gfR->tStart * tFactor + gfR->tFrame;
     gfR->tEnd   = gfR->tEnd   * tFactor + gfR->tFrame;
     }
 }
 
+int gfConnectEx(char *host, char *port)
+/* Try to connect to gfServer at host:port and return the connection; when allGenomes is set a failure yields -1, otherwise it errAborts. */
+{
+int conn = -1;
+if (allGenomes)
+    conn = gfMayConnect(host, port); // returns -1 on failure
+else
+    conn = gfConnect(host, port);  // errAborts on failure.
+return conn;
+}
+
+
 void queryServerFinish(struct genomeHits *gH)
 /* Report results from gfServer. */
 {
 char buf[256];
 int matchCount = 0;
 
+gH->sd = gfConnectEx(gH->host, gH->port);
+if (gH->sd == -1)
+    {
+    gH->error = TRUE;
+    gH->networkErrMsg = "Connection to gfServer failed.";
+    return;
+    }
+
 dyStringPrintf(gH->dbg,"query strand %s qsize %d<br>\n", gH->queryRC ? "-" : "+", gH->dnaSize);
 
 /* Put together query command. */
 safef(buf, sizeof buf, "%s%s %d", gfSignature(), gH->type, gH->dnaSize);
 mustWriteFd(gH->sd, buf, strlen(buf));
 
 if (read(gH->sd, buf, 1) < 0)
     errAbort("queryServerFinish: read failed: %s", strerror(errno));
 if (buf[0] != 'Y')
     errAbort("Expecting 'Y' from server, got %c", buf[0]);
-mustWriteFd(gH->sd, gH->dna, gH->dnaSize);
+mustWriteFd(gH->sd, gH->dna, gH->dnaSize);  // Cannot be shifted earlier for speed; must wait for Y confirmation.
 
 if (gH->complex)
     {
     char *s = netRecieveString(gH->sd, buf);
     if (!s)
 	errAbort("expected response from gfServer with tileSize");
     dyStringPrintf(gH->dbg,"%s<br>\n", s);  // from server: tileSize 4
     }
 
 for (;;)
     {
     if (netGetString(gH->sd, buf) == NULL)
         break;
     if (sameString(buf, "end"))
         {
@@ -1210,41 +1229,30 @@
 	    }
 		
 	}
 
     gH->maxGeneHits /= 3;  // average over 3 frames.
 
     char qStrand = (gH->queryRC ? '-' : '+');
     safef(gH->maxGeneStrand, sizeof gH->maxGeneStrand, "%c%c", qStrand, gH->maxGeneTStrand);
 
     }
 
 
 close(gH->sd);
 }
 
-int gfConnectEx(char *host, char *port)
-/* Try to connect to gfServer */
-{
-int conn = -1;
-if (allGenomes)
-    conn = gfMayConnect(host, port); // returns -1 on failure
-else
-    conn = gfConnect(host, port);  // errAborts on failure.
-return conn;
-}
-
 int findMinMatch(long genomeSize, boolean isProt)
 // Return default minMatch for genomeSize,
 // the expected number of occurrences of string length k 
 // in random genome of size N = N/(4^k)
 {
 int alphaBetSize;
 if (isProt)
     {
     alphaBetSize = 20;
     genomeSize = genomeSize / 3;
     }
 else
     {
     alphaBetSize = 4;
     }
@@ -1576,72 +1584,83 @@
 		seq->name, oneSize, minSuggested);
 	// we could use "continue;" here to actually enforce skipping, 
 	// but let's give the short sequence a chance, it might work.
 	// minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
 	if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
 	    continue;
 	}
     totalSize += oneSize;
     if (totalSize > maxTotalSize)
         {
 	warn("Sequence %s would take us over the %d letter limit, stopping here.",
 	     seq->name, maxTotalSize);
 	break;
 	}
 
-    conn = gfConnectEx(serve->host, serve->port);
-
     if (isTx)
 	{
 	gvo->reportTargetStrand = TRUE;
 	if (isTxTx)
 	    {
 	    if (allGenomes)
-		queryServer(conn, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber);
+		queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, FALSE, seqNumber);
 	    else
+		{
+		conn = gfConnectEx(serve->host, serve->port);
 		gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth);
+		}
 	    if (txTxBoth)
 		{
 		reverseComplement(seq->dna, seq->size);
-		conn = gfConnectEx(serve->host, serve->port);
 		if (allGenomes)
-		    queryServer(conn, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber);
+		    queryServer(serve->host, serve->port, db, seq, "transQuery", xType, TRUE, FALSE, TRUE, seqNumber);
 		else
+		    {
+		    conn = gfConnectEx(serve->host, serve->port);
 		    gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE);
 		    }
 		}
+	    }
 	else
 	    {
 	    if (allGenomes)
-		queryServer(conn, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber);
+		queryServer(serve->host, serve->port, db, seq, "protQuery", xType, TRUE, TRUE, FALSE, seqNumber);
 	    else
+		{
+		conn = gfConnectEx(serve->host, serve->port);
 		gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo);
 		}
 	    }
+	}
     else
 	{
 	if (allGenomes)
-	    queryServer(conn, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber);
+	    queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, FALSE, seqNumber);
 	else
+	    {
+	    conn = gfConnectEx(serve->host, serve->port);
 	    gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo);
+	    }
 	reverseComplement(seq->dna, seq->size);
-	conn = gfConnectEx(serve->host, serve->port);
 	if (allGenomes)
-	    queryServer(conn, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber);
+	    queryServer(serve->host, serve->port, db, seq, "query", xType, FALSE, FALSE, TRUE, seqNumber);
 	else
+	    {
+	    conn = gfConnectEx(serve->host, serve->port);
 	    gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo);
 	    }
+	}
     gfOutputQuery(gvo, f);
     ++seqNumber;
     }
 carefulClose(&f);
 
 if (!allGenomes)
     {
     showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType, 
               organism, feelingLucky);
     }
 
 if(!feelingLucky && !allGenomes)
     cartWebEnd();
 
 gfFileCacheFree(&tFileCache);