e9492dbfa8a68aee133893d4b02475b0aec27bb9
markd
  Sun Dec 6 23:48:42 2020 -0800
webblat and hgBlat working

diff --git src/gfServer/gfServer.c src/gfServer/gfServer.c
index 482276b..22c2a81 100644
--- src/gfServer/gfServer.c
+++ src/gfServer/gfServer.c
@@ -106,31 +106,33 @@
   "     be created for untranslated and translated queries.  These can be used\n"
-  "     with a persistent server as with 'start -indexFile or a dynamic server.\n"
-  "     They must follow the naming convention for for dynamic servers.\n"
+  "     with a persistent server as with 'start -indexFile' or a dynamic server.\n"
+  "     They must follow the naming convention for dynamic servers.\n"
   "   To run a dynamic server (usually called by xinetd):\n"
   "      gfServer dynserver rootdir\n"
   "     Data files for genomes are found relative to the root directory.\n"
   "     Queries are made using the prefix of the file path relative to the root\n"
   "     directory.  The files $genome.2bit, $genome.untrans.gfidx, and\n"
   "     $genome.trans.gfidx are required. Typically the structure will be in\n"
   "     the form:\n"
   "         $rootdir/$genomeDataDir/$genome.2bit\n"
   "         $rootdir/$genomeDataDir/$genome.untrans.gfidx\n"
-  "         $rootdir/$genomeDataDir/$genome.untrans.gfidx\n"
+  "         $rootdir/$genomeDataDir/$genome.trans.gfidx\n"
   "     in this case, one would call gfClient with \n"
   "         -genome=$genome -genomeDataDir=$genomeDataDir\n"
-  "     Where the contain directories are optional.\n"
+  "     Often $genomeDataDir will be the same name as $genome, however it\n"
+  "     can be a multi-level path.  The $genomeDataDir may also be an absolute\n"
+  "     path.\n"
   "     The -perSeqMax functionality can be implemented by creating a file\n"
   "         $rootdir/$genomeDataDir/$genome.perseqmax\n"
   "\n"
   "options:\n"
   "   -tileSize=N     Size of n-mers to index.  Default is 11 for nucleotides, 4 for\n"
   "                   proteins (or translated nucleotides).\n"
   "   -stepSize=N     Spacing between tiles. Default is tileSize.\n"
   "   -minMatch=N     Number of n-mer matches that trigger detailed alignment.\n"
   "                   Default is 2 for nucleotides, 3 for proteins.\n"
   "   -maxGap=N       Number of insertions or deletions allowed between n-mers.\n"
   "                   Default is 2 for nucleotides, 0 for proteins.\n"
   "   -trans          Translate database to protein in 6 frames.  Note: it is best\n"
   "                   to run this on RepeatMasked data in this case.\n"
   "   -log=logFile    Keep a log file that records server requests.\n"
   "   -seqLog         Include sequences in log file (not logged with -syslog).\n"
@@ -188,30 +190,31 @@
 setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv);
 setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&tv, sizeof tv);
 }
 
 static boolean sendOk = TRUE;
 
 void setSendOk()
 // Reset to OK to send
 {
 sendOk = TRUE;
 }
 
 void errSendString(int sd, char *s)
 // Send string. If not OK, remember we had an error, do not try to write anything more on this connection.
 {
+
 if (sendOk) sendOk = netSendString(sd, s);
 }
 
 void errSendLongString(int sd, char *s)
 // Send string unless we had an error already on the connection.
 {
 if (sendOk) sendOk = netSendLongString(sd, s);
 }
 
 void genoFindDirect(char *probeName, int fileCount, char *seqFiles[])
 /* Don't set up server - just directly look for matches. */
 {
 struct genoFind *gf = NULL;
 struct lineFile *lf = lineFileOpen(probeName, TRUE);
 struct dnaSeq seq;
@@ -1088,62 +1091,81 @@
 char buf[4096];
 int msgLen = vsnprintf(buf, sizeof(buf) - 1, msg, args);
 buf[msgLen] = '\0';
 logError("%s", buf);
 printf("Error: %s\n", buf);
 }
 
 struct dynSession
 /* information on dynamic server connection session.  This is all data
- * currently cached.  If is not changed if the genome and query mode is the
+ * currently cached.  It is not changed if the genome and query mode are the
  * same as the previous request.
  */
 {
     boolean isTrans;              // translated 
     char genome[256];             // genome name
-    char genomeDataDir[PATH_LEN]; // relative directory of data dir
     char gfIdxFile[PATH_LEN];     // index file location
     struct hash *perSeqMaxHash;   // max hits per sequence
     struct genoFindIndex *gfIdx;  // index
 };
 
+static struct genoFindIndex *loadGfIndex(char *gfIdxFile, boolean isTrans)
+/* Load the index in gfIdxFile and copy its parameters into the option globals. */
+{
+struct genoFindIndex *loaded = genoFindIndexLoad(gfIdxFile, isTrans);
+struct genoFind *params = !isTrans ? loaded->untransGf : loaded->transGf[0][0];
+tileSize = params->tileSize;
+stepSize = params->stepSize;
+minMatch = params->minMatch;
+maxGap = params->maxGap;
+noSimpRepMask = params->noSimpRepMask;
+allowOneMismatch = params->allowOneMismatch;
+return loaded;
+}
+
 static void dynSessionInit(struct dynSession *dynSession, char *rootDir,
                            char *genome, char *genomeDataDir, boolean isTrans)
 /* Initialize or reinitialize a dynSession object */
 {
 // will free current content if initialized
 genoFindIndexFree(&dynSession->gfIdx);
 hashFree(&dynSession->perSeqMaxHash);
 
 time_t startTime = clock1000();
 dynSession->isTrans = isTrans;
 safecpy(dynSession->genome, sizeof(dynSession->genome), genome);
-safecpy(dynSession->genomeDataDir, sizeof(dynSession->genomeDataDir), genomeDataDir);
+
+// construct path to the directory holding the sequence and index files
+char seqFileDir[PATH_LEN];
+if (genomeDataDir[0] == '/')  // absolute path is used as-is, otherwise it is relative to rootDir
+    safecpy(seqFileDir, sizeof(seqFileDir), genomeDataDir);
+else
+    safef(seqFileDir, sizeof(seqFileDir), "%s/%s", rootDir, genomeDataDir);
     
 char seqFile[PATH_LEN];
-safef(seqFile, PATH_LEN, "%s/%s/%s.2bit", rootDir, genomeDataDir, genome);
+safef(seqFile, PATH_LEN, "%s/%s.2bit", seqFileDir, genome);
 if (!fileExists(seqFile))
     errAbort("sequence file for %s does not exist: %s", genome, seqFile);
 
-char gfIdxFile[PATH_LEN];
-safef(gfIdxFile, PATH_LEN, "%s/%s/%s.%s.gfidx", rootDir, genomeDataDir, genome, isTrans ? "trans" : "untrans");
+char *gfIdxFile = dynSession->gfIdxFile;  // build the path in the session field so it is kept for logging
+safef(gfIdxFile, PATH_LEN, "%s/%s.%s.gfidx", seqFileDir, genome, isTrans ? "trans" : "untrans");
 if (!fileExists(gfIdxFile))
     errAbort("gf index file for %s does not exist: %s", genome, gfIdxFile);
-dynSession->gfIdx = genoFindIndexLoad(gfIdxFile, isTrans);
+dynSession->gfIdx = loadGfIndex(gfIdxFile, isTrans);
 
 char perSeqMaxFile[PATH_LEN];
-safef(perSeqMaxFile, PATH_LEN, "%s/%s/%s.perseqmax", rootDir, genomeDataDir, genome);
+safef(perSeqMaxFile, PATH_LEN, "%s/%s.perseqmax", seqFileDir, genome);
 if (fileExists(perSeqMaxFile))
     {
     /* only the basename of the file is saved in the index */
     char *slash = strrchr(seqFile, '/');
     char *seqFiles[1] = {(slash != NULL) ? slash + 1 : seqFile};
     dynSession->perSeqMaxHash = buildPerSeqMax(1, seqFiles, perSeqMaxFile);
     }
 logInfo("dynserver: index loading completed in %4.3f seconds", 0.001 * (clock1000() - startTime));
 }
 
 static char *dynReadCommand(char* rootDir, char *buf, int bufSize)
 /* read command and log, NULL if no more */
 {
 int readSize = read(STDIN_FILENO, buf, bufSize-1);
 if (readSize < 0)
@@ -1188,31 +1210,31 @@
     *qsizeRet = atoi(words[3]);
     }
 else if (sameString("untransInfo", command) || sameString("transInfo", command))
     {
     if (numWords != 3)
         errAbort("expected 3 words in info command, got %d", numWords);
     *qsizeRet = 0;
     }
 else
     errAbort("invalid command '%s'", command);
 
 safecpy(genome, sizeof(genome), words[1]);
 safecpy(genomeDataDir, sizeof(genomeDataDir), words[2]);
 
 // initialize session if new or changed
-if ((dynSession->isTrans != isTrans) || (!sameString(dynSession->genome, genome)) || (!sameString(dynSession->genomeDataDir, genomeDataDir)))
+if ((dynSession->isTrans != isTrans) || (!sameString(dynSession->genome, genome)))
     dynSessionInit(dynSession, rootDir, genome, genomeDataDir, isTrans);
 return TRUE;
 }
 
 static struct dnaSeq* dynReadQuerySeq(int qSize, boolean isTrans, boolean queryIsProt)
 /* read the DNA sequence from the query, filtering junk  */
 {
 struct dnaSeq *seq;
 AllocVar(seq);
 seq->size = qSize;
 seq->dna = needLargeMem(qSize+1);
 if (gfReadMulti(STDIN_FILENO, seq->dna, qSize) != qSize)
     errAbort("read of %d bytes of query sequence failed", qSize);
 seq->dna[qSize] = '\0';
 
@@ -1271,31 +1293,32 @@
 sprintf(buf, "stepSize %d", gf->stepSize);
 netSendString(STDOUT_FILENO, buf);
 sprintf(buf, "minMatch %d", gf->minMatch);
 netSendString(STDOUT_FILENO, buf);
 netSendString(STDOUT_FILENO, "end");
 }
 
 static bool dynamicServerCommand(char* rootDir, struct dynSession* dynSession)
 /* Execute one command from stdin, (re)initializing session as needed */
 {
 time_t startTime = clock1000();
 char *command;
 int qSize;
 if (!dynNextCommand(rootDir, dynSession, &command, &qSize))
     return FALSE;
-logInfo("dynserver: %s %s %s %s size=%d ", command, dynSession->genome, dynSession->genomeDataDir, (dynSession->isTrans ? "trans" : "untrans"), qSize);
+logInfo("dynserver: %s %s %s [%s] qsize=%d ", command, dynSession->genome, dynSession->gfIdxFile,
+        (dynSession->isTrans ? "trans" : "untrans"), qSize);
 if (endsWith(command, "Info"))
     dynamicServerInfo(command, dynSession->gfIdx);
 else
     dynamicServerQuery(command, qSize, dynSession->gfIdx, dynSession->perSeqMaxHash);
 logInfo("dynserver: %s completed in %4.3f seconds", command, 0.001 * (clock1000() - startTime));
 return TRUE;
 }
 
 static void dynamicServer(char* rootDir)
 /* dynamic server for inetd. Read query from stdin, open index, query, respond, exit.
  * only one query at a time */
 {
 logDebug("dynamicServer connect");
 
 // make sure errors are logged