ef850e264ac840b515b825f34314c36d58ccbcbe
markd
Thu Jul 2 11:12:15 2020 -0700
added info command similar to status to support hgBlat

diff --git src/gfServer/gfServer.c src/gfServer/gfServer.c
index d62be7d..172d2e0 100644
--- src/gfServer/gfServer.c
+++ src/gfServer/gfServer.c
@@ -98,30 +98,31 @@
   " To get input file list:\n"
   " gfServer files host port\n"
   " To generate a precomputed index:\n"
   " gfServer index gfidx file(s)\n"
   " where the files are .2bit or .nib format files. Separate indexes must be created\n"
   " for untranslated and translated queries. These can be used with a persistent server\n"
   " as with 'start -indexFile or a dynamic server. They must follow the naming convention for\n"
   " for dynamic servers.\n"
   " To run a dynamic server (usually called by xinet):\n"
   " gfServer dynserver rootdir\n"
   " The root directory must contain directories for each genome with the twobit and index\n"
   " files following the convention:\n"
   " $genome/$genome.2bit\n"
   " $genome/$genome.untrans.gfidx\n"
   " $genome/$genome.trans.gfidx\n"
+  " Both indexes must exist.\n"
   "\n"
   "options:\n"
   " -tileSize=N Size of n-mers to index. Default is 11 for nucleotides, 4 for\n"
   " proteins (or translated nucleotides).\n"
   " -stepSize=N Spacing between tiles. Default is tileSize.\n"
   " -minMatch=N Number of n-mer matches that trigger detailed alignment.\n"
   " Default is 2 for nucleotides, 3 for proteins.\n"
   " -maxGap=N Number of insertions or deletions allowed between n-mers.\n"
   " Default is 2 for nucleotides, 0 for proteins.\n"
   " -trans Translate database to protein in 6 frames. Note: it is best\n"
   " to run this on RepeatMasked data in this case.\n"
   " -log=logFile Keep a log file that records server requests.\n"
   " -seqLog Include sequences in log file (not logged with -syslog).\n"
   " -ipLog Include user's IP in log file (not logged with -syslog).\n"
   " -debugLog Include debugging info in log file.\n"
@@ -1027,57 +1028,72 @@
 int msgLen = vsnprintf(buf, sizeof(buf) - 1, msg, args);
 buf[msgLen] = '\0';
 logError("%s", buf);
 printf("Error: %s\n", buf);
 }

 static void dynReadBytes(char *buf, int bufSize)
 /* read pending bytes */
 {
 int readSize = read(STDIN_FILENO, buf, bufSize-1);
 if (readSize < 0)
     errAbort("EOF from client");
 buf[readSize] = '\0';
 }

-
-static void dynReadQuery(char **commandRet, int *qsizeRet, char **genomeNameRet)
+static void dynReadCommand(char **commandRet, int *qsizeRet, boolean *isTransRet, char **genomeNameRet)
 /* read query request from stdin, same as server expect includes database
- * Format is:
- * signature command qsize genome
+ * Format for query commands:
+ * signature+command qsize genome
+ * Formats for info command:
+ * signature+command genome
  */
 {
 char buf[256];
 dynReadBytes(buf, sizeof(buf));
 logDebug("query: %s", buf);
 if (!startsWith(gfSignature(), buf))
     errAbort("query does not start with signature, got '%s'", buf);
-static int nwords = 3;
-char *words[nwords];
-int numWords = chopByWhite(buf, words, nwords);
-if (numWords != nwords)
-    errAbort("expected %d words in request, got %d", nwords, numWords);
+char *words[5];
+int numWords = chopByWhite(buf, words, ArraySize(words));
+if (numWords == 0)
+    errAbort("empty command");
 char *command = buf + strlen(gfSignature());
-if (!(sameString("query", command) ||
-      sameString("protQuery", command) || sameString("transQuery", command)))
-    errAbort("invalid command '%s'", command);
 *commandRet = cloneString(command);
+*isTransRet = sameString("protQuery", command) || sameString("transQuery", command)
+    || sameString("transInfo", command);
+
+if (sameString("query", command) || sameString("protQuery", command)
+    || sameString("transQuery", command))
+    {
+    if (numWords != 3)
+        errAbort("expected 3 words in query command, got %d", numWords);
     *qsizeRet = atoi(words[1]);
     *genomeNameRet = cloneString(words[2]);
     }
+else if (sameString("untransInfo", command) || sameString("transInfo", command))
+    {
+    if (numWords != 2)
+        errAbort("expected 2 words in query command, got %d", numWords);
+    *qsizeRet = 0;
+    *genomeNameRet = cloneString(words[1]);
+    }
+else
+    errAbort("invalid command '%s'", command);
+}

 static struct dnaSeq* dynReadQuerySeq(int qSize, boolean isTrans, boolean queryIsProt)
 /* read the DNA sequence from the query, filtering junk */
 {
 struct dnaSeq *seq;
 AllocVar(seq);
 seq->size = qSize;
 seq->dna = needLargeMem(qSize+1);
 if (gfReadMulti(STDIN_FILENO, seq->dna, qSize) != qSize)
     errAbort("read of %d bytes of query sequence failed", qSize);
 if (queryIsProt)
     {
     seq->size = aaFilteredSize(seq->dna);
     aaFilter(seq->dna, seq->dna);
@@ -1096,89 +1112,94 @@
 return seq;
 }

 static void dynGetDataFiles(char *rootDir, char* genomeName, boolean isTrans,
                             char seqFile[PATH_LEN], char gfIdxFile[PATH_LEN])
 /* get paths for sequence files to handle requests and validate they exist */
 {
 safef(seqFile, PATH_LEN, "%s/%s/%s.2bit", rootDir, genomeName, genomeName);
 if (!fileExists(seqFile))
     errAbort("sequence file for %s does not exist: %s", genomeName, seqFile);
 safef(gfIdxFile, PATH_LEN, "%s/%s/%s.%s.gfidx", rootDir, genomeName, genomeName, isTrans ? "trans" : "untrans");
 if (!fileExists(gfIdxFile))
     errAbort("gf index file for %s does not exist: %s", genomeName, gfIdxFile);
 }

-static void dynWriteTrailer(struct genoFindIndex *gfIdx)
-/* write trailer information, which is a subset of what status would return.
- * This avoids the need to reconnect and reload index.
- */
+static void dynamicServerQuery(char *command, int qSize, char *genomeName,
+                               char *seqFiles[1], struct genoFindIndex *gfIdx)
+/* handle search queries */
+{
+mustWriteFd(STDOUT_FILENO, "Y", 1);
+
+boolean queryIsProt = sameString(command, "protQuery");
+struct dnaSeq* seq = dynReadQuerySeq(qSize, gfIdx->isTrans, queryIsProt);
+if (gfIdx->isTrans)
+    {
+    if (queryIsProt)
+        transQuery(gfIdx->transGf, seq, STDOUT_FILENO);
+    else
+        transTransQuery(gfIdx->transGf, seq, STDOUT_FILENO);
+    }
+else
+    {
+    struct hash *perSeqMaxHash = maybePerSeqMax(1, seqFiles);
+    dnaQuery(gfIdx->untransGf, seq, STDOUT_FILENO, perSeqMaxHash);
+    }
+netSendString(STDOUT_FILENO, "end");
+logDebug("query done");
+}
+
+static void dynamicServerInfo(char *command, char *genomeName, struct genoFindIndex *gfIdx)
+/* handle one of the info commands */
 {
 char buf[256];
 struct genoFind *gf = gfIdx->isTrans ? gfIdx->transGf[0][0] : gfIdx->untransGf;
 sprintf(buf, "version %s", gfVersion);
 netSendString(STDOUT_FILENO, buf);
 sprintf(buf, "type %s", (gfIdx->isTrans ? "translated" : "nucleotide"));
 netSendString(STDOUT_FILENO, buf);
 sprintf(buf, "tileSize %d", gf->tileSize);
 netSendString(STDOUT_FILENO, buf);
 sprintf(buf, "stepSize %d", gf->stepSize);
 netSendString(STDOUT_FILENO, buf);
 sprintf(buf, "minMatch %d", gf->minMatch);
 netSendString(STDOUT_FILENO, buf);
-netSendString(STDOUT_FILENO, "trailerEnd");
+netSendString(STDOUT_FILENO, "end");
 }

-void dynamicServer(char* rootDir)
+static void dynamicServer(char* rootDir)
 /* dynamic server for inetd. Read query from stdin, open index, query, respond, exit.
  * only one query at a time */
 {
-// make sure error is logged, protocol doesn't allow reporting error to user
+// make sure error is logged
 pushWarnHandler(dynWarnErrorVa);
 char *command, *genomeName;
 int qSize;
-dynReadQuery(&command, &qSize, &genomeName);
-
-boolean isTrans = sameString("protQuery", command) || sameString("transQuery", command);
-boolean queryIsProt = sameString(command, "protQuery");
+boolean isTrans;
+dynReadCommand(&command, &qSize, &isTrans, &genomeName);
 char seqFile[PATH_LEN];
 char *seqFiles[1] = {seqFile}; // functions expect list of files
 char gfIdxFile[PATH_LEN];
 dynGetDataFiles(rootDir, genomeName, isTrans, seqFiles[0], gfIdxFile);
-
 struct genoFindIndex *gfIdx = genoFindIndexLoad(gfIdxFile, isTrans);
-mustWriteFd(STDOUT_FILENO, "Y", 1);
-
-struct dnaSeq* seq = dynReadQuerySeq(qSize, isTrans, queryIsProt);
-if (isTrans)
-    {
-    if (queryIsProt)
-        transQuery(gfIdx->transGf, seq, STDOUT_FILENO);
-    else
-        transTransQuery(gfIdx->transGf, seq, STDOUT_FILENO);
-    }
+if (endsWith(command, "Info"))
+    dynamicServerInfo(command, genomeName, gfIdx);
 else
-    {
-    struct hash *perSeqMaxHash = maybePerSeqMax(1, seqFiles);
-    dnaQuery(gfIdx->untransGf, seq, STDOUT_FILENO, perSeqMaxHash);
-    }
-netSendString(STDOUT_FILENO, "end");
-dynWriteTrailer(gfIdx);
-logDebug("query done");
+    dynamicServerQuery(command, qSize, genomeName, seqFiles, gfIdx);
 }

 int main(int argc, char *argv[])
 /* Process command line. */
 {
 char *command;
 gfCatchPipes();
 dnaUtilOpen();
 optionInit(&argc, argv, optionSpecs);
 command = argv[1];
 if (optionExists("trans"))
     {
     doTrans = TRUE;
     tileSize = 4;