e9492dbfa8a68aee133893d4b02475b0aec27bb9 markd Sun Dec 6 23:48:42 2020 -0800 webblat and hgBlat working diff --git src/gfServer/gfServer.c src/gfServer/gfServer.c index 482276b..22c2a81 100644 --- src/gfServer/gfServer.c +++ src/gfServer/gfServer.c @@ -106,31 +106,33 @@ " be created for untranslated and translated queries. These can be used\n" " with a persistent server as with 'start -indexFile or a dynamic server.\n" " They must follow the naming convention for for dynamic servers.\n" " To run a dynamic server (usually called by xinetd):\n" " gfServer dynserver rootdir\n" " Data files for genomes are found relative to the root directory.\n" " Queries are made using the prefix of the file path relative to the root\n" " directory. The files $genome.2bit, $genome.untrans.gfidx, and\n" " $genome.trans.gfidx are required. Typically the structure will be in\n" " the form:\n" " $rootdir/$genomeDataDir/$genome.2bit\n" " $rootdir/$genomeDataDir/$genome.untrans.gfidx\n" " $rootdir/$genomeDataDir/$genome.untrans.gfidx\n" " in this case, one would call gfClient with \n" " -genome=$genome -genomeDataDir=$genomeDataDir\n" - " Where the contain directories are optional.\n" + " Often $genomeDataDir will be the same name as $genome, however it\n" + " can be a multi-level path. The $genomeDataDir may also be an absolute\n" + " path.\n" " The -perSeqMax functionality can be implemented by creating a file\n" " $rootdir/$genomeDataDir/$genome.perseqmax\n" "\n" "options:\n" " -tileSize=N Size of n-mers to index. Default is 11 for nucleotides, 4 for\n" " proteins (or translated nucleotides).\n" " -stepSize=N Spacing between tiles. Default is tileSize.\n" " -minMatch=N Number of n-mer matches that trigger detailed alignment.\n" " Default is 2 for nucleotides, 3 for proteins.\n" " -maxGap=N Number of insertions or deletions allowed between n-mers.\n" " Default is 2 for nucleotides, 0 for proteins.\n" " -trans Translate database to protein in 6 frames. Note: it is best\n" " to run this on RepeatMasked data in this case.\n" " -log=logFile Keep a log file that records server requests.\n" " -seqLog Include sequences in log file (not logged with -syslog).\n" @@ -188,30 +190,31 @@ setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv); setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&tv, sizeof tv); } static boolean sendOk = TRUE; void setSendOk() // Reset to OK to send { sendOk = TRUE; } void errSendString(int sd, char *s) // Send string. If not OK, remember we had an error, do not try to write anything more on this connection. { + if (sendOk) sendOk = netSendString(sd, s); } void errSendLongString(int sd, char *s) // Send string unless we had an error already on the connection. { if (sendOk) sendOk = netSendLongString(sd, s); } void genoFindDirect(char *probeName, int fileCount, char *seqFiles[]) /* Don't set up server - just directly look for matches. */ { struct genoFind *gf = NULL; struct lineFile *lf = lineFileOpen(probeName, TRUE); struct dnaSeq seq; @@ -1088,62 +1091,81 @@ char buf[4096]; int msgLen = vsnprintf(buf, sizeof(buf) - 1, msg, args); buf[msgLen] = '\0'; logError("%s", buf); printf("Error: %s\n", buf); } struct dynSession /* information on dynamic server connection session. This is all data * currently cached. If is not changed if the genome and query mode is the * same as the previous request. */ { boolean isTrans; // translated char genome[256]; // genome name - char genomeDataDir[PATH_LEN]; // relative directory of data dir char gfIdxFile[PATH_LEN]; // index file location struct hash *perSeqMaxHash; // max hits per sequence struct genoFindIndex *gfIdx; // index }; +static struct genoFindIndex *loadGfIndex(char *gfIdxFile, boolean isTrans) +/* load index and set globals from it */ +{ +struct genoFindIndex *gfIdx = genoFindIndexLoad(gfIdxFile, isTrans); +struct genoFind *gf = isTrans ? gfIdx->transGf[0][0] : gfIdx->untransGf; +minMatch = gf->minMatch; +maxGap = gf->maxGap; +tileSize = gf->tileSize; +noSimpRepMask = gf->noSimpRepMask; +allowOneMismatch = gf->allowOneMismatch; +stepSize = gf->stepSize; +return gfIdx; +} + static void dynSessionInit(struct dynSession *dynSession, char *rootDir, char *genome, char *genomeDataDir, boolean isTrans) /* Initialize or reinitialize a dynSession object */ { // will free current content if initialized genoFindIndexFree(&dynSession->gfIdx); hashFree(&dynSession->perSeqMaxHash); time_t startTime = clock1000(); dynSession->isTrans = isTrans; safecpy(dynSession->genome, sizeof(dynSession->genome), genome); -safecpy(dynSession->genomeDataDir, sizeof(dynSession->genomeDataDir), genomeDataDir); + +// construct path to sequence and index files +char seqFileDir[PATH_LEN]; +if (genomeDataDir[0] == '/') // abs or relative + safecpy(seqFileDir, sizeof(seqFileDir), genomeDataDir); +else + safef(seqFileDir, sizeof(seqFileDir), "%s/%s", rootDir, genomeDataDir); char seqFile[PATH_LEN]; -safef(seqFile, PATH_LEN, "%s/%s/%s.2bit", rootDir, genomeDataDir, genome); +safef(seqFile, PATH_LEN, "%s/%s.2bit", seqFileDir, genome); if (!fileExists(seqFile)) errAbort("sequence file for %s does not exist: %s", genome, seqFile); char gfIdxFile[PATH_LEN]; -safef(gfIdxFile, PATH_LEN, "%s/%s/%s.%s.gfidx", rootDir, genomeDataDir, genome, isTrans ? "trans" : "untrans"); +safef(gfIdxFile, PATH_LEN, "%s/%s.%s.gfidx", seqFileDir, genome, isTrans ? "trans" : "untrans"); if (!fileExists(gfIdxFile)) errAbort("gf index file for %s does not exist: %s", genome, gfIdxFile); -dynSession->gfIdx = genoFindIndexLoad(gfIdxFile, isTrans); +dynSession->gfIdx = loadGfIndex(gfIdxFile, isTrans); char perSeqMaxFile[PATH_LEN]; -safef(perSeqMaxFile, PATH_LEN, "%s/%s/%s.perseqmax", rootDir, genomeDataDir, genome); +safef(perSeqMaxFile, PATH_LEN, "%s/%s.perseqmax", seqFileDir, genome); if (fileExists(perSeqMaxFile)) { /* only the basename of the file is saved in the index */ char *slash = strrchr(seqFile, '/'); char *seqFiles[1] = {(slash != NULL) ? slash + 1 : seqFile}; dynSession->perSeqMaxHash = buildPerSeqMax(1, seqFiles, perSeqMaxFile); } logInfo("dynserver: index loading completed in %4.3f seconds", 0.001 * (clock1000() - startTime)); } static char *dynReadCommand(char* rootDir, char *buf, int bufSize) /* read command and log, NULL if no more */ { int readSize = read(STDIN_FILENO, buf, bufSize-1); if (readSize < 0) @@ -1188,31 +1210,31 @@ *qsizeRet = atoi(words[3]); } else if (sameString("untransInfo", command) || sameString("transInfo", command)) { if (numWords != 3) errAbort("expected 3 words in info command, got %d", numWords); *qsizeRet = 0; } else errAbort("invalid command '%s'", command); safecpy(genome, sizeof(genome), words[1]); safecpy(genomeDataDir, sizeof(genomeDataDir), words[2]); // initialize session if new or changed -if ((dynSession->isTrans != isTrans) || (!sameString(dynSession->genome, genome)) || (!sameString(dynSession->genomeDataDir, genomeDataDir))) +if ((dynSession->isTrans != isTrans) || (!sameString(dynSession->genome, genome))) dynSessionInit(dynSession, rootDir, genome, genomeDataDir, isTrans); return TRUE; } static struct dnaSeq* dynReadQuerySeq(int qSize, boolean isTrans, boolean queryIsProt) /* read the DNA sequence from the query, filtering junk */ { struct dnaSeq *seq; AllocVar(seq); seq->size = qSize; seq->dna = needLargeMem(qSize+1); if (gfReadMulti(STDIN_FILENO, seq->dna, qSize) != qSize) errAbort("read of %d bytes of query sequence failed", qSize); seq->dna[qSize] = '\0'; @@ -1271,31 +1293,32 @@ sprintf(buf, "stepSize %d", gf->stepSize); netSendString(STDOUT_FILENO, buf); sprintf(buf, "minMatch %d", gf->minMatch); netSendString(STDOUT_FILENO, buf); netSendString(STDOUT_FILENO, "end"); } static bool dynamicServerCommand(char* rootDir, struct dynSession* dynSession) /* Execute one command from stdin, (re)initializing session as needed */ { time_t startTime = clock1000(); char *command; int qSize; if (!dynNextCommand(rootDir, dynSession, &command, &qSize)) return FALSE; -logInfo("dynserver: %s %s %s %s size=%d ", command, dynSession->genome, dynSession->genomeDataDir, (dynSession->isTrans ? "trans" : "untrans"), qSize); +logInfo("dynserver: %s %s %s [%s] qsize=%d ", command, dynSession->genome, dynSession->gfIdxFile, + (dynSession->isTrans ? "trans" : "untrans"), qSize); if (endsWith(command, "Info")) dynamicServerInfo(command, dynSession->gfIdx); else dynamicServerQuery(command, qSize, dynSession->gfIdx, dynSession->perSeqMaxHash); logInfo("dynserver: %s completed in %4.3f seconds", command, 0.001 * (clock1000() - startTime)); return TRUE; } static void dynamicServer(char* rootDir) /* dynamic server for inetd. Read query from stdin, open index, query, respond, exit. * only one query at a time */ { logDebug("dynamicServer connect"); // make sure errors are logged