225c0d55992aefae478461bba278644bdfdda3c5 max Wed Jan 15 08:33:57 2014 -0800 library changes for the browser box: This changes mostly hdb and jksql, plus - to a smaller extent - various other places in the code that deal with /gbdb/ files. The overall aim is to make it possible to have the data remote at UCSC while having the CGIs on a machine far away. At up to 180 msecs distance from UCSC (Europe, Japan), each query can get slow. So I tried to reduce the number of queries sent to UCSC while still allowing some mysql tables to be kept on localhost. I changed four things: - extend Larry's table cache to include field names. The code uses "describe" very often, which is slow from remote. With a table name cache these queries can be handled locally. This is configured in hg.conf - mysql "failover" connections: a mysql connection can have a 2nd connection that is used if a query fails, configured in hg.conf (I didn't call it "remote" connections, because we use that term already in the code) - mysql lazy connects: don't connect a sqlConnection right away, but only when needed. A mysql connect takes >500 msecs from across the Atlantic. - move gbdb: patch various places that use absolute "/gbdb/" pathnames to go through a central function that can change the filename of gbdb files to something else, as configured in hg.conf. Plus patch 1 or 2 lines for more speed + update the hgMirror script diff --git src/lib/linefile.c src/lib/linefile.c index 4fb4cb2..919c81b 100644 --- src/lib/linefile.c +++ src/lib/linefile.c @@ -2,30 +2,31 @@ * lines. * * This file is copyright 2002 Jim Kent, but license is hereby * granted for all use - public, private or commercial.
*/ #include "common.h" #include "hash.h" #include <fcntl.h> #include <signal.h> #include "dystring.h" #include "errabort.h" #include "linefile.h" #include "pipeline.h" #include "localmem.h" #include "cheapcgi.h" +#include "udc.h" char *getFileNameFromHdrSig(char *m) /* Check if header has signature of supported compression stream, and return a phoney filename for it, or NULL if no sig found. */ { char buf[20]; char *ext=NULL; if (startsWith("\x1f\x8b",m)) ext = "gz"; else if (startsWith("\x1f\x9d\x90",m)) ext = "Z"; else if (startsWith("BZ",m)) ext = "bz2"; else if (startsWith("PK\x03\x04",m)) ext = "zip"; if (ext==NULL) return NULL; safef(buf, sizeof(buf), "somefile.%s", ext); return cloneString(buf); @@ -278,30 +279,48 @@ if (lf->tabixIter != NULL) ti_iter_destroy(lf->tabixIter); lf->tabixIter = iter; lf->bufOffsetInFile = ti_bgzf_tell(lf->tabix->fp); lf->bytesInBuf = 0; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = 0; return TRUE; #else // no USE_TABIX warn(COMPILE_WITH_TABIX, "lineFileSetTabixRegion"); return FALSE; #endif // no USE_TABIX } +struct lineFile *lineFileUdcMayOpen(char *fileOrUrl, bool zTerm) +/* Create a line file object with an underlying UDC cache. */ +{ +if (fileOrUrl == NULL) + errAbort("lineFileUdcMayOpen: fileOrUrl is NULL"); +struct lineFile *lf; +AllocVar(lf); +lf->fileName = cloneString(fileOrUrl); +lf->fd = -1; +lf->bufSize = 0; +lf->buf = NULL; +lf->zTerm = zTerm; +lf->udcFile = udcFileMayOpen(fileOrUrl, NULL); +if (lf->udcFile == NULL) + return NULL; +return lf; +} + void lineFileExpandBuf(struct lineFile *lf, int newSize) /* Expand line file buffer. */ { assert(newSize > lf->bufSize); lf->buf = needMoreMem(lf->buf, lf->bytesInBuf, newSize); lf->bufSize = newSize; } struct lineFile *lineFileStdin(bool zTerm) /* Wrap a line file around stdin. 
*/ { return lineFileAttach("stdin", zTerm, fileno(stdin)); } @@ -343,30 +362,35 @@ #ifdef USE_TABIX if (lf->tabix != NULL) lineFileAbort(lf, "%s: not implemented for lineFile opened with lineFileTabixMayOpen.", where); #endif // USE_TABIX } void lineFileSeek(struct lineFile *lf, off_t offset, int whence) /* Seek to read next line from given position. */ { noTabixSupport(lf, "lineFileSeek"); if (lf->checkSupport) lf->checkSupport(lf, "lineFileSeek"); if (lf->pl != NULL) errnoAbort("Can't lineFileSeek on a compressed file: %s", lf->fileName); lf->reuse = FALSE; +if (lf->udcFile) + { + udcSeek(lf->udcFile, offset); + return; + } if (whence == SEEK_SET && offset >= lf->bufOffsetInFile && offset < lf->bufOffsetInFile + lf->bytesInBuf) { lf->lineStart = lf->lineEnd = offset - lf->bufOffsetInFile; } else { lf->lineStart = lf->lineEnd = lf->bytesInBuf = 0; if ((lf->bufOffsetInFile = lseek(lf->fd, offset, whence)) == -1) errnoAbort("Couldn't lineFileSeek %s", lf->fileName); } } void lineFileRewind(struct lineFile *lf) /* Return lineFile to start. 
*/ @@ -428,30 +452,45 @@ if (lf->reuse) { lf->reuse = FALSE; if (retSize != NULL) *retSize = lf->lineEnd - lf->lineStart; *retStart = buf + lf->lineStart; if (lf->metaOutput && *retStart[0] == '#') metaDataAdd(lf, *retStart); return TRUE; } if (lf->nextCallBack) return lf->nextCallBack(lf, retStart, retSize); +if (lf->udcFile) + { + char *line = udcReadLine(lf->udcFile); + int lineSize = strlen(line); + lf->bufOffsetInFile = -1; + lf->bytesInBuf = lineSize; + lf->lineIx = -1; + lf->lineStart = 0; + lf->lineEnd = lineSize; + *retStart = line; + freeMem(lf->buf); + lf->buf = line; + lf->bufSize = lineSize; + return TRUE; + } #ifdef USE_TABIX if (lf->tabix != NULL && lf->tabixIter != NULL) { // Just use line-oriented ti_read: int lineSize = 0; const char *line = ti_read(lf->tabix, lf->tabixIter, &lineSize); if (line == NULL) return FALSE; lf->bufOffsetInFile = -1; lf->bytesInBuf = lineSize; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = lineSize; if (lineSize > lf->bufSize) @@ -656,30 +695,33 @@ pipelineFree(&lf->pl); } else if (lf->fd > 0 && lf->fd != fileno(stdin)) { close(lf->fd); freeMem(lf->buf); } #ifdef USE_TABIX else if (lf->tabix != NULL) { if (lf->tabixIter != NULL) ti_iter_destroy(lf->tabixIter); ti_close(lf->tabix); } #endif // USE_TABIX + else if (lf->udcFile != NULL) + udcFileClose(&lf->udcFile); + if (lf->closeCallBack) lf->closeCallBack(lf); freeMem(lf->fileName); metaDataFree(lf); freez(pLf); } } void lineFileCloseList(struct lineFile **pList) /* Close up a list of line files. */ { struct lineFile *el, *next; for (el = *pList; el != NULL; el = next) {