4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/utils/refreshNamedSessionCustomTracks/refreshNamedSessionCustomTracks.c src/hg/utils/refreshNamedSessionCustomTracks/refreshNamedSessionCustomTracks.c index 4fa8cb2..1c9ecf0 100644 --- src/hg/utils/refreshNamedSessionCustomTracks/refreshNamedSessionCustomTracks.c +++ src/hg/utils/refreshNamedSessionCustomTracks/refreshNamedSessionCustomTracks.c @@ -1,579 +1,579 @@ /* refreshNamedSessionCustomTracks -- cron robot for keeping alive custom * tracks that are referenced by saved sessions. */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "options.h" #include "hash.h" #include "cheapcgi.h" #include "customTrack.h" #include "customFactory.h" #include "hui.h" #include "hgConfig.h" #include <sys/wait.h> #include <signal.h> #include "obscure.h" int version = 44; // PLEASE INCREMENT THIS BEFORE PUSHING TO SHARED REPO // SO THAT OTHERS MAY TEST WITH IT, SO THAT EVERYONE KNOWS THEY HAVE THE // EXACT RIGHT VERSION. #define savedSessionTable "namedSessionDb" int CFTEcalls = 0; int numUpdates = 0; int numForks = 10; int timeoutSecs = 7200; // Timeout for each forked child process // default 7200 seconds is two hours char *testFailure = NULL; void usage() /* Explain usage and exit. */ { errAbort( "refreshNamedSessionCustomTracks -- scan central database table: '%s'\n" " contents for custom tracks and touch any that are found, to prevent\n" " them from being removed by the custom track cleanup process.\n" "usage:\n" " refreshNamedSessionCustomTracks hgcentral[test,beta] [-workDir=/path]\n" "options:\n" " -workDir=%s - a directory to work from where\n" " - ../trash can be found\n" " default will be %s\n" " which implies ../trash is: /usr/local/apache/trash\n" " -atime=N - If the session has not been accessed since N days ago,\n" " - don't refresh its custom tracks. Default: no limit.\n" " -forks=N - Number of times to fork to recover memory. Default: %d\n" " -timeoutSecs=N - Number of seconds to kill a timed-out forked child. Default: %d\n" " -testFailure={exitCode|errAbort|segfault|timeout} must cause the parent to return non-zero exitCode\n" "This is intended to be run as a nightly cron job for each central db.\n" "The ~/.hg.conf file (or $HGDB_CONF) must specify the same central db\n" "as the command line. [The command line arg helps to verify coordination.]", savedSessionTable, CGI_BIN, CGI_BIN, numForks, timeoutSecs ); } /* Options: */ static struct optionSpec options[] = { {"atime", OPTION_INT}, {"workDir", OPTION_STRING}, {"forks", OPTION_INT}, {"timeoutSecs", OPTION_INT}, {"testFailure", OPTION_STRING}, {"hardcore", OPTION_BOOLEAN}, /* Intentionally omitted from usage(). 
    */
    {NULL, 0},
};

struct sqlConnection *unCachedCentralConn()
/* We do not want a cached connection because we will close and fork. */
{
return sqlConnectRemote(
    cfgOption("central.host"),
    cfgOption("central.user"),
    cfgOption("central.password"),
    cfgOption("central.db")
    );
}

void showVmPeak()
/* show peak mem usage */
{
pid_t pid = getpid();
char temp[256];
printf("# pid=%d: ",pid);
fflush(stdout);
safef(temp, sizeof(temp), "grep VmPeak /proc/%d/status", (int) pid);
if (system(temp) != 0)
    errAbort("system(\"%s\") failed", temp);
fflush(stdout);
}

// Due to a bug in the OS, this won't work without a handler.
static void handle_SIGCHLD(int sig)
{
}

sigset_t mask;
sigset_t orig_mask;

pid_t forkIt()
/* block SIGCHLD and fork */
{
// Due to a bug in the OS we have to set up a SIGCHLD handler
// even though we don't use it.
struct sigaction act;
(void)memset(&act, 0, sizeof(act));
act.sa_handler = handle_SIGCHLD;
if (sigaction(SIGCHLD, &act, 0))
    {
    errnoAbort("sigaction err");
    }

sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);

/* BLOCK to prevent a race condition that loses SIGCHLD events */
if (sigprocmask(SIG_BLOCK, &mask, &orig_mask) < 0)
    {
    errnoAbort("sigprocmask");
    }

/* This flush is critical because we are about to fork;
 * otherwise the output is a mess with weird duplicates. */
fflush(stdout);
fflush(stderr);

pid_t pid = fork();
if (pid < 0)
    errnoAbort("refreshNamedSessionCustomTracks can't fork");
if (pid == 0)  // child
    {
    if (sigprocmask(SIG_SETMASK, &orig_mask, NULL) < 0)  // unblock SIGCHLD
        {
        errnoAbort("sigprocmask SIG_SETMASK to unblock child SIGCHLD");
        }
    }
return pid;
}

void waitForChildWithTimeout(pid_t pid)
/* Wait for the child with a timeout.  Exit non-zero if the child had an error. */
{
int wstat;

struct timespec timeout;
timeout.tv_sec = timeoutSecs;  // default 7200 is two hours
timeout.tv_nsec = 0;

while (1)
    {
    int sig = sigtimedwait(&mask, NULL, &timeout);
    int savedErrno = errno;
    if (sig < 0)
        {
        if (savedErrno == EINTR)
            {
            /* Interrupted by a signal other than SIGCHLD. */
            /* A minor improvement would be to subtract the time already consumed before continuing. */
            verbose(1, "EINTR received, ignoring.\n");
            fflush(stdout);
            fflush(stderr);
            continue;
            }
        else if (savedErrno == EAGAIN)
            {
            verbose(1,"Timed out, killing child pid %d\n", pid);
            fflush(stdout);
            fflush(stderr);
            kill(pid, SIGKILL);
            }
        else
            {
            errnoAbort("sigtimedwait");
            }
        }
    break;  /* received SIGCHLD */
    }

if (sigprocmask(SIG_SETMASK, &orig_mask, NULL) < 0)  // unblock SIGCHLD
    {
    errnoAbort("sigprocmask SIG_SETMASK to unblock SIGCHLD");
    }

if (waitpid(pid, &wstat, 0) < 0)
    {
    errnoAbort("waitpid failed");
    }
else
    {
    if (WIFEXITED(wstat))
        {
        if (WEXITSTATUS(wstat) > 0)
            {
            verbose(1,"waitpid child had an error exit status %d. Exiting.\n", WEXITSTATUS(wstat));
            exit(1);
            }
        }
    else if (WIFSIGNALED(wstat))
        {
        verbose(1,"waitpid child had a signal. Exiting.\n");
        exit(1);
        }
    }
}

void scanSettingsForCT(char *userName, char *sessionName,
                       int *pLiveCount, int *pExpiredCount, struct sqlConnection *conn)
/* Parse the CGI-encoded session contents into {var,val} pairs and search
 * for custom tracks.  If found, refresh the custom track.  Parsing code
 * taken from cartParseOverHash.
 * If any nonexistent custom track files are found, return a SQL update
 * command that will remove those from this session.  We can't just do
 * the update here because that messes up the caller's query.
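 * For example (hypothetical contents, for illustration only), a decoded
 * contents string might hold settings such as:
 *   db=hg19&ctfile_hg19=../trash/ct/ct_hypotheticalExample.bed&pix=950
 * where the ctfile_<db> variable matches CT_FILE_VAR_PREFIX and names a custom
 * track file in trash; values are CGI-encoded in the actual table.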
*/ { char query[512]; sqlSafef(query, sizeof(query), "select contents from %s " "where userName='%s' and sessionName = '%s'", savedSessionTable, userName, sessionName); char *contents = sqlQuickString(conn, query); if (!contents) return; int contentLength = strlen(contents); struct dyString *newContents = dyStringNew(contentLength+1); struct dyString *oneSetting = dyStringNew(contentLength / 4); char *contentsToChop = cloneString(contents); char *namePt = contentsToChop; verbose(3, "Scanning %s %s\n", userName, sessionName); while (isNotEmpty(namePt)) { char *dataPt = strchr(namePt, '='); char *nextNamePt; if (dataPt == NULL) errAbort("ERROR: Mangled session content string %s", namePt); *dataPt++ = 0; nextNamePt = strchr(dataPt, '&'); if (nextNamePt != NULL) *nextNamePt++ = 0; dyStringClear(oneSetting); dyStringPrintf(oneSetting, "%s=%s%s", namePt, dataPt, (nextNamePt ? "&" : "")); if (startsWith(CT_FILE_VAR_PREFIX, namePt)) { cgiDecode(dataPt, dataPt, strlen(dataPt)); boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE; verbose(3, "Found variable %s = %s\n", namePt, dataPt); /* If the file does not exist, omit this setting from newContents so * it doesn't get copied from session to session. If it does exist, * leave it up to customFactoryTestExistence to parse the file for * possible customTrash table references, some of which may exist * and some not. */ if (! fileExists(dataPt)) { verbose(3, "Removing %s from %s %s\n", oneSetting->string, userName, sessionName); thisGotExpiredCT = TRUE; } else { dyStringAppend(newContents, oneSetting->string); char *db = namePt + strlen(CT_FILE_VAR_PREFIX); customFactoryTestExistence(db, dataPt, &thisGotLiveCT, &thisGotExpiredCT, NULL); ++CFTEcalls; } if (thisGotLiveCT && pLiveCount != NULL) (*pLiveCount)++; if (thisGotExpiredCT && pExpiredCount != NULL) (*pExpiredCount)++; if (thisGotExpiredCT) { if (verboseLevel() >= 3) verbose(3, "Found expired custom track in %s %s: %s\n", userName, sessionName, dataPt); else verbose(2, "Found expired custom track: %s\n", dataPt); } if (thisGotLiveCT) verbose(4, "Found live custom track: %s\n", dataPt); } else if (sameString(namePt, "multiRegionsBedUrl")) { // touch corresponding multi-region custom regions .bed and .sha1 files to save them from trash cleaner. 
cgiDecode(dataPt, dataPt, strlen(dataPt)); char multiRegionsBedUrlSha1Name[1024]; safef(multiRegionsBedUrlSha1Name, sizeof multiRegionsBedUrlSha1Name, "%s.sha1", dataPt); if (!sameString(dataPt,"") && !strstr(dataPt,"://")) { // should have a bed file in trash and a sha1 for quick change detection if (fileExists(dataPt) && fileExists(multiRegionsBedUrlSha1Name)) { readAndIgnore(dataPt); // touch access time readAndIgnore(multiRegionsBedUrlSha1Name); verbose(4, "setting multiRegionsBedUrl: %s\n", dataPt); verbose(4, "setting multiRegionsBedUrl: %s\n", multiRegionsBedUrlSha1Name); dyStringAppend(newContents, oneSetting->string); } } } else if (startsWith("customComposite", namePt)) { cgiDecode(dataPt, dataPt, strlen(dataPt)); if (fileExists(dataPt)) { readAndIgnore(dataPt); // touch access time verbose(4, "setting compositeFile: %s\n", dataPt); dyStringAppend(newContents, oneSetting->string); } } else { dyStringAppend(newContents, oneSetting->string); } namePt = nextNamePt; } if (newContents->stringSize != contentLength) ++numUpdates; if (optionExists("hardcore") && newContents->stringSize != contentLength) // almost never used { struct dyString *update = dyStringNew(contentLength*2); if (newContents->stringSize > contentLength) errAbort("ERROR: Uh, why is newContents (%ld) longer than original (%d)", newContents->stringSize, contentLength); sqlDyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable); dyStringAppendN(update, newContents->string, newContents->stringSize); dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 " "where userName=\"%s\" and sessionName=\"%s\";", userName, sessionName); verbose(3, "Removing one or more dead CT file settings from %s %s " "(original length %d, now %ld)\n", userName, sessionName, contentLength, newContents->stringSize); sqlUpdate(conn, update->string); dyStringFree(&update); } dyStringFree(&oneSetting); dyStringFree(&newContents); freeMem(contentsToChop); freeMem(contents); return; } struct sessionInfo { struct sessionInfo *next; char userName[256]; char sessionName[256]; }; void refreshNamedSessionCustomTracks(char *centralDbName) /* refreshNamedSessionCustomTracks -- cron robot for keeping alive custom * tracks that are referenced by saved sessions. */ { // Helpful for showing rapid version changes. // Be sure to increment this when committing and pushing. // People testing will know they have the correct new version. verbose(1, "refreshNamedSessionCustomTracks version #%d\n", version); // Avoids a problem in hdb.c deep in the library // that would otherwise fail when trying to set this // on a new child process using a custom track that // was defined on a hub that is not presently able to load. // However, because that hub might be OK in the future, // we need to not fail and keep touching the custom trash db files. // This static library variable has been wrong ever since this program was created // and run to clean the trash, so clearly it is not actually needed // for the trash cleaning. 
setMinIndexLengthForTrashCleaner();

struct sqlConnection *conn = unCachedCentralConn();
char *actualDbName = sqlGetDatabase(conn);
int liveCount=0, expiredCount=0;

setUdcCacheDir();  /* programs that use udc must call this to initialize the cache dir location */

if (!sameString(centralDbName, actualDbName))
    errAbort("ERROR: Central database specified in hg.conf file is %s but %s "
             "was specified on the command line.", actualDbName, centralDbName);
else
    verbose(2, "Got connection to %s\n", centralDbName);

long long threshold = 0;
int atime = optionInt("atime", 0);
if (atime > 0)
    {
    time_t now = time(NULL);
    threshold = now - ((long long)atime * 24 * 60 * 60);
    }

struct sessionInfo *sessionList = NULL, *si;
if (sqlTableExists(conn, savedSessionTable))
    {
    struct sqlResult *sr = NULL;
    char **row = NULL;
    char query[512];
    sqlSafef(query, sizeof(query),
          "select userName,sessionName,UNIX_TIMESTAMP(lastUse) from %s "
          "order by userName,sessionName", savedSessionTable);
    sr = sqlGetResult(conn, query);
    // Slurp results into memory instead of processing row by row,
    // reducing the chance of a lost connection.
    while ((row = sqlNextRow(sr)) != NULL)
        {
        if (atime > 0)
            {
            long long lastUse = atoll(row[2]);
            if (lastUse < threshold)
                {
                verbose(2, "User %s session %s is older than %d days, skipping.\n",
                        row[0], row[1], atime);
                continue;
                }
            }
        AllocVar(si);
        safecpy(si->userName, sizeof(si->userName), row[0]);
        safecpy(si->sessionName, sizeof(si->sessionName), row[1]);
        slAddHead(&sessionList, si);
        }
    // Processing sessions in the same order they appear in the table helps testing.
    slReverse(&sessionList);
    sqlFreeResult(&sr);
    }
else
    errAbort("ERROR: cannot find table %s.%s on central.host: '%s'",
             centralDbName, savedSessionTable, cfgOption("central.host"));

// The parent must close all db connections before forking.
sqlDisconnect(&conn);

int sessionsPerForkDone=0;  // number of sessions done for the current fork
int listLength = slCount(sessionList);
int perFork = listLength / numForks;  // number of sessions per fork
int forkRem = listLength % numForks;  // remainder of the above division
if (forkRem > 0)
    ++perFork;  // deal with the remainder by starting off with an elevated perFork
pid_t pid = 0;
boolean parent = TRUE;
int fork = 0;

// DEALING WITH USING LOTS OF MEMORY
// To avoid taking too much RAM from leaks when loading the saved named sessions,
// this program splits the session list into numForks pieces (default 10),
// and then hands each piece off to a forked child process.
// When the child finishes, the operating system will free up its memory.
// This repeats for each child until the entire list has been processed.
// The children run sequentially, not in parallel.
// It is CRUCIAL that this program exits with a non-zero exit code
// when it or any of its children exit non-zero, abort, get killed, or crash,
// in order to tell the calling program that it has failed.
// This is the only thing that will stop the deletion and loss of saved named sessions!
// Every single session MUST be processed in order to save it from deletion.

for (si = sessionList; si != NULL; si = si->next)
    {
    if (parent && sessionsPerForkDone == 0)
        {
        pid = forkIt();
        if (pid == 0)
            {
            parent = FALSE;
            conn = unCachedCentralConn();  // avoid cached connections when forking
            }
        }
    if (!parent)
        {
        scanSettingsForCT(si->userName, si->sessionName, &liveCount, &expiredCount, conn);
        }
    ++sessionsPerForkDone;
    if (sessionsPerForkDone >= perFork)  // the fork has done all of its sessions
        {
        // Adjust for the fact that divisions have remainders.
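        // (Illustrative numbers only: 23 sessions with the default 10 forks gives
        // perFork = 2 and forkRem = 3, so perFork is bumped to 3; the first 3 forks
        // then handle 3 sessions each and the remaining 7 forks handle 2 each.)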
        // We want to split the list into numForks pieces, but it often does not divide evenly.
        // The first forkRem forks will get one extra session to do if forkRem > 0.
        // It is important that we do not create any extra fork, so that the count will
        // match numForks (default 10) in the output.
        ++fork;
        if (fork == forkRem)
            --perFork;
        sessionsPerForkDone = 0;
        if (parent)
            {
            waitForChildWithTimeout(pid);
            }
        else
            {
            verbose(1, "# of updates found: %d\n", numUpdates);
            verbose(1, "# of customFactoryTestExistence calls done: %d\n", CFTEcalls);
            verbose(1, "Found %d live and %d expired custom tracks in %s.\n",
                    liveCount, expiredCount, centralDbName);
            sqlDisconnect(&conn);
            // Causes "VmPeak" to appear in the output.
            // The caller of this program can look for exactly numForks
            // lines of "VmPeak" in the output as a double-check that the program completed.
            showVmPeak();
            verbose(1, "forked child process# %d done.\n", fork);
            // It is important to be able to easily test that these failures
            // behave correctly and cause the parent process to exit with a non-zero exit code.
            if (testFailure)  // {exitCode|errAbort|segfault|timeout}
                {
                if (sameString(testFailure, "exitCode"))
                    exit(1);
                else if (sameString(testFailure, "errAbort"))
                    errAbort("Test asked for errAbort in child");
                else if (sameString(testFailure, "segfault"))
                    {
                    char *ptr = NULL;
                    char c = *ptr;  // dereferencing a NULL pointer should cause a segfault
                    printf("c=%c\n",c);  // it should never get here; make the compiler happy
                    }
                else if (sameString(testFailure, "timeout"))
                    {
                    // Nothing happens in the child, but the parent will time out in 1 second and kill the child.
                    }
                else
                    errAbort("unknown value [%s] for testFailure", testFailure);
                }
            exit(0);
            }
        }
    }
verbose(1, "parent process done.\n");
}

int main(int argc, char *argv[])
/* Process command line. */
{
// EXERCISE CARE WHEN MODIFYING THIS PROGRAM!
// THIS PROGRAM IS CRITICAL FOR PRESERVING USER SAVED SESSION DATA,
// ESPECIALLY CUSTOM TRACKS DATA AND THE CUSTOM TRACKS DATABASE.
// BE SURE TO DISCUSS CHANGES IN DETAIL WITH THE PEOPLE THAT RUN THE PROGRAM.
// Please also increment the "version" variable at the top of the source.
// This is helpful when you are committing a new change and giving it
// to others to run or test with the trash cleaning scripts,
// especially when it is changing rapidly.
optionInit(&argc, argv, options);  // causes it to emit ### kent source version 999 ###
if (argc != 2)
    usage();
numForks = optionInt("forks", numForks);
if (numForks < 1)
    errAbort("The -forks option must specify an integer >= 1");
timeoutSecs = optionInt("timeoutSecs", timeoutSecs);
testFailure = optionVal("testFailure", testFailure);
if (testFailure && sameString(testFailure,"timeout"))
    timeoutSecs = 1;  // shorten the child timeout so the timeout test completes quickly
char *workDir = optionVal("workDir", CGI_BIN);
setCurrentDir(workDir);
refreshNamedSessionCustomTracks(argv[1]);
return 0;
}
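For readers unfamiliar with the block-SIGCHLD / fork / sigtimedwait / waitpid pattern used by forkIt() and waitForChildWithTimeout() above, here is a minimal standalone sketch of the same idea. It is illustrative only and not part of the kent tree: the file name, the 2-second timeout, and the 5-second child "workload" are made up for the demo, and it uses plain perror/printf instead of the kent library's errnoAbort/verbose.

/* forkTimeoutSketch.c -- minimal, self-contained illustration of the
 * block-SIGCHLD / fork / sigtimedwait / waitpid pattern used above.
 * Illustrative sketch only; not part of the kent tree. */
#define _POSIX_C_SOURCE 199309L
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

static void handleSigchld(int sig)
/* Empty handler: some systems will not deliver SIGCHLD without one installed. */
{
(void)sig;
}

int main(void)
{
sigset_t mask, origMask;
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = handleSigchld;
sigaction(SIGCHLD, &act, NULL);

/* Block SIGCHLD before forking so a fast-exiting child cannot be missed. */
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
sigprocmask(SIG_BLOCK, &mask, &origMask);

fflush(stdout);
pid_t pid = fork();
if (pid < 0)
    {
    perror("fork");
    exit(1);
    }
if (pid == 0)
    {
    /* Child: restore the signal mask and pretend to do some work. */
    sigprocmask(SIG_SETMASK, &origMask, NULL);
    sleep(5);   /* made-up workload, longer than the parent's timeout below */
    exit(0);
    }

/* Parent: wait for SIGCHLD, but only for two (made-up) seconds. */
struct timespec timeout = {2, 0};
while (sigtimedwait(&mask, NULL, &timeout) < 0)
    {
    if (errno == EINTR)
        continue;               /* some other signal arrived; keep waiting */
    if (errno == EAGAIN)
        {
        printf("timed out, killing child %d\n", (int)pid);
        kill(pid, SIGKILL);
        break;
        }
    perror("sigtimedwait");
    exit(1);
    }
sigprocmask(SIG_SETMASK, &origMask, NULL);

int wstat;
if (waitpid(pid, &wstat, 0) < 0)
    {
    perror("waitpid");
    exit(1);
    }
if (WIFEXITED(wstat) && WEXITSTATUS(wstat) == 0)
    printf("child exited cleanly\n");
else
    printf("child failed, was killed, or timed out\n");
return 0;
}

Compiled with a plain cc invocation, the sketch prints the "timed out" message after two seconds because the child's pretend workload outlasts the timeout; swapping the two constants makes it report a clean exit instead.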