96c969825f39b33ddb5697c9eb7c36ad622fc160 galt Mon Oct 1 16:28:44 2012 -0700 adding flushResults to para and parasol clients diff --git src/parasol/para/para.c src/parasol/para/para.c index 9fababd..b05b52f 100644 --- src/parasol/para/para.c +++ src/parasol/para/para.c @@ -140,30 +140,32 @@ "para running\n" " Print info on currently running jobs.\n" "para hippos time\n" " Print info on currently running jobs taking > 'time' (minutes) to run.\n" "para time\n" " List timing information.\n" "para recover jobList newJobList\n" " Generate a job list by selecting jobs from an existing list where\n" " the `check out' tests fail.\n" "para priority 999\n" " Set batch priority. Values explained under 'push' options above.\n" "para maxJob 999\n" " Set batch maxJob. Values explained under 'push' options above.\n" "para resetCounts\n" " Set batch done and crash counters to 0.\n" + "para flushResults\n" + " Flush results file. Warns if batch has jobs queued or running.\n" "para freeBatch\n" " Free all batch info on hub. Works only if batch has nothing queued or running.\n" "para showSickNodes\n" " Show sick nodes which have failed when running this batch.\n" "para clearSickNodes\n" " Clear sick nodes statistics and consecutive crash counts of batch.\n" "\n" "Common options\n" " -verbose=1 - set verbosity level.\n", version, NORMAL_PRIORITY, NORMAL_PRIORITY, NORMAL_PRIORITY * NORMAL_PRIORITY, NORMAL_PRIORITY-1 ); @@ -864,31 +866,31 @@ if (optionVal("priority",NULL)!=NULL) paraPriority(optionVal("priority","medium")); } void sendSetMaxJobMessage(int maxJob) /* Tell hub to change maxJob on batch */ { struct dyString *dy = newDyString(1024); char *result; if (maxJob <-1) errAbort("maxJob %d out of range, should be >=-1", maxJob); dyStringPrintf(dy, "setMaxJob %s %s %d", getUser(), resultsName, maxJob); result = hubSingleLineQuery(dy->string); dyStringFree(&dy); if (result == NULL || sameString(result, "-2")) - errAbort("Couldn't set maxJob %d for %s\n", maxJob, batchDir); + errAbort("Couldn't set maxJob %d for %s", maxJob, batchDir); freez(&result); verbose(1, "Told hub to set maxJob %d\n",maxJob); } void paraMaxJob(char *val) /* Tell hub to change maxJob on batch */ { if (sameWord(val,"unlimited")) maxJob = -1; else maxJob = atoi(val); sendSetMaxJobMessage(maxJob); } void checkMaxJobSetting() @@ -1349,31 +1351,31 @@ { struct jobDb *db = paraCycle(batch); jobDbFree(&db); } void clearSickNodes() /* Tell hub to clear sick nodes on batch */ { struct dyString *dy = newDyString(1024); char *result; dyStringPrintf(dy, "clearSickNodes %s %s", getUser(), resultsName); result = hubSingleLineQuery(dy->string); dyStringFree(&dy); if (!sameString(result, "0")) - errAbort("Couldn't clear sick nodes for %s\n", batchDir); + errAbort("Couldn't clear sick nodes for %s", batchDir); freez(&result); verbose(1, "Told hub to clear sick nodes\n"); } void paraShove(char *batch) /* Push batch of jobs and keep pushing until it's finished, polling * parasol every 5 minutes. */ { struct jobDb *db; struct job *job; struct submission *sub; int maxSleep = 5*60; int curSleep = 15; time_t start = time(NULL), now; @@ -1791,43 +1793,66 @@ freez(&result); verbose(1, "Told hub to reset done and crashed counts on batch %s\n", batchDir); } void freeBatch() /* Send msg to hub to reset done and crashed counts on batch */ { struct dyString *dy = newDyString(1024); char *result; dyStringPrintf(dy, "freeBatch %s %s", getUser(), resultsName); result = hubSingleLineQuery(dy->string); dyStringFree(&dy); verbose(1, "Told hub to free all batch-related resources\n"); if (result == NULL) - errAbort("result == NULL\n"); + errAbort("result == NULL"); if (sameOk(result, "-3")) - errAbort("User not found.\n"); + errAbort("User not found."); if (sameOk(result, "-2")) - errAbort("Batch not found.\n"); + errAbort("Batch not found."); if (sameOk(result, "-1")) - warn("Unable to free batch. Jobs are queued or running.\n"); + warn("Unable to free batch. Jobs are queued or running."); if (sameOk(result, "0")) verbose(1, "Batch freed.\n"); freez(&result); } +void flushResults() +/* Send msg to hub to flush results file */ +{ +struct dyString *dy = newDyString(1024); +char *result; +dyStringPrintf(dy, "flushResults %s %s", getUser(), resultsName); +result = hubSingleLineQuery(dy->string); +dyStringFree(&dy); +verbose(1, "Told hub to flush the results file\n"); +if (result == NULL) + errAbort("result == NULL"); +if (sameOk(result, "-3")) + errAbort("User not found."); +if (sameOk(result, "-2")) + errAbort("Batch not found."); +if (sameOk(result, "-1")) + warn("Flushed results. Some jobs are still queued or running."); +if (sameOk(result, "0")) + verbose(1, "Flushed results.\n"); +freez(&result); +} + + void paraCheck(char *batch) /* Check on progress of a batch. */ { struct jobDb *db = readBatch(batch); markQueuedJobs(db); markRunJobStatus(db); reportOnJobs(db); atomicWriteBatch(db, batch); showSickNodes(TRUE); if (sickBatch) errAbort("Sick batch! Correct problem and then run para clearSickNodes."); } @@ -2262,30 +2287,36 @@ warn("maxNode deprecated, use maxJob"); paraMaxJob(argv[2]); } else if (sameWord(command, "resetCounts")) { if (argc != 2) usage(); paraResetCounts(); } else if (sameWord(command, "freeBatch")) { if (argc != 2) usage(); freeBatch(); } +else if (sameWord(command, "flushResults")) + { + if (argc != 2) + usage(); + flushResults(); + } else if (sameWord(command, "clearSickNodes")) { clearSickNodes(); } else if (sameWord(command, "showSickNodes")) { showSickNodes(FALSE); } else { errAbort("Unrecognized command '%s'. Run para with no arguments for usage summary", command); } verbose(2, "Total para time: %.2f seconds\n", (clock1000() - startTime) / 1000.0);