567bb801c1502dd59b886e1ed10e76bcaf72cca4 jcasper Wed Dec 18 13:33:36 2019 -0800 Adding most recent validation stats to cdwCheckValidation status, per wrangler request (no ticket) diff --git src/hg/cirm/cdw/cdwCheckValidation/cdwCheckValidation.c src/hg/cirm/cdw/cdwCheckValidation/cdwCheckValidation.c index 56752d6..5b5cf37 100644 --- src/hg/cirm/cdw/cdwCheckValidation/cdwCheckValidation.c +++ src/hg/cirm/cdw/cdwCheckValidation/cdwCheckValidation.c @@ -1,378 +1,409 @@ /* cdwCheckValidation - Check if a cdwSubmit validation step has completed. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "jksql.h" #include "cdw.h" #include "cdwLib.h" #include "obscure.h" // Options and an int to keep track of the validation. int gSubmitId = -1; boolean gWait = FALSE; boolean gLong = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "cdwCheckValidation - Check if a cdwSubmit validation step has completed and print some \n" "\t\t file metrics for the submission. Returns 0 if the submission has completed \n" "\t\t and -1 otherwise. \n" "usage:\n" "\tcdwCheckValidation command user@email.address outputFile\n" "commands are one of:\n" " status - print information\n" " failed - list failed files\n" " retry - rerun validation on failed files\n" "options:\n" "\t-submitId - Over ride the auto selection and use this specific id, this ignores the email" "provided. \n" "\t-wait - Wait until all files have been processed by cdwQaAgent, check every 5 seconds or" "so.\n" "\t-long - Prints file-by-file status as well as overall status. \n"); } /* Command line validation table. */ static struct optionSpec options[] = { {"submitId", OPTION_STRING}, {"wait", OPTION_BOOLEAN}, {"long", OPTION_BOOLEAN}, {NULL, 0}, }; int getSubmitId(FILE *f, char *cdwUser, char *command) /* Use the user email to get the userId out of cdwUser, then use the cdwUser id to key into * cdwSubmit and grab the last submission */ { struct sqlConnection *conn = sqlConnect("cdw"); // Query the cdwUser table to find the userId associated with the email address. char query[1024]; sqlSafef(query, sizeof(query), "select * from cdwUser where email = '%s';", cdwUser); struct cdwUser *submitter = cdwUserLoadByQuery(conn,query); if (submitter == NULL) uglyAbort("There are no users associated with the provided email %s",cdwUser); // Use the userId to key into cdwSubmit and get the last submission id. sqlSafef(query, sizeof(query), "select * from cdwSubmit where userId=%i order by id desc limit 1", submitter->id); struct cdwSubmit *submission = cdwSubmitLoadByQuery(conn, query); if (submission == NULL) uglyAbort("There are no submissions associated with the provided id."); // Print out stats. if (startsWith("status",command)) { fprintf(f,"User email:\t%s\nUser id:\t%i\n",submitter->email, submitter->id); fprintf(f,"Submission id:\t%i\n",submission->id); } sqlDisconnect(&conn); return submission->id; } int secondsUsed(int submitId) /* Lets figure out how long things have taken. */ { struct sqlConnection *conn = sqlConnect("cdw"); char query[1024]; // Get when the first job started. sqlSafef(query, sizeof(query), "select min(startTime) from cdwJob where submitId = '%i'" " and startTime != 0", submitId); int batchStart = sqlQuickNum(conn, query); // Get when the last job finished. sqlSafef(query, sizeof(query), "select max(endTime) from cdwJob where submitId = '%i'" " and endTime != 0", submitId); int batchEnd = sqlQuickNum(conn, query); sqlDisconnect(&conn); return batchEnd - batchStart; } void printEnrichmentStats(FILE *f, struct cdwValidFile *validFile) // Print out the enrichment stats for a valid file. { struct sqlConnection *conn = sqlConnect("cdw"); char query[1024]; fprintf(f, " Enrich"); // Return if no ucscDb is specified. if (!validFile->ucscDb) return; // Get the assembly id, default to hg19. char ucscDb[128] = "hg19"; if (!startsWith(validFile->ucscDb, " ")) safef(ucscDb, sizeof(ucscDb), "%s", validFile->ucscDb); sqlSafef(query, sizeof(query), "select * from cdwAssembly where ucscDb = '%s'", ucscDb); struct cdwAssembly *assembly = cdwAssemblyLoadByQuery(conn, query); if (!assembly) return; // Get the list of enrichment targets for this valid file. char enrichedIn[128] = "exon"; if (!startsWith(validFile->enrichedIn," ")) safef(enrichedIn, sizeof(enrichedIn), "%s", validFile->enrichedIn); sqlSafef(query, sizeof(query), "select * from cdwQaEnrichTarget where assemblyId = %i and name in" "('%s', 'chrX', 'chrY', 'chrM')", assembly->id, enrichedIn); struct cdwQaEnrichTarget *targetList = cdwQaEnrichTargetLoadByQuery(conn, query); if (!targetList) return; // Loop through the list of enrichment targets and print stats. struct cdwQaEnrichTarget *target; for (target = targetList; ; target = target->next) { sqlSafef(query, sizeof(query), "select enrichment from cdwQaEnrich where fileId = %i and" " qaEnrichTargetId = %i", validFile->fileId, target->id); double enrichment = sqlQuickDouble(conn, query); fprintf(f," %s: %0.3f", target->name, enrichment); if (target->next == NULL) break; else fprintf(f,","); } sqlDisconnect(&conn); } void printTimeStats(FILE *f, struct cdwJob *jobList, int finJobs) // Print out the time so far and an estimate of the time remaining in a pretty fashion. { int timeSoFar = secondsUsed(jobList->submitId); int hours, minutes, seconds; // Do math magic to get the proper hours, minutes and seconds. hours = timeSoFar/3600; minutes = (timeSoFar - (hours*3600))/ 60; seconds = (timeSoFar - (hours*3600) - (minutes*60)); assert(slCount(jobList) > 0); // Determine average time so far, multiply it by the number of jobs remaining. double avgTimeSoFar = (double) timeSoFar / (double) finJobs; int jobsRemaining = slCount(jobList) - finJobs; double estTimeRemaining = avgTimeSoFar * jobsRemaining; int eHours, eMinutes, eSeconds; eHours = estTimeRemaining/3600; eMinutes = (estTimeRemaining - (eHours*3600))/ 60; eSeconds = (estTimeRemaining - (eHours*3600) - (eMinutes*60)); fprintf(f,"Time so far:\t%ih %im %is\nTime remaining:\t%ih %im %is\n", hours, minutes, seconds, eHours, eMinutes, eSeconds); } void printValidFileStats(FILE *f, char *fileId) // Print out the stats for a valid file. Enrichment stats are handled in their own function. { struct sqlConnection *conn = sqlConnect("cdw"); char query[1024]; sqlSafef(query, sizeof(query), "select * from cdwValidFile where fileId = %s", fileId); struct cdwValidFile *validFile = cdwValidFileLoadByQuery(conn, query); sqlDisconnect(&conn); if (!validFile) // Usually indicates an errant cdwAddQaJob in some code some where. { fprintf(f,"File id: %s | Status: corrupted | The file passed validation yet has no entry in" " cdwValidFile | \n", fileId); sqlDisconnect(&conn); return; } // Default these to n/a to prevent seg faults. if (!validFile->mapRatio || !validFile->uniqueMapRatio || !validFile->sampleCoverage) { fprintf(f,"File id: %s | Valid: Yes | Format: %s |\n", fileId, validFile->format); return; } fprintf(f,"File id: %s | Valid: Yes | Format: %s | Map ratio (mr): %f | Unique mr: %f| " "Coverage: %f |", fileId, validFile->format, validFile->mapRatio, validFile->uniqueMapRatio, validFile->sampleCoverage); printEnrichmentStats(f, validFile); fprintf(f," |\n"); return; } void printSubmissionStatistics(FILE *f, struct cdwJob *jobList, char *command) /* Take a list of jobs and gather stats. Different stats are printed to file f depending on the * command given. */ { if (!jobList) uglyAbort("There are no jobs on the joblist"); // Keep track of all the possible job types. int finJobs = 0, workingJobs = 0, queuedJobs = 0, failedJobs = 0, validFiles = 0; if (startsWith("status", command) && gLong) fprintf(f,"Printing file statistics...\n"); // Loop through all jobs and gather stats. +struct hash *latestRuns = hashNew(0); struct cdwJob *job; for (job = jobList;; job = job->next) { char *prefix = "cdwQaAgent "; assert(startsWith(prefix, job->commandLine)); char *fileIdString = job->commandLine + strlen(prefix); long long fileId = sqlLongLong(fileIdString); if (job->startTime > 0) { if (job->endTime > 0) // Finished jobs. { ++finJobs; + + // Keep track of the most recent job found for this fileId + struct hashEl *fileEl = hashStore(latestRuns, fileIdString); + if (fileEl->val == NULL) + fileEl->val = job; + else + { + struct cdwJob *prevFileJob = (struct cdwJob*) fileEl->val; + if (prevFileJob->endTime < job->endTime) + fileEl->val = job; + } + if (job->returnCode == 0) // Jobs that passed validation. { ++validFiles; if (startsWith("status", command) && gLong) printValidFileStats(f, fileIdString); } else // Jobs that failed validation. { ++failedJobs; if (startsWith("failed", command)) fprintf(f,"File id: %s | Valid: No | Error: %s |\n", fileIdString, job->stderr); if (startsWith("status", command) && gLong) fprintf(f,"File id: %s | Valid: No | Error: %s |\n", fileIdString, job->stderr); if (startsWith("retry",command)) { fprintf(f,"Rerunning file %s \n", fileIdString); struct sqlConnection *conn = sqlConnect("cdw"); cdwAddQaJob(conn, fileId, job->submitId); sqlDisconnect(&conn); } } } else // Working jobs. { ++workingJobs; if (startsWith("status", command) && gLong) fprintf(f,"File id: %s | Valid: running | Run time: %i | \n", fileIdString, (int)((unsigned)time(NULL) - job->startTime)); } } else // Queued jobs. { ++queuedJobs; if (startsWith("status", command) && gLong) fprintf(f,"File id: %s | Valid: queued | \n", fileIdString); } if (!job->next) break; } // Print out overall submission stats. if (startsWith("status", command)) { fprintf(f,"Total files:\t%i\n",slCount(jobList)); fprintf(f,"Finished validation:\t%i\n", finJobs); fprintf(f,"Passed validation:\t\e[1;32m%i\e[0m\n", validFiles); fprintf(f,"Failed validation:\t\e[1;31m%i\e[0m\n", failedJobs); + + int passedMostRecent = 0, failedMostRecent = 0; + struct hashEl *file; + struct hashCookie cookie = hashFirst(latestRuns); + while ((file = hashNext(&cookie)) != NULL) + { + struct cdwJob *job = (struct cdwJob*) file->val; + if (job->returnCode == 0) + passedMostRecent++; + else + failedMostRecent++; + } + + fprintf(f,"Unique files:\t%d\n", passedMostRecent+failedMostRecent); + fprintf(f,"Files that passed their most recent run:\t\e[1;32m%d\e[0m\n", passedMostRecent); + fprintf(f,"Files that failed their most recent run:\t\e[1;32m%d\e[0m\n", failedMostRecent); fprintf(f,"Jobs in progress:\t%i\n", workingJobs); fprintf(f,"Jobs queued:\t%i\n", queuedJobs); printTimeStats(f, jobList, finJobs); if (slCount(jobList) == finJobs) fprintf(f,"Status:\t\e[1;32mCompleted\e[0m\n"); else fprintf(f,"Status:\tIn progress\n"); } if (startsWith("retry", command)) fprintf(f,"Started revalidating %i files.\n", failedJobs); if (startsWith("failed", command)) fprintf(f,"%i files failed validation.\n", failedJobs); + +hashFree(&latestRuns); } void waitLoop(int submitId) /* Hang out and wait for all files in the submission to pass through cdwQaAgent. */ { for (;;) { struct sqlConnection *conn = sqlConnect("cdw"); // Grab all cdwJob entries with the submitId. char query[1024]; sqlSafef(query, sizeof(query), "select * from cdwJob where submitId = '%i';", submitId); struct cdwJob *jobList = cdwJobLoadByQuery(conn, query); if (!jobList) uglyAbort("The submission has no entries in cdwJob.\nThere are several " "possibilities as to why; the submission could be corrupted, submitted before\n" " submitId's were implemented or the submission is still copying files and has " "not started validation"); // Determine how many cdwJob entries that have completed. sqlSafef(query, sizeof(query), "select count(*) from cdwJob where submitId = '%i'" " and endTime > 0", submitId); int finJobs = sqlQuickNum(conn, query); sqlDisconnect(&conn); if (slCount(jobList) != finJobs) { sleep(5); } else { break; } } } void cdwCheckValidation(char *command, char *cdwUser, char *outputFile) /* cdwCheckValidation - Check if a submission has completed validation. */ { struct sqlConnection *conn = sqlConnect("cdw"); FILE *f = mustOpen(outputFile,"w"); int submitId; // If no submitId is given grab the last submission associated with the users email address. if (gSubmitId == -1) submitId = getSubmitId(f, cdwUser, command); else submitId = gSubmitId; // Grab all cdwJob entries with the submitId. char query[1024]; sqlSafef(query, sizeof(query), "select * from cdwJob where submitId = '%i';", submitId); struct cdwJob *jobList = cdwJobLoadByQuery(conn, query); if (!jobList) uglyAbort("The submission has no entries in cdwJob.\nThere are several " "possibilities as to why; the submission could be corrupted, submitted before\n" " submitId's were implemented or the submission is still copying files and has " "not started validation"); assert(slCount(jobList) > 0); if (gWait) waitLoop(submitId); sqlSafef(query, sizeof(query), "select count(*) from cdwJob where submitId = '%i' and endTime > 0", submitId); int finJobs = sqlQuickNum(conn, query); sqlDisconnect(&conn); // Go through the jobList, gather and print statistics. printSubmissionStatistics(f, jobList, command); // Check if the submission has finished validation. if (finJobs == slCount(jobList)) exit(0); else exit(-1); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); gWait = optionExists("wait"); gLong = optionExists("long"); gSubmitId = optionInt("submitId", gSubmitId); if (argc != 4) usage(); cdwCheckValidation(argv[1], argv[2], argv[3]); return 0; }