67ab751705ed1abbbce914adb0fd295b5a41127d galt Fri Aug 15 12:36:59 2014 -0700 tiny improvement in error message diff --git src/hg/hgTables/genomeSpace.c src/hg/hgTables/genomeSpace.c index d17534c..999558e 100644 --- src/hg/hgTables/genomeSpace.c +++ src/hg/hgTables/genomeSpace.c @@ -1,700 +1,700 @@ /* genomeSpace - stuff related to GenomeSpace. */ #include "common.h" #include "hgTables.h" #include "cart.h" #include "net.h" #include "textOut.h" #include "base64.h" #include "md5.h" #include "obscure.h" #include "net.h" #include "hgConfig.h" #include <sys/wait.h> // Declare external global variables that must be reset when // before outputting a new page. Used for outputting multiple pages. extern boolean webHeadAlreadyOutputed; extern boolean webInTextMode; extern struct hash *includedResourceFiles; extern boolean htmlWarnBoxSetUpAlready; // note there is also an inWeb boolean in cart.c // that would have needed resetting, but I added a line // in webEnd() to reset it. void md5hash(char * fileName, unsigned char md5[16]) /* read f in buffer pieces and update md5 hash */ { struct md5_context ctx; unsigned char buffer[MD5READBUFSIZE]; int bufRead = 0; FILE *f = mustOpen(fileName,"rb"); md5_starts(&ctx); while ((bufRead = fread(&buffer, 1, MD5READBUFSIZE, f)) > 0) { md5_update(&ctx, buffer, bufRead); } md5_finish(&ctx, md5); carefulClose(&f); } boolean doGenomeSpace() /* has the send to GenomeSpace checkbox been selected? */ { return cartUsualBoolean(cart, "sendToGenomeSpace", FALSE); } static void showMissingOutputFileForm() /* User needs to specify the output file */ { htmlOpen("GenomeSpace"); printf("Please specify the output file field for GenomeSpace Data Manager."); printf("<br>"); printf("<br>"); // TODO handle filename with a path. // ACTUALLY, this probably just works. printf("Your output file name may contain a path."); printf("<br>"); printf("<br>"); printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>" "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" ></FORM>", hgtaDoMainPage); htmlClose(); } static void showGsLoginForm() /* User needs to login to GS */ { // TODO should this be a redirect? // TODO should it require https? - note our apache virtual hosts are not set up to work with it yet? // GS Login Page htmlOpen("GenomeSpace"); printf("Please login to GenomeSpace."); printf("<br>"); printf("<br>"); printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=POST>"); printf("<table>"); printf("<tr><td align=right><B>User:</B></td><td><INPUT TYPE=TEXT NAME=\"%s\" SIZE=20 VALUE=\"\"></td></tr>", hgtaGsUser); printf("<tr><td><B>Password:</B></td><td><INPUT TYPE=PASSWORD NAME=\"%s\" SIZE=20 VALUE=\"\"></td></tr>", hgtaGsPassword); printf("<tr><td> </td><td><INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Login to GenomeSpace\"></td></tr>", hgtaDoGsLogin); printf("</form>"); printf("<tr><td> </td><td><FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>" "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Cancel\" ></FORM></td></tr>", hgtaDoMainPage); printf("</table>"); htmlClose(); } static char *parseResponse(int sd, char **pResponseCode) /* parse the http response */ { struct dyString *dy = netSlurpFile(sd); close(sd); char *protocol = "HTTP/1.1 "; if (!startsWith(protocol, dy->string)) - errAbort("GenomeSpace: Expected HTTP/1.1 response: found %s", dy->string); + errAbort("GenomeSpace: Expected response to start with [%s], got [%s]", protocol, dy->string); if (pResponseCode) { char *rc = dy->string + strlen(protocol); char *rcEndString = "\r\n"; char *rcEnd = strstr(dy->string, rcEndString); *pResponseCode = cloneStringZ(rc, rcEnd - rc); } char *headerEndString = "\r\n\r\n"; char *headerEnd = strstr(dy->string, headerEndString); if (!headerEnd) errAbort("header end not found in response"); char *gsResponse = cloneString(headerEnd+strlen(headerEndString)); dyStringFree(&dy); return gsResponse; } static char *getGenomeSpaceConfig(char *variable) /* Read genomeSpace config setting or abort if not found */ { char *value = cfgOption2("genomeSpace", variable); return value; } boolean isGenomeSpaceEnabled() /* genomeSpace is enabled by the presence of GS config settings. */ { char *iSU = getGenomeSpaceConfig("identityServerUrl"); char *dmSvr = getGenomeSpaceConfig("dmServer"); if (isNotEmpty(iSU) && isNotEmpty(dmSvr)) return TRUE; return FALSE; } char *insertUserPasswordIntoUrl(char *url, char *user, char *password) /* Insert cgi-encoded user and password into url after protocol. Free returned string when done. */ { char resultUrl[1024]; char *encUser = cgiEncode(user); char *encPassword = cgiEncode(password); char *rest = stringIn("://", url); if (!rest) errAbort("expected url [%s] to have ://", url); char *protocol = cloneStringZ(url, rest - url); rest += strlen("://"); safef(resultUrl, sizeof resultUrl, "%s://%s:%s@%s", protocol, encUser, encPassword, rest); freeMem(protocol); freeMem(encUser); freeMem(encPassword); return cloneString(resultUrl); } static char *getAuthorizationToken(char *user, char *password) /* Authenticate against GenomeSpace * Returns a token like [IGYpFc1CNO7acOJicopKHBTCS6JwDgoy]*/ { //old url: safef(authUrl, sizeof authUrl, "https://%s:%s@identity.genomespace.org/identityServer/basic", encUser, encPassword); //old2: safef(authUrl, sizeof authUrl, "https://%s:%s@identitytest.genomespace.org:8443/identityServer/basic", encUser, encPassword); //old3: safef(authUrl, sizeof authUrl, "https://%s:%s@identity.genomespace.org/identityServer/basic", encUser, encPassword); char *iSU = getGenomeSpaceConfig("identityServerUrl"); char *authUrl = insertUserPasswordIntoUrl(iSU, user, password); int sd = netUrlOpen(authUrl); if (sd < 0) errAbort("failed to open socket for [%s]", authUrl); char *responseCode = NULL; char *authToken = parseResponse(sd, &responseCode); if (startsWith("401 ", responseCode)) return NULL; if (!sameString(responseCode, "200 OK")) errAbort("GenomeSpace getAuthorizationToken: %s", responseCode); freeMem(authUrl); return authToken; } static char *getGsPersonalDirectory(char *gsToken) /* Get User's default directory from GenomeSpace DM * Returns a url like [https://identity.genomespace.org/datamanager/files/users/<user>] */ { // DEFAULT DIRECTORY // old1 char *defaultDirectoryUrl = "https://identity.genomespace.org/datamanager/defaultdirectory"; // old2 char *defaultDirectoryUrl = "https://dmtest.genomespace.org:8444/datamanager/defaultdirectory"; // old3 char *defaultDirectoryUrl = "https://dm.genomespace.org/datamanager/v1.0/defaultdirectory"; // NOTE the defaultdirectory method got renamed to personaldirectory // old4 char *personalDirectoryUrl = "https://dm.genomespace.org/datamanager/v1.0/personaldirectory"; char *dmSvr = getGenomeSpaceConfig("dmServer"); char personalDirectoryUrl[1024]; safef(personalDirectoryUrl, sizeof personalDirectoryUrl, "%s/v1.0/personaldirectory", dmSvr); struct dyString *reqExtra = newDyString(256); dyStringPrintf(reqExtra, "Cookie: gs-token=%s\r\n", gsToken); int sd = netOpenHttpExt(personalDirectoryUrl, "GET", reqExtra->string); if (sd < 0) errAbort("failed to open socket for [%s]", personalDirectoryUrl); struct dyString *dy = netSlurpFile(sd); close(sd); char *personalDirectory = NULL; if (strstr(dy->string, "HTTP/1.1 303 See Other")) { char *valStart = strstr(dy->string, "Location: "); if (valStart) { valStart += strlen("Location: "); char *valEnd = strstr(valStart, "\r\n"); if (!valEnd) errAbort("location not found in response headers"); personalDirectory = cloneStringZ(valStart, valEnd - valStart); } } dyStringFree(&dy); dyStringFree(&reqExtra); return personalDirectory; } boolean checkGsReady() /* check that GS requirements are met */ { // check that the output file has been specified char *fileName = cartUsualString(cart, hgtaOutFileName, ""); if (sameString(fileName,"")) { cartRemove(cart, hgtaDoTopSubmit); showMissingOutputFileForm(); return FALSE; } // check login // is the GS login token in the cart? char *gsToken = cartUsualString(cart, "gsToken", NULL); if (!gsToken) { cartRemove(cart, hgtaDoTopSubmit); showGsLoginForm(); return FALSE; } else { // check if the token still valid char *temp = getGsPersonalDirectory(gsToken); if (!temp) { cartRemove(cart, hgtaDoTopSubmit); showGsLoginForm(); return FALSE; } freeMem(temp); } return TRUE; } void doGsLogin(struct sqlConnection *conn) /* Process user password post. * Log into GS * if successful save gsToken * else return to login page or to mainpage */ { char *user = cloneString(cartUsualString(cart, hgtaGsUser, NULL)); char *password = cloneString(cartUsualString(cart, hgtaGsPassword, NULL)); // do not leave them in the cart cartRemove(cart, hgtaGsUser); cartRemove(cart, hgtaGsPassword); if (!(user && password)) errAbort("expecting GenomeSpace user and password"); char *gsToken = getAuthorizationToken(user, password); if (gsToken) { cartSetString(cart, "gsToken", gsToken); } else { cartRemove(cart, "gsToken"); } cartSetString(cart, hgtaDoTopSubmit, "get output"); } char *gsUploadUrl(char *gsToken, char *user, char *uploadFileName, off_t contentLength, char *base64Md5, char *contentType) /* call uploadurl */ { // UPLOADURLS // TODO deal with creating parent dirs if uploadFileName contains a path? maybe not. // old: "https://identity.genomespace.org/datamanager/uploadurls/users/" // old "https://dm.genomespace.org/datamanager/v1.0/uploadurl/users/" // if this works, use default dir fetched earlier instead char *dmSvr = getGenomeSpaceConfig("dmServer"); char uploadUrl[1024]; safef(uploadUrl, sizeof(uploadUrl), "%s/v1.0/uploadurl/users/" "%s/" "%s" "?Content-Length=%lld" "&Content-MD5=%s" "&Content-Type=%s" , dmSvr , user , uploadFileName , (long long) contentLength , cgiEncode(base64Md5) , contentType ); struct dyString *reqExtra = newDyString(256); dyStringPrintf(reqExtra, "Cookie: gs-token=%s\r\n", gsToken); int sd = netOpenHttpExt(uploadUrl, "GET", reqExtra->string); if (sd < 0) errAbort("failed to open socket for [%s]", uploadUrl); char *responseCode = NULL; char *s3UploadUrl = parseResponse(sd, &responseCode); if (sameString(responseCode, "404 Not Found")) errAbort("GenomeSpace: %s, if a path was used in the output name, it may indicate the path does not exist in GenomeSpace.", responseCode); if (!sameString(responseCode, "200 OK")) errAbort("GenomeSpace: %s", responseCode); dyStringFree(&reqExtra); return s3UploadUrl; } #define S3UPBUFSIZE 65536 char *gsS3Upload(char *s3UploadUrl, char *inputFileName, off_t contentLength, char *base64Md5, char *hexMd5, char *contentType, boolean progress, char *fileName) /* call s3 upload */ { // S3 UPLOAD to Amazon Storage struct dyString *reqExtra = newDyString(256); dyStringPrintf(reqExtra, "Content-Length: %lld\r\n", (long long)contentLength); dyStringPrintf(reqExtra, "Content-MD5: %s\r\n", base64Md5); dyStringPrintf(reqExtra, "Content-Type: %s\r\n", contentType); int sd = netOpenHttpExt(s3UploadUrl, "PUT", reqExtra->string); if (sd < 0) errAbort("failed to open socket for [%s]", s3UploadUrl); unsigned char buffer[S3UPBUFSIZE]; int bufRead = 0; FILE *f = mustOpen(inputFileName,"rb"); off_t totalUploaded = 0; int lastPctUploaded = -1; // upload the file contents while ((bufRead = fread(&buffer, 1, S3UPBUFSIZE, f)) > 0) { int bufWrite = 0; while (bufWrite < bufRead) { int socketWrite = write(sd, buffer + bufWrite, bufRead - bufWrite); if (socketWrite == -1) { if (errno == 32) // broken pipe often happens when the ssh connection shuts down or has errors. { warn("broken pipe, S3 server closed the ssh connection."); break; } errnoAbort("error writing to socket for GenomeSpace upload"); } bufWrite += socketWrite; } if (errno == 32) break; totalUploaded += bufRead; int pctUploaded = 100.0*totalUploaded/contentLength; if (progress && (pctUploaded != lastPctUploaded)) { char nicenumber[1024]=""; sprintWithGreekByte(nicenumber, sizeof(nicenumber), contentLength); // Various global flags must be reset to draw a fresh html output page. webHeadAlreadyOutputed = FALSE; webInTextMode = FALSE; includedResourceFiles = NULL; htmlWarnBoxSetUpAlready=FALSE; htmlOpen("Uploading Output to GenomeSpace"); printf("Name: %s<br>\n", fileName); printf("Size: %s<br>\n", nicenumber); printf("Progress: %0d%%<br>\n", pctUploaded); printf("<br>\n"); printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>\n" "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" >" "<INPUT TYPE=SUBMIT NAME=\"Refresh\" VALUE=\"Refresh\" onclick='window.location=window.location;return false;' >" "</FORM>\n" , hgtaDoMainPage); puts("<script type=\"text/JavaScript\">"); puts("<!--"); puts("setTimeout(\"location = location;\",5000);"); puts("-->"); puts("</script>"); htmlClose(); fflush(stdout); lastPctUploaded = pctUploaded; } } carefulClose(&f); char *responseCode = NULL; char *s3UploadResponse = parseResponse(sd, &responseCode); if (!sameString(responseCode, "200 OK")) errAbort("Amazon S3 Response: %s", responseCode); dyStringFree(&reqExtra); return s3UploadResponse; } void getBackgroundStatus(char *url) /* fetch status as the latest complete html block available */ { char *html = NULL; if (fileSize(url)==0) { htmlOpen("Background Status"); errAbort("No output found. Expecting output in [%s].", url); htmlClose(); return; } readInGulp(url, &html, NULL); int numLines = chopString(html, "\n", NULL, 1000000); char **lines = NULL; AllocArray(lines, numLines); chopString(html, "\n", lines, numLines); int end; for (end=numLines-1; end >= 0 && ! (endsWith(lines[end], "</html>") || endsWith(lines[end], "</HTML>")) ; --end) /* do nothing */ ; if (end < 0) { htmlOpen("Background Status"); errAbort("No complete html found"); htmlClose(); return; } int start; for (start=end; start >= 0 && ! (startsWith("<html>", lines[start]) || startsWith("<HTML>", lines[start])) ; --start) /* do nothing */ ; if (start < 0) { htmlOpen("Background Status"); errAbort("No html start tag found"); htmlClose(); return; } puts("Content-Type: text/html\n"); int line; for (line=start; line <= end; line++) puts(lines[line]); } #include "trashDir.h" // TODO move this to a generic re-usable location void startBackgroundWork(char *exec, char **pWorkUrl) /* deal with forking off child for background work * and setting up the trash file for communicating * from the child to the browser */ { char *workUrl = NULL; char hgsid[64]; struct tempName tn; safef(hgsid, sizeof(hgsid), "%s", cartSessionId(cart)); trashDirFile(&tn, "backGround", hgsid, ".tmp"); workUrl = cloneString(tn.forCgi); fflush(stdout); fflush(stderr); // seems that we need to use the double-fork trick // to create enough separation between the non-waiting parent // and the grand-child process. otherwise the OS and Apache are waiting on the child. int pid = fork(); if (pid == -1) { errAbort("can't fork, error %d", errno); } if (pid == 0) // child { int pid2 = fork(); if (pid2 == -1) { errAbort("can't fork, error %d", errno); } if (pid2 == 0) // grand child { // we need to close or redup to open stdout, stderr, stdin // in order for apache to break ties with it. // Will the grandchild cgi still be able to function? // redirect stdout of child to the trash file for easier use of // library functions that output html to stdout. int out = mustOpenFd(tn.forCgi, O_WRONLY | O_CREAT); fflush(stdout); dup2(out,STDOUT_FILENO); /* closes STDOUT before setting it back to saved descriptor */ close(out); // Unfortunately we must create our own stderr log file char errName[1024]; safef(errName, sizeof errName, "%s.err", tn.forCgi); int err = mustOpenFd(errName, O_CREAT | O_WRONLY | O_APPEND); dup2(err, STDERR_FILENO); close(err); // stdin input is just empty int in = mustOpenFd("/dev/null", O_RDONLY); dup2(in, STDIN_FILENO); close(in); // execute so that we will be able to use database and other operations normally. char execPath[4096]; safef(execPath, sizeof execPath, "%s hgsid=%s", exec, hgsid); char *args[10]; int numArgs = chopString(execPath, " ", args, 10); args[numArgs] = NULL; // by creating a minimal environment and not inheriting from the parent, // it cause cgiSpoof to run, picking up command-line params as cgi vars. char *newenviron[] = { "HGDB_CONF=hg.conf", NULL }; int sleepSeconds = 1; // was 5 sleep(sleepSeconds); // Give the foreground process time to write the cart. execve(args[0], args+1, newenviron); // SHOULD NOT GET HERE UNLESS EXEC FAILED. verbose(1,"execve failed for %s\n", exec); _exit(0); // exit without the usual cleanup which messes up parent's db connections etc. } else // child { _exit(0); // exit without the usual cleanup which messes up parent's db connections etc. } } else // parent { *pWorkUrl = workUrl; // wait for the exiting child (not grandchild) int w, status; do { w = waitpid(pid, &status, WUNTRACED | WCONTINUED); if (w == -1) { perror("waitpid"); exit(EXIT_FAILURE); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) verbose(1, "exited, status=%d\n", WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { verbose(1, "killed by signal %d\n", WTERMSIG(status)); } else if (WIFSTOPPED(status)) { verbose(1, "stopped by signal %d\n", WSTOPSIG(status)); } else if (WIFCONTINUED(status)) { verbose(1, "continued\n"); } } while (!WIFEXITED(status) && !WIFSIGNALED(status)); // done waiting for child. } } void gsSendToDM() /* upload the generated file to DM */ { // This is now run via fork/exec as a separate background process. char *trashFileName = cartUsualString(cart, "gsTemp", ""); char *fileName = cartUsualString(cart, hgtaOutFileName, ""); // adjust upload name based on compression and existing extension char *compressType = cartUsualString(cart, hgtaCompressType, textOutCompressNone); if (!(isEmpty(compressType) || sameWord(compressType, textOutCompressNone))) { char *suffix = getCompressSuffix(compressType); if (!endsWith(fileName, suffix)) fileName = addSuffix(fileName, suffix); } off_t fSize = fileSize(trashFileName); char *gsToken = cartUsualString(cart, "gsToken", NULL); char *contentType = "text/plain"; // some examples show applicaton/octet-stream char *persDir = getGsPersonalDirectory(gsToken); char *user = strrchr(persDir,'/'); ++user; char nicenumber[1024]=""; sprintWithGreekByte(nicenumber, sizeof(nicenumber), fSize); htmlOpen("Uploading Output to GenomeSpace"); printf("Name: %s<br>\n", fileName); printf("Size: %s<br>\n", nicenumber); printf("Progress: 0%%<br>\n"); printf("You can remain on this page and monitor upload progress.<br>\n"); printf("Otherwise, feel free to continue working, and your output will appear in GenomeSpace when the upload is complete.<br>\n"); printf("<br>\n"); printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>\n" "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" >\n" "<INPUT TYPE=SUBMIT NAME=\"Refresh\" VALUE=\"Refresh\" onclick='window.location=window.location;return false;' >" "</FORM>\n" , hgtaDoMainPage); puts("<script type=\"text/JavaScript\">"); puts("<!--"); puts("setTimeout(\"location = location;\",5000);"); puts("-->"); puts("</script>"); htmlClose(); fflush(stdout); // MD5 COMPUTE unsigned char md5[16]; /* Keep the md5 checksum here. */ md5hash(trashFileName,md5); char *hexMd5 = md5ToHex(md5); char *base64Md5 = base64Encode((char*)md5, 16); char *s3UploadUrl = gsUploadUrl(gsToken, user, fileName, fSize, base64Md5, contentType); char *s3Response = gsS3Upload(s3UploadUrl, trashFileName, fSize, base64Md5, hexMd5, contentType, TRUE, fileName); if (sameString(s3Response,"")) { // Reset global flags before drawing brand new page webHeadAlreadyOutputed = FALSE; webInTextMode = FALSE; includedResourceFiles = NULL; htmlWarnBoxSetUpAlready=FALSE; htmlOpen("Uploaded Output to GenomeSpace"); printf("Name: %s<br>\n", fileName); printf("Size: %s<br>\n", nicenumber); printf("Output has been successfully uploaded.<br>\n"); printf("<br>"); printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>\n" "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" ></FORM>\n" , hgtaDoMainPage); htmlClose(); fflush(stdout); } //printf("s3UploadUrl [%s]", s3UploadUrl); //printf("<br>"); //printf("s3Response [%s]", s3Response); //printf("<br>"); exit(0); // CANNOT RETURN }