b318572799b35ed9cd6fabdeb247495d72d4da4c chmalee Tue Jun 3 18:07:28 2025 -0700 Make the serverName a required input to hubspace uploads so the hubspace machine can determine where to place uploads. Prevents any current or future username collisions between euro and rr and prevents tusd temp file collisions between the machines, refs #31058 diff --git src/hg/lib/userdata.c src/hg/lib/userdata.c index 3823add7dac..f916c5c4113 100644 --- src/hg/lib/userdata.c +++ src/hg/lib/userdata.c @@ -27,125 +27,206 @@ { return (loginSystemEnabled() || wikiLinkEnabled()) ? wikiLinkUserName() : NULL; } char *emailForUserName(char *userName) /* Fetch the email for this user from gbMembers hgcentral table */ { struct sqlConnection *sc = hConnectCentral(); struct dyString *query = sqlDyStringCreate("select email from gbMembers where userName = '%s'", userName); char *email = sqlQuickString(sc, dyStringCannibalize(&query)); hDisconnectCentral(&sc); // this should be freeMem'd: return email; } -char *getDataDir(char *userName) +char *normalizePath(const char *path) +/* Fix up multiple '/', './', '..' chars in a path name without checking + * that each component must exist. This was mostly generated by ChatGPT */ +{ +if (path == NULL) + return NULL; + +// Make a copy of the path to tokenize +char *copy = cloneString(path); +if (!copy) + return NULL; + +char *parts[1024]; // max components +int count = 0; + +char *token = strtok(copy, "/"); +while (token != NULL) + { + if (strcmp(token, "..") == 0) + { + if (count > 0) + count--; // go up a level + } + else if (strlen(token) > 0) + { + parts[count++] = token; + } + token = strtok(NULL, "/"); + } + +// Rebuild the normalized path +size_t len = 2; // at least '/' +for (int i = 0; i < count; i++) + { + len += strlen(parts[i]) + 1; + } + +char *normalized = needMem(len); +if (!normalized) + { + free(copy); + return NULL; + } + +normalized[0] = '/'; +normalized[1] = '\0'; + +for (int i = 0; i < count; i++) + { + strcat(normalized, parts[i]); + if (i != count - 1) + strcat(normalized, "/"); + } + +freeMem(copy); +return normalized; +} + +char *getEncodedUserNamePath(char *userName) +/* Compute the path for just the userName part of the users upload */ +{ +struct dyString *ret = dyStringNew(0); +if (!userName) + return NULL; +char *encUserName = cgiEncode(userName); +char *userPrefix = md5HexForString(encUserName); +userPrefix[2] = '\0'; +dyStringPrintf(ret, "%s/%s", userPrefix, encUserName); +return dyStringCannibalize(&ret); +} + +char *getDataDir(char *userName, char *serverName) /* Return the full path to the user specific data directory, can be configured via hg.conf - * on hgwdev, this is /data/tusd */ + * on hgwdev, this is /data/tusd + * serverName is required, that becomes a prefix, for separating uploads by machine */ { +if (!userName) + return NULL; + char *tusdDataBaseDir = cfgOption("tusdDataDir"); if (!tusdDataBaseDir || isEmpty(tusdDataBaseDir)) - errAbort("trying to save user file but no tusdDataDir defined in hg.conf"); + errAbort("No tusdDataDir defined in hg.conf"); if (tusdDataBaseDir[0] != '/') errAbort("config setting tusdDataDir must be an absolute path (starting with '/')"); +// the tusdDataDir must be resolved in case it is a symlink +char resolvedDataDir[PATH_MAX]; +realpath(tusdDataBaseDir, resolvedDataDir); +if (isEmpty(resolvedDataDir)) + errAbort("Error resolving hg.conf:tusdDataDir. Ensure that setting points to a valid location"); -char *encUserName = cgiEncode(userName); -char *userPrefix = md5HexForString(encUserName); -userPrefix[2] = '\0'; +tusdDataBaseDir = resolvedDataDir; struct dyString *newDataDir = dyStringNew(0); -dyStringPrintf(newDataDir, "%s/%s/%s", - tusdDataBaseDir, userPrefix, encUserName); +dyStringPrintf(newDataDir, "%s/", tusdDataBaseDir); +if (serverName) + dyStringPrintf(newDataDir, "%s/", serverName); +dyStringPrintf(newDataDir, "%s", getEncodedUserNamePath(userName)); + +// don't use realpath here as the userPrefix and serverName directory +// may not have been created yet +char *canonicalPath = normalizePath(dyStringContents(newDataDir)); -char *canonicalPath = needMem(PATH_MAX); -realpath(dyStringContents(newDataDir), canonicalPath); // now that we have canonicalized the path we need to add a '/' back on // so the rest of the routines can append to this result - dyStringClear(newDataDir); dyStringPrintf(newDataDir, "%s/", canonicalPath); return dyStringCannibalize(&newDataDir); } -char *stripDataDir(char *fname, char *userName) +char *stripDataDir(char *fname, char *userName, char *serverName) /* Strips the getDataDir(userName) off of fname. The dataDir may be a symbolic * link, we will resolve it here. NOTE that this relies on * calling realpath(3) on the fname argument prior to calling stripDataDir() */ { -char *dataDir = getDataDir(userName); +char *dataDir = getDataDir(userName, serverName); if (!dataDir) { // catch a realpath error return NULL; } int prefixSize = strlen(dataDir); if (startsWith(dataDir, fname)) { char *ret = fname + prefixSize; return ret; } return NULL; } -char *getHubDataDir(char *userName, char *hub) +char *getHubDataDir(char *userName, char *hub, char *serverName) { -char *dataDir = getDataDir(userName); +char *dataDir = getDataDir(userName, serverName); return catTwoStrings(dataDir, cgiEncode(hub)); } char *hubSpaceUrl = NULL; static char *getHubSpaceUrl() { if (!hubSpaceUrl) hubSpaceUrl = cfgOption("hubSpaceUrl"); return hubSpaceUrl; } -char *webDataDir(char *userName) +char *webDataDir(char *userName, char *serverName) /* Return a web accesible path to the userDataDir, this is different from the full path tusd uses */ { char *retUrl = NULL; if (userName) { char *encUserName = cgiEncode(userName); char *userPrefix = md5HexForString(encUserName); userPrefix[2] = '\0'; struct dyString *userDirDy = dyStringNew(0); - dyStringPrintf(userDirDy, "%s/%s/%s/", getHubSpaceUrl(), userPrefix, encUserName); + dyStringPrintf(userDirDy, "%s/%s/%s/%s/", getHubSpaceUrl(), serverName, userPrefix, encUserName); retUrl = dyStringCannibalize(&userDirDy); } return retUrl; } -char *urlForFile(char *userName, char *filePath) +char *urlForFile(char *userName, char *filePath, char *serverName) /* Return a web accessible URL to filePath */ { -char *webDataUrl = webDataDir(userName); +char *webDataUrl = webDataDir(userName, serverName); if (webDataUrl) { return catTwoStrings(webDataUrl, filePath); } return NULL; } -char *prefixUserFile(char *userName, char *fname, char *parentDir) +char *prefixUserFile(char *userName, char *fname, char *parentDir, char *serverName) /* Allocate a new string that contains the full per-user path to fname. return NULL if * we cannot construct a full path because of a realpath(3) failure. * parentDir is optional and will go in between the per-user dir and the fname */ { -char *pathPrefix = getDataDir(userName); +char *pathPrefix = getDataDir(userName, serverName); char *path = NULL; if (pathPrefix) { if (parentDir) { struct dyString *ret = dyStringCreate("%s%s%s%s", pathPrefix, parentDir, lastChar(parentDir) == '/' ? "" : "/", fname); path = dyStringCannibalize(&ret); } else path = catTwoStrings(pathPrefix, fname); char canonicalPath[PATH_MAX]; realpath(path, canonicalPath); // after canonicalizing the path, make sure it starts with the userDataDir, to prevent // deleting files like blah/../../../../systemFile.text if (startsWith(pathPrefix, canonicalPath)) @@ -359,37 +440,37 @@ hubTextRow->md5sum = md5HexForFile(hubPath); hubTextRow->parentDir = hubNameFromPath(hubPath); if (!checkHubSpaceRowExists(hubTextRow)) addHubSpaceRowForFile(hubTextRow); } static void deleteHubSpaceRow(char *fname, char *userName) /* Deletes a row from the hubspace table for a given fname */ { struct sqlConnection *conn = hConnectCentral(); struct dyString *deleteQuery = sqlDyStringCreate("delete from hubSpace where location='%s' and userName='%s'", fname, userName); sqlUpdate(conn, dyStringCannibalize(&deleteQuery)); hDisconnectCentral(&conn); } -void removeFileForUser(char *fname, char *userName) +void removeFileForUser(char *fname, char *userName, char *serverName) /* Remove a file for this user if it exists */ { // The file to remove must be prefixed by the hg.conf userDataDir char canonicalPath[PATH_MAX]; realpath(fname, canonicalPath); -if (!startsWith(getDataDir(userName), canonicalPath)) +if (!startsWith(getDataDir(userName, serverName), canonicalPath)) return; if (fileExists(canonicalPath)) { // delete the actual file mustRemove(canonicalPath); // delete the table row deleteHubSpaceRow(canonicalPath, userName); } // TODO: we should also modify the hub.txt associated with this file } struct hubSpace *listFilesForUser(char *userName) /* Return the files the user has uploaded */ { struct sqlConnection *conn = hConnectCentral();