404d5bb6d8c0418d5f06535ef470e36c35d2a237 chmalee Thu Apr 16 15:57:56 2026 -0700 Add assembly hub support to hubSpace. Users can upload a .2bit to create an assembly hub, optionally alongside their own *.hub.txt (prefix names like araTha1.hub.txt are recognized) and sibling track files. Uploads run in parallel; hub.txt mutations are serialized per-hub via flock so arrival order does not matter. - hubSpace table gains a hubType column ('trackHub' or 'assemblyHub'); ON DUPLICATE KEY UPDATE excludes it so a re-upload cannot revert an upgraded hub. - writeHubText can now emit an assembly stanza derived from the 2bit; upgradeHubTxtForAssembly promotes an existing plain hub.txt in place when a 2bit arrives after tracks. - pre-finish decides synthesize vs upgrade vs leave-alone from server state (existing rows, hub.txt on disk) plus a single client flag (batchHasHubTxt); client-supplied hubType is no longer trusted. - Client UI adds 2bit as a file type, locks the genome field when the hub is authoritative (drilled-in or batch hub.txt), defaults new uploads to an existing assembly hub at top level, and routes hgTracks URLs through 'genome=' vs 'db=' by hubType. - Fix pre-existing nested-path bug in hubPathFromParentDir (*firstSlash = 0). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> diff --git src/hg/hgHubConnect/hooks/pre-finish.c src/hg/hgHubConnect/hooks/pre-finish.c index e300d2d87e0..3d7eea2adbe 100644 --- src/hg/hgHubConnect/hooks/pre-finish.c +++ src/hg/hgHubConnect/hooks/pre-finish.c @@ -92,30 +92,38 @@ errAbort("You are not logged in. Please navigate to My Data -> My Sessions and log in or create an account."); } fprintf(stderr, "userName='%s'\n", userName); // NOTE: All Upload.MetaData values are strings // Check multiple possible metadata keys for filename (Uppy sends 'filename' and 'name' by default, // our JS code also sets 'fileName' - try all to handle resumed uploads with old metadata) char *rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.fileName", NULL); if (!rawFileName) rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.filename", NULL); if (!rawFileName) rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.name", NULL); fileName = rawFileName ? cgiEncodeFull(rawFileName) : NULL; fileSize = jsonQueryInt(req, "", "Event.Upload.Size", 0, NULL); fileType = jsonQueryString(req, "", "Event.Upload.MetaData.fileType", NULL); db = jsonQueryString(req, "", "Event.Upload.MetaData.genome", NULL); + // Blocks newline injection into the synthesized hub.txt. + if (db && db[0]) + { + char *p; + for (p = db; *p; p++) + if (!(isalnum((unsigned char)*p) || *p == '_' || *p == '-')) + errAbort("Invalid genome name '%s': only letters, digits, '_' and '-' are allowed", db); + } reqLm = jsonQueryString(req, "", "Event.Upload.MetaData.lastModified", NULL); if (reqLm) lastModified = sqlLongLong(reqLm) / 1000; // yes Javascript dates are in millis else lastModified = time(NULL); // fallback to current time if not provided parentDir = jsonQueryString(req, "", "Event.Upload.MetaData.parentDir", NULL); fprintf(stderr, "parentDir = '%s'\n", parentDir ? parentDir : "(null)"); // strip out plain leading '.' and '/' components // middle '.' components are dealt with later if (parentDir && (startsWith("./", parentDir) || startsWith("/", parentDir))) parentDir = skipBeyondDelimit(parentDir, '/'); tusFile = jsonQueryString(req, "", "Event.Upload.Storage.Path", NULL); tusInfo = jsonQueryString(req, "", "Event.Upload.Storage.InfoPath", NULL); if (fileName == NULL) { @@ -168,38 +176,76 @@ // the command line specified dataDir + pre-create's ChangeFileInfo // this was leading to a bug where the uploaded file had the symlinked // path, but the containing hub.txt and directory row had the realpath, // which was causing confusion in the UI code char *canonicalPath = realpath(tusFile, NULL); if (canonicalPath != NULL) row->location = canonicalPath; else { // all upload data should have been received and thus the realpath // should not fail, but just in case, put something valid here row->location = tusFile; } row->md5sum = md5HexForFile(row->location); row->parentDir = encodedParentDir ? encodedParentDir : ""; - if (!isHubToolsUpload && !(sameString(fileType, "hub.txt"))) + // Derive hubType server-side; never trust the client's hubType. + // A 2bit always promotes its hub to assembly. Otherwise inherit + // the existing hub's type, defaulting to trackHub. + char *parentDirForCheck = encodedParentDir ? hubNameFromPath(encodedParentDir) : ""; + if (sameOk(fileType, "2bit")) + row->hubType = "assemblyHub"; + else + { + char *existingType = existingHubTypeForDir(userName, parentDirForCheck); + row->hubType = existingType ? existingType : "trackHub"; + } + char *batchHasHubTxtStr = jsonQueryString(req, "", "Event.Upload.MetaData.batchHasHubTxt", NULL); + boolean batchHasHubTxt = sameOk(batchHasHubTxtStr, "true"); + boolean userOwnNamedHubTxt = userHasOwnNamedHubTxtInDir(userName, parentDirForCheck); + boolean userAuth = batchHasHubTxt || userOwnNamedHubTxt; + boolean isHubTxt = sameOk(fileType, "hub.txt"); + boolean isTwoBit = sameOk(fileType, "2bit"); + + // Serialize hub.txt read-modify-write across parallel pre-finish + // processes for the same hub. flock is held for the entire + // decision + action so writeHubText's fileExists check and the + // upgrade's read-rewrite are atomic with respect to siblings. + // Without a parentDir there is no hub to protect. + int hubLockFd = encodedParentDir ? lockHubDir(dataDir) : -1; + if (!isHubToolsUpload && !isHubTxt) + { + if (!userAuth) + { + if (isTwoBit) { + if (!literalHubTxtExistsOnDisk(parentDirForCheck, userDataDir)) createNewTempHubForUpload(reqId, row, userDataDir, encodedParentDir); - fprintf(stderr, "added hub.txt and hubSpace row for hub for file: '%s'\n", fileName); - fflush(stderr); + upgradeExistingHubToAssembly(row, userDataDir, encodedParentDir); + } + else + createNewTempHubForUpload(reqId, row, userDataDir, encodedParentDir); + } + else if (isTwoBit) + { + // user's hub.txt is authoritative; just flip rows to assemblyHub. + upgradeExistingHubToAssembly(row, userDataDir, encodedParentDir); + } } + unlockHubDir(hubLockFd); // first make the parentDir rows - makeParentDirRows(row->userName, sqlDateToUnixTime(row->lastModified), row->db, row->parentDir, userDataDir); + makeParentDirRows(row->userName, sqlDateToUnixTime(row->lastModified), row->db, row->parentDir, userDataDir, row->hubType); row->parentDir = encodedParentDir ? hubNameFromPath(encodedParentDir) : ""; addHubSpaceRowForFile(row); fprintf(stderr, "added hubSpace row for file '%s'\n", fileName); fflush(stderr); } } if (errCatch->gotError) { rejectUpload(response, errCatch->message->string); // must remove the tusd temp files so if the users tries again after a temp error // the upload will work if (tusFile) { mustRemove(tusFile); mustRemove(tusInfo);