404d5bb6d8c0418d5f06535ef470e36c35d2a237
chmalee
  Thu Apr 16 15:57:56 2026 -0700
Add assembly hub support to hubSpace.

Users can upload a .2bit to create an assembly hub, optionally alongside
their own *.hub.txt (prefix names like araTha1.hub.txt are recognized)
and sibling track files. Uploads run in parallel; hub.txt mutations are
serialized per-hub via flock so arrival order does not matter.

- hubSpace table gains a hubType column ('trackHub' or 'assemblyHub');
ON DUPLICATE KEY UPDATE excludes it so a re-upload cannot revert an
upgraded hub.
- writeHubText can now emit an assembly stanza derived from the 2bit;
upgradeHubTxtForAssembly promotes an existing plain hub.txt in place
when a 2bit arrives after tracks.
- pre-finish decides synthesize vs upgrade vs leave-alone from server
state (existing rows, hub.txt on disk) plus a single client flag
(batchHasHubTxt); client-supplied hubType is no longer trusted.
- Client UI adds 2bit as a file type, locks the genome field when the
hub is authoritative (drilled-in or batch hub.txt), defaults new
uploads to an existing assembly hub at top level, and routes
hgTracks URLs through 'genome=' vs 'db=' by hubType.
- Fix pre-existing nested-path bug in hubPathFromParentDir
(*firstSlash = 0).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

diff --git src/hg/hgHubConnect/hooks/pre-finish.c src/hg/hgHubConnect/hooks/pre-finish.c
index e300d2d87e0..3d7eea2adbe 100644
--- src/hg/hgHubConnect/hooks/pre-finish.c
+++ src/hg/hgHubConnect/hooks/pre-finish.c
@@ -92,30 +92,38 @@
                 errAbort("You are not logged in. Please navigate to My Data -> My Sessions and log in or create an account.");
             }
         fprintf(stderr, "userName='%s'\n", userName);
         // NOTE: All Upload.MetaData values are strings
         // Check multiple possible metadata keys for filename (Uppy sends 'filename' and 'name' by default,
         // our JS code also sets 'fileName' - try all to handle resumed uploads with old metadata)
         char *rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.fileName", NULL);
         if (!rawFileName)
             rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.filename", NULL);
         if (!rawFileName)
             rawFileName = jsonQueryString(req, "", "Event.Upload.MetaData.name", NULL);
         fileName = rawFileName ? cgiEncodeFull(rawFileName) : NULL;
         fileSize = jsonQueryInt(req, "",  "Event.Upload.Size", 0, NULL);
         fileType = jsonQueryString(req, "", "Event.Upload.MetaData.fileType", NULL);
         db = jsonQueryString(req, "", "Event.Upload.MetaData.genome", NULL);
+        // Blocks newline injection into the synthesized hub.txt.
+        if (db && db[0])
+            {
+            char *p;
+            for (p = db; *p; p++)
+                if (!(isalnum((unsigned char)*p) || *p == '_' || *p == '-'))
+                    errAbort("Invalid genome name '%s': only letters, digits, '_' and '-' are allowed", db);
+            }
         reqLm = jsonQueryString(req, "", "Event.Upload.MetaData.lastModified", NULL);
         if (reqLm)
             lastModified = sqlLongLong(reqLm) / 1000; // yes Javascript dates are in millis
         else
             lastModified = time(NULL); // fallback to current time if not provided
         parentDir = jsonQueryString(req, "", "Event.Upload.MetaData.parentDir", NULL);
         fprintf(stderr, "parentDir = '%s'\n", parentDir ? parentDir : "(null)");
         // strip out plain leading '.' and '/' components
         // middle '.' components are dealt with later
         if (parentDir && (startsWith("./", parentDir) || startsWith("/", parentDir)))
             parentDir = skipBeyondDelimit(parentDir, '/');
         tusFile = jsonQueryString(req, "", "Event.Upload.Storage.Path", NULL);
         tusInfo = jsonQueryString(req, "", "Event.Upload.Storage.InfoPath", NULL);
         if (fileName == NULL)
             {
@@ -168,38 +176,76 @@
             // the command line specified dataDir + pre-create's ChangeFileInfo
             // this was leading to a bug where the uploaded file had the symlinked
             // path, but the containing hub.txt and directory row had the realpath,
             // which was causing confusion in the UI code
             char *canonicalPath = realpath(tusFile, NULL);
             if (canonicalPath != NULL)
                 row->location = canonicalPath;
             else
                 {
                 // all upload data should have been received and thus the realpath
                 // should not fail, but just in case, put something valid here
                 row->location = tusFile;
                 }
             row->md5sum = md5HexForFile(row->location);
             row->parentDir = encodedParentDir ? encodedParentDir : "";
-            if (!isHubToolsUpload && !(sameString(fileType, "hub.txt")))
+            // Derive hubType server-side; never trust the client's hubType.
+            // A 2bit always promotes its hub to assembly. Otherwise inherit
+            // the existing hub's type, defaulting to trackHub.
+            char *parentDirForCheck = encodedParentDir ? hubNameFromPath(encodedParentDir) : "";
+            if (sameOk(fileType, "2bit"))
+                row->hubType = "assemblyHub";
+            else
+                {
+                char *existingType = existingHubTypeForDir(userName, parentDirForCheck);
+                row->hubType = existingType ? existingType : "trackHub";
+                }
+            char *batchHasHubTxtStr = jsonQueryString(req, "", "Event.Upload.MetaData.batchHasHubTxt", NULL);
+            boolean batchHasHubTxt = sameOk(batchHasHubTxtStr, "true");
+            boolean userOwnNamedHubTxt = userHasOwnNamedHubTxtInDir(userName, parentDirForCheck);
+            boolean userAuth = batchHasHubTxt || userOwnNamedHubTxt;
+            boolean isHubTxt = sameOk(fileType, "hub.txt");
+            boolean isTwoBit = sameOk(fileType, "2bit");
+
+            // Serialize hub.txt read-modify-write across parallel pre-finish
+            // processes for the same hub. The flock covers writeHubText's
+            // fileExists check and the upgrade's read-rewrite. NOTE(review): the
+            // hubType/userAuth lookups above run before the lock is taken; confirm
+            // a sibling cannot change them. Without a parentDir there is no hub to protect.
+            int hubLockFd = encodedParentDir ? lockHubDir(dataDir) : -1;
+            if (!isHubToolsUpload && !isHubTxt)
+                {
+                if (!userAuth)
+                    {
+                    if (isTwoBit)
                         {
+                        if (!literalHubTxtExistsOnDisk(parentDirForCheck, userDataDir))
                             createNewTempHubForUpload(reqId, row, userDataDir, encodedParentDir);
-                fprintf(stderr, "added hub.txt and hubSpace row for hub for file: '%s'\n", fileName);
-                fflush(stderr);
+                        upgradeExistingHubToAssembly(row, userDataDir, encodedParentDir);
+                        }
+                    else
+                        createNewTempHubForUpload(reqId, row, userDataDir, encodedParentDir);
+                    }
+                else if (isTwoBit)
+                    {
+                    // user's hub.txt is authoritative; just flip rows to assemblyHub.
+                    upgradeExistingHubToAssembly(row, userDataDir, encodedParentDir);
+                    }
                 }
+            unlockHubDir(hubLockFd);
             // first make the parentDir rows
-            makeParentDirRows(row->userName, sqlDateToUnixTime(row->lastModified), row->db, row->parentDir, userDataDir);
+            makeParentDirRows(row->userName, sqlDateToUnixTime(row->lastModified), row->db, row->parentDir, userDataDir, row->hubType);
             row->parentDir = encodedParentDir ? hubNameFromPath(encodedParentDir) : "";
             addHubSpaceRowForFile(row);
             fprintf(stderr, "added hubSpace row for file '%s'\n", fileName);
             fflush(stderr);
             }
         }
     if (errCatch->gotError)
         {
         rejectUpload(response, errCatch->message->string);
         // must remove the tusd temp files so if the users tries again after a temp error
         // the upload will work
         if (tusFile)
             {
             mustRemove(tusFile);
             mustRemove(tusInfo);