55de7ef426a462247f89f48afd9aa6a2eb2cdd19
chmalee
  Thu Jan 16 17:11:17 2025 -0800
Remove a bunch of dead experimental hubspace code, and make sure to cgi-encode hub names, refs #35018

diff --git src/hg/lib/userdata.c src/hg/lib/userdata.c
index 5cfe613..d62bd66 100644
--- src/hg/lib/userdata.c
+++ src/hg/lib/userdata.c
@@ -1,516 +1,411 @@
 /* userdata.c - code for managing data stored on a per user basis */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 
 #include "common.h"
 #include "hash.h"
 #include "portable.h"
 #include "trashDir.h"
 #include "md5.h"
 #include "hgConfig.h"
 #include "dystring.h"
 #include "cheapcgi.h"
 #include "customFactory.h"
 #include "wikiLink.h"
 #include "userdata.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hubSpace.h"
 #include "hubSpaceQuotas.h"
 #include <limits.h>
 
 char *getUserName()
 /* Query the right system for the users name */
 {
 return (loginSystemEnabled() || wikiLinkEnabled()) ? wikiLinkUserName() : NULL;
 }
 
 char *emailForUserName(char *userName)
 /* Fetch the email for this user from gbMembers hgcentral table */
 {
 struct sqlConnection *sc = hConnectCentral();
 struct dyString *query = sqlDyStringCreate("select email from gbMembers where userName = '%s'", userName);
 char *email = sqlQuickString(sc, dyStringCannibalize(&query));
 hDisconnectCentral(&sc);
 // this should be freeMem'd:
 return email;
 }
 
 char *getDataDir(char *userName)
 /* Return the full path to the user specific data directory, can be configured via hg.conf
  * on hgwdev, this is /data/apache/userdata/hubspace/hash/userName/
  * on the RR, this is /userdata/hubspace/hash/userName/ */
 {
 char *userDataBaseDir = cfgOption("userDataDir");
 if (!userDataBaseDir  || isEmpty(userDataBaseDir))
     errAbort("trying to save user file but no userDataDir defined in hg.conf");
 if (userDataBaseDir[0] != '/')
     errAbort("config setting userDataDir must be an absolute path (starting with '/')");
 
 char *encUserName = cgiEncode(userName);
 char *userPrefix = md5HexForString(encUserName);
 userPrefix[2] = '\0';
 
 struct dyString *newDataDir = dyStringNew(0);
 dyStringPrintf(newDataDir, "%s/%s/%s/", 
     userDataBaseDir, userPrefix, encUserName);
 
 return dyStringCannibalize(&newDataDir);
 }
 
 char *stripDataDir(char *fname, char *userName)
 /* Strips the getDataDir(userName) off of fname */
 {
 char *dataDir = getDataDir(userName);
 int prefixSize = strlen(dataDir);
 if (startsWith(dataDir, fname))
     {
     char *ret = fname + prefixSize;
     return ret;
     }
 return NULL;
 }
 
 char *getHubDataDir(char *userName, char *hub)
 {
 char *dataDir = getDataDir(userName);
-return catTwoStrings(dataDir, hub);
+return catTwoStrings(dataDir, cgiEncode(hub));
 }
 
 char *hubSpaceUrl = NULL;
 static char *getHubSpaceUrl()
 {
 if (!hubSpaceUrl)
     hubSpaceUrl = cfgOption("hubSpaceUrl");
 return hubSpaceUrl;
 }
 
 char *webDataDir(char *userName)
 /* Return a web accesible path to the userDataDir, this is different from the full path tusd uses */
 {
 char *retUrl = NULL;
 if (userName)
     {
     char *encUserName = cgiEncode(userName);
     char *userPrefix = md5HexForString(encUserName);
     userPrefix[2] = '\0';
     struct dyString *userDirDy = dyStringNew(0);
     dyStringPrintf(userDirDy, "%s/%s/%s/", getHubSpaceUrl(), userPrefix, encUserName);
     retUrl = dyStringCannibalize(&userDirDy);
     }
 return retUrl;
 }
 
 char *prefixUserFile(char *userName, char *fname, char *parentDir)
-/* Allocate a new string that contains the full per-user path to fname, NULL otherwise.
+/* Allocate a new string that contains the full per-user path to fname. return NULL if
+ * we cannot construct a full path because of a realpath(3) failure.
  * parentDir is optional and will go in between the per-user dir and the fname */
 {
 char *pathPrefix = getDataDir(userName);
 char *path = NULL;
 if (pathPrefix)
     {
     if (parentDir)
         {
         struct dyString *ret = dyStringCreate("%s%s%s%s", pathPrefix, parentDir, lastChar(parentDir) == '/' ? "" : "/", fname);
         path = dyStringCannibalize(&ret);
         }
     else
         path = catTwoStrings(pathPrefix, fname);
     char canonicalPath[PATH_MAX];
     realpath(path, canonicalPath);
     // after canonicalizing the path, make sure it starts with the userDataDir, to prevent
     // deleting files like blah/../../../../systemFile.text
     if (startsWith(pathPrefix, canonicalPath))
         return cloneString(canonicalPath);
     }
 return NULL;
 }
 
 static boolean checkHubSpaceRowExists(struct hubSpace *row)
 /* Return TRUE if row already exists */
 {
 struct sqlConnection *conn = hConnectCentral();
 struct dyString *queryCheck = sqlDyStringCreate("select count(*) from hubSpace where userName='%s' and fileName='%s' and parentDir='%s'", row->userName, row->fileName, row->parentDir);
 int ret = sqlQuickNum(conn, dyStringCannibalize(&queryCheck));
 hDisconnectCentral(&conn);
 return ret > 0;
 }
 
 char *hubNameFromPath(char *path)
 /* Return the last directory component of path. Assume that a '.' char in the last component
  * means that component is a filename and go back further */
 {
-fprintf(stderr, "hubNameFromPath('%s')\n", path);
-fflush(stderr);
 char *copy = cloneString(path);
 if (endsWith(copy, "/"))
     trimLastChar(copy);
 char *ptr = strrchr(copy, '/');
 // check to see if we're in a file name, like /blah/blah/name/hub.txt
 if (ptr)
     {
     if (strchr(ptr, '.'))
         {
         *ptr = 0;
         ptr = strrchr(copy, '/');
         }
     if (ptr)
         {
         ++ptr;
         fprintf(stderr, "ptr= '%s'\n", ptr);
         fflush(stderr);
         return cloneString(ptr);
         }
     }
 return copy;
 }
 
 void addHubSpaceRowForFile(struct hubSpace *row)
 /* We created a file for a user, now add an entry to the hubSpace table for it */
 {
 struct sqlConnection *conn = hConnectCentral();
 
 // now write out row to hubSpace table
 if (!sqlTableExistsOnMain(conn, "hubSpace"))
     {
     errAbort("No hubSpace MySQL table is present. Please send an email to genome-www@soe.ucsc.edu  describing the exact steps you took just before you got this error");
     }
 hubSpaceSaveToDb(conn, row, "hubSpace", 0);
 hDisconnectCentral(&conn);
 }
 
 void makeParentDirRows(char *userName, time_t lastModified, char *db, char *parentDirStr, char *userDataDir)
 /* For each '/' separated component of parentDirStr, create a row in hubSpace. Return the 
  * final subdirectory component of parentDirStr */
 {
 int i, slashCount = countChars(parentDirStr, '/');
 char *components[256];
 struct dyString *currLocation = dyStringCreate("%s", userDataDir);
 int foundSlashes = chopByChar(cloneString(parentDirStr), '/', components, slashCount);
 if (foundSlashes > 256)
     errAbort("parentDir setting '%s' too long", parentDirStr);
 for (i = 0; i < foundSlashes; i++)
     {
     char *subdir = components[i];
     if (sameString(subdir, "."))
         continue;
     fprintf(stderr, "making row for parent dir: '%s'\n", subdir);
     if (!subdir)
         errAbort("error: empty subdirectory components for parentDir string '%s'", parentDirStr);
     dyStringAppend(currLocation, components[i]);
     dyStringAppendC(currLocation, '/');
     struct hubSpace *row = NULL;
     AllocVar(row);
     row->userName = userName;
     row->fileName = subdir;
     row->fileSize = 0;
     row->fileType = "dir";
     row->creationTime = NULL;
     row->lastModified = sqlUnixTimeToDate(&lastModified, TRUE);
     row->db = db;
     row->location = cloneString(dyStringContents(currLocation));
     row->md5sum = "";
     row->parentDir = i > 0 ? components[i-1] : "";
     // only insert a row for this parentDir if it's unique to the table
     if (!checkHubSpaceRowExists(row))
         addHubSpaceRowForFile(row);
     }
 }
 
 char *writeHubText(char *path, char *userName, char *db)
 /* Create a hub.txt file, optionally creating the directory holding it. For convenience, return
  * the file name of the created hub, which can be freed. */
 {
 int oldUmask = 00;
 oldUmask = umask(0);
 makeDirsOnPath(path);
 // restore umask
 umask(oldUmask);
 // now make the hub.txt with some basic information
 char *hubFile = NULL;
 struct dyString *hubFileDy = dyStringCreate("%s%shub.txt", path, endsWith(path, "/") ? "" : "/");
 hubFile = dyStringCannibalize(&hubFileDy);
 if (fileExists(hubFile))
     return hubFile;
 
 char *hubName = hubNameFromPath(path);
 FILE *f = mustOpen(hubFile, "w");
 fprintf(f, "hub %s\n"
     "email %s\n"
     "shortLabel %s\n"
     "longLabel %s\n"
     "useOneFile on\n"
     "\n"
     "genome %s\n"
     "\n",
     hubName, emailForUserName(userName), hubName, hubName, db);
 carefulClose(&f);
 return hubFile;
 }
 
 static char *hubPathFromParentDir(char *parentDir, char *userDataDir)
 /* Assume parentDir does not have leading '/' or '.', parse out the first dir component
  * and add it to the users directory*/
 {
 char *copy = cloneString(parentDir);
 char *firstSlash = strchr(copy, '/');
 if (!firstSlash)
     {
     return copy;
     }
 firstSlash = 0;
 return catTwoStrings(userDataDir, copy);
 }
 
 static void writeTrackStanza(char *hubFileName, char *track, char *bigDataUrl, char *type, char *label, char *bigFileLocation)
 {
 FILE *f = mustOpen(hubFileName, "a");
 char *trackDbType = type;
 if (sameString(type, "bigBed"))
     {
     // figure out the type based on the bbiFile header
     struct bbiFile *bbi = bigBedFileOpen(bigFileLocation);
     char tdbType[32];
     safef(tdbType, sizeof(tdbType), "bigBed %d%s", bbi->definedFieldCount, bbi->fieldCount > bbi->definedFieldCount ? " +" : "");
     trackDbType = tdbType;
     bigBedFileClose(&bbi);
     }
 fprintf(f, "track %s\n"
     "bigDataUrl %s\n"
     "type %s\n"
     "shortLabel %s\n"
     "longLabel %s\n"
     "\n",
     track, bigDataUrl, trackDbType, label, label);
 carefulClose(&f);
 }
 
 static char *writeHubStanzasForFile(struct hubSpace *rowForFile, char *userDataDir, char *parentDir)
 /* Create a hub.txt (if necessary) and add track stanzas for the file described by rowForFile.
  * Returns the path to the hub.txt */
 {
 char *hubFileName = NULL;
 char *hubDir = hubPathFromParentDir(rowForFile->parentDir, userDataDir);
 fprintf(stderr, "hubDir: %s\n", hubDir);
 hubFileName = writeHubText(hubDir, rowForFile->userName, rowForFile->db);
 
 char *encodedTrack = cgiEncodeFull(rowForFile->fileName);
 writeTrackStanza(hubFileName, encodedTrack, encodedTrack, rowForFile->fileType, encodedTrack, rowForFile->location);
 return hubFileName;
 }
 
 void createNewTempHubForUpload(char *requestId, struct hubSpace *rowForFile, char *userDataDir, char *parentDir)
 /* Creates a hub.txt for this upload, and updates the hubSpace table for the
  * hub.txt and any parentDirs we need to create. */
 {
 // first create the hub.txt if necessary and write the stanza for this track
 char *hubPath = writeHubStanzasForFile(rowForFile, userDataDir, parentDir);
 
 // update the mysql table with a record of the hub.txt:
 struct hubSpace *hubTextRow = NULL;
 AllocVar(hubTextRow);
 hubTextRow->userName = rowForFile->userName;
 hubTextRow->fileName = "hub.txt";
 hubTextRow->fileSize = fileSize(hubPath);
 hubTextRow->fileType = "hub.txt";
 hubTextRow->creationTime = NULL;
 time_t lastModTime = fileModTime(hubPath);
 hubTextRow->lastModified = sqlUnixTimeToDate(&lastModTime, TRUE);
 hubTextRow->db = rowForFile->db;
 hubTextRow->location = hubPath;
 hubTextRow->md5sum = md5HexForFile(hubPath);
 hubTextRow->parentDir = hubNameFromPath(hubPath);
 if (!checkHubSpaceRowExists(hubTextRow))
     addHubSpaceRowForFile(hubTextRow);
 }
 
 static void deleteHubSpaceRow(char *fname, char *userName)
 /* Deletes a row from the hubspace table for a given fname */
 {
 struct sqlConnection *conn = hConnectCentral();
 struct dyString *deleteQuery = sqlDyStringCreate("delete from hubSpace where location='%s' and userName='%s'", fname, userName);
 sqlUpdate(conn, dyStringCannibalize(&deleteQuery));
 }
 
 void removeFileForUser(char *fname, char *userName)
 /* Remove a file for this user if it exists */
 {
 // The file to remove must be prefixed by the hg.conf userDataDir
 if (!startsWith(getDataDir(userName), fname))
     return;
 if (fileExists(fname))
     {
     // delete the actual file
     mustRemove(fname);
     // delete the table row
     deleteHubSpaceRow(fname, userName);
     }
-}
-
-void removeHubForUser(char *path, char *userName)
-/* Remove a hub directory for this user (and all files in the directory), if it exists */
-{
-if (!startsWith(getDataDir(userName), path))
-    return;
-if (isDirectory(path))
-    {
-    struct fileInfo *f, *flist = listDirX(path, NULL, TRUE);
-    for (f = flist; f != NULL; f = f->next)
-        mustRemove(f->name);
-    // now we have deleted all the files in the dir we can safely rmdir
-    mustRemove(path);
-    deleteHubSpaceRow(path, userName);
-    }
-}
-
-static time_t getFileListLatestTime(struct userFiles *userFiles)
-/* Return the greatest last access time of the files in userFiles->fileList */
-{
-if (!userFiles->fileList)
-    errAbort("no files in userFiles->fileList");
-time_t modTime = 0;
-struct fileInfo *f;
-for (f = userFiles->fileList; f != NULL; f = f->next)
-    {
-    if (f->lastAccess > modTime)
-        {
-        modTime = f->lastAccess;
-        }
-    }
-return modTime;
-}
-
-time_t getHubLatestTime(struct userHubs *hub)
-/* Return the latest access time of the files in a hub */
-{
-// NOTE: every hub is guaranteed to have at least one file
-return getFileListLatestTime(hub->fileList);
-}
-
-char *findParentDirs(char *parentDir, char *userName, char *fname)
-/* For a given file with parentDir, go up the tree and find the full path back to
- * the rootmost parentDir */
-{
-return NULL;
-}
-
-struct userFiles *listFilesForUserHub(char *userName, char *hubName)
-/* Get all the files for a particular hub for a particular user */
-{
-struct userFiles *userListing;
-AllocVar(userListing);
-char *path = getHubDataDir(userName, hubName);
-struct fileInfo *fiList = listDirX(path,NULL,FALSE);
-userListing->userName = userName;
-userListing->fileList = fiList;
-return userListing;
-}
-
-struct userHubs *listHubsForUser(char *userName)
-/* Lists the directories for a particular user */
-{
-struct userHubs *userHubs = NULL;
-char *path = getDataDir(userName);
-struct fileInfo *fi, *fiList = listDirX(path,NULL,FALSE);
-for (fi = fiList; fi != NULL; fi = fi->next)
-    {
-    if (fi->isDir)
-        {
-        struct userHubs *hub;
-        AllocVar(hub);
-        hub->hubName = cloneString(fi->name);
-        hub->userName = cloneString(userName);
-        char hubPath[PATH_LEN];
-        safef(hubPath, sizeof(hubPath), "%s%s", path, fi->name);
-        struct userFiles *hubFileList = listFilesForUserHub(userName, hub->hubName);
-        hub->lastModified = getFileListLatestTime(hubFileList);
-        hub->fileList = hubFileList;
-        slAddHead(&userHubs, hub);
-        }
-    }
-return userHubs;
+// TODO: we should also modify the hub.txt associated with this file
 }
 
 struct hubSpace *listFilesForUser(char *userName)
 /* Return the files the user has uploaded */
 {
 struct sqlConnection *conn = hConnectCentral();
 struct dyString *query = sqlDyStringCreate("select userName, fileName, fileSize, fileType, creationTime, DATE_FORMAT(lastModified, '%%c/%%d/%%Y, %%l:%%i:%%s %%p') as lastModified, db, location, md5sum, parentDir from hubSpace where userName='%s' order by location,creationTime", userName);
 struct hubSpace *fileList = hubSpaceLoadByQuery(conn, dyStringCannibalize(&query));
 hDisconnectCentral(&conn);
 return fileList;
 }
 
 #define defaultHubName "defaultHub"
 char *defaultHubNameForUser(char *userName)
 /* Return a name to use as a default for a hub, starts with defaultHub, then defaultHub2, ... */
 {
 if (!userName)
     return defaultHubName;
 struct dyString *query = sqlDyStringCreate("select distinct(fileName) from hubSpace where parentDir='' and fileName like '%s%%' and userName='%s'", defaultHubName, userName);
 struct sqlConnection *conn = hConnectCentral();
 struct slName *hubNames = sqlQuickList(conn, dyStringCannibalize(&query));;
 hDisconnectCentral(&conn);
 if (hubNames == NULL)
     // user has no hubs created
     return defaultHubName;
 slSort(&hubNames,slNameCmpStringsWithEmbeddedNumbers);
 slReverse(&hubNames);
 // now the first element of the list has the most recent integer to use (or no integer)
 char *currHubName = cloneString(hubNames->name);
 int currHubStrLen = strlen(currHubName);
 int defaultLen = strlen(defaultHubName);
 if (currHubStrLen == defaultLen)
     // probably a common case
     return "defaultHub2";
 else
     {
     currHubName[defaultLen-1] = 0;
     currHubName += strlen(defaultHubName);
     int hubNum = sqlUnsigned(currHubName) + 1;
     struct dyString *hubName = dyStringCreate("%s%d", defaultHubName, hubNum);
     return dyStringCannibalize(&hubName);
     }
 }
 
 long long getMaxUserQuota(char *userName)
 /* Return how much space is allocated for this user or the default */
 {
 long long specialQuota = quotaForUserName(userName);
 return specialQuota == 0 ? HUB_SPACE_DEFAULT_QUOTA : specialQuota;
 }
 
 long long checkUserQuota(char *userName)
 /* Return the amount of space a user is currently using */
 {
 long long quota = 0;
 struct hubSpace *hubSpace, *hubSpaceList = listFilesForUser(userName);
 for (hubSpace = hubSpaceList; hubSpace != NULL; hubSpace = hubSpace->next)
     {
     quota += hubSpace->fileSize;
     }
 return quota;
 }
-
-char *storeUserFile(char *userName, char *newFileName, void *data, size_t dataSize)
-/* Give a fileName and a data stream, write the data to:
- * userDataDir/hashedUserName/userName/fileName
- * where userDataDir comes from hg.conf and
- * hashedUserName is based on the md5sum of the userName
- * to prevent proliferation of too many directories.
- *
- * After sucessfully saving the file, return a web accessible url
- * to the file. */
-{
-char *userDir = getDataDir(userName);
-makeDirsOnPath(userDir);
-char *pathToFile = catTwoStrings(userDir, newFileName);
-FILE *newFile = mustOpen(pathToFile, "wb");
-// the data will start with a line feed so get rid of that
-mustWrite(newFile, data, dataSize);
-// missing an EOF?
-carefulClose(&newFile);
-return pathToFile;
-}