404d5bb6d8c0418d5f06535ef470e36c35d2a237 chmalee Thu Apr 16 15:57:56 2026 -0700 Add assembly hub support to hubSpace. Users can upload a .2bit to create an assembly hub, optionally alongside their own *.hub.txt (prefix names like araTha1.hub.txt are recognized) and sibling track files. Uploads run in parallel; hub.txt mutations are serialized per-hub via flock so arrival order does not matter. - hubSpace table gains a hubType column ('trackHub' or 'assemblyHub'); ON DUPLICATE KEY UPDATE excludes it so a re-upload cannot revert an upgraded hub. - writeHubText can now emit an assembly stanza derived from the 2bit; upgradeHubTxtForAssembly promotes an existing plain hub.txt in place when a 2bit arrives after tracks. - pre-finish decides synthesize vs upgrade vs leave-alone from server state (existing rows, hub.txt on disk) plus a single client flag (batchHasHubTxt); client-supplied hubType is no longer trusted. - Client UI adds 2bit as a file type, locks the genome field when the hub is authoritative (drilled-in or batch hub.txt), defaults new uploads to an existing assembly hub at top level, and routes hgTracks URLs through 'genome=' vs 'db=' by hubType. - Fix pre-existing nested-path bug in hubPathFromParentDir (*firstSlash = 0). Co-Authored-By: Claude Opus 4.7 (1M context) diff --git src/hg/lib/hubSpace.c src/hg/lib/hubSpace.c index 35e8fd4d2b3..c91b1157740 100644 --- src/hg/lib/hubSpace.c +++ src/hg/lib/hubSpace.c @@ -1,312 +1,328 @@ /* hubSpace.c was originally generated by the autoSql program, which also * generated hubSpace.h and hubSpace.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "hubSpace.h" -char *hubSpaceCommaSepFieldNames = "userName,fileName,fileSize,fileType,creationTime,lastModified,db,location,md5sum,parentDir"; +char *hubSpaceCommaSepFieldNames = "userName,fileName,fileSize,fileType,creationTime,lastModified,db,location,md5sum,parentDir,hubType"; void hubSpaceStaticLoad(char **row, struct hubSpace *ret) /* Load a row from hubSpace table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->userName = row[0]; ret->fileName = row[1]; ret->fileSize = sqlLongLong(row[2]); ret->fileType = row[3]; ret->creationTime = row[4]; ret->lastModified = row[5]; ret->db = row[6]; ret->location = row[7]; ret->md5sum = row[8]; ret->parentDir = row[9]; +ret->hubType = row[10]; } struct hubSpace *hubSpaceLoadByQuery(struct sqlConnection *conn, char *query) /* Load all hubSpace from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with hubSpaceFreeList(). */ { struct hubSpace *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = hubSpaceLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void hubSpaceSaveToDb(struct sqlConnection *conn, struct hubSpace *el, char *tableName, int updateSize) /* Save hubSpace as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. This function automatically escapes quoted strings for mysql. * Uses ON DUPLICATE KEY UPDATE to handle file overwrites. */ { struct dyString *update = dyStringNew(updateSize); -sqlDyStringPrintf(update, "insert into %s values ( '%s','%s',%lld,'%s',NULL,'%s','%s','%s','%s','%s') " +char *hubType = el->hubType ? el->hubType : "trackHub"; +// Exclude hubType from the UPDATE clause so a re-upload does not revert a +// row's hubType back to trackHub after the hub was promoted to assemblyHub +// (e.g. by a 2bit arriving later). The initial INSERT value stands. +sqlDyStringPrintf(update, "insert into %s values ( '%s','%s',%lld,'%s',NULL,'%s','%s','%s','%s','%s','%s') " "ON DUPLICATE KEY UPDATE fileSize=%lld, lastModified='%s', md5sum='%s', location='%s', db='%s'", - tableName, el->userName, el->fileName, el->fileSize, el->fileType, el->lastModified, el->db, el->location, el->md5sum, el->parentDir, + tableName, el->userName, el->fileName, el->fileSize, el->fileType, el->lastModified, el->db, el->location, el->md5sum, el->parentDir, hubType, el->fileSize, el->lastModified, el->md5sum, el->location, el->db); -fprintf(stderr, "hubSpace row insert:\n\n%s\n\n", update->string); -fflush(stderr); sqlUpdate(conn, update->string); dyStringFree(&update); -fprintf(stderr, "hubSpace update successful\n"); -fflush(stderr); } struct hubSpace *hubSpaceLoad(char **row) /* Load a hubSpace from row fetched with select * from hubSpace * from database. Dispose of this with hubSpaceFree(). */ { struct hubSpace *ret; AllocVar(ret); ret->userName = cloneString(row[0]); ret->fileName = cloneString(row[1]); ret->fileSize = sqlLongLong(row[2]); ret->fileType = cloneString(row[3]); ret->creationTime = cloneString(row[4]); ret->lastModified = cloneString(row[5]); ret->db = cloneString(row[6]); ret->location = cloneString(row[7]); ret->md5sum = cloneString(row[8]); ret->parentDir = cloneString(row[9]); +ret->hubType = cloneString(row[10]); return ret; } struct hubSpace *hubSpaceLoadAll(char *fileName) /* Load all hubSpace from a whitespace-separated file. * Dispose of this with hubSpaceFreeList(). */ { struct hubSpace *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[10]; +char *row[11]; while (lineFileRow(lf, row)) { el = hubSpaceLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct hubSpace *hubSpaceLoadAllByChar(char *fileName, char chopper) /* Load all hubSpace from a chopper separated file. * Dispose of this with hubSpaceFreeList(). */ { struct hubSpace *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[10]; +char *row[11]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = hubSpaceLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct hubSpace *hubSpaceCommaIn(char **pS, struct hubSpace *ret) /* Create a hubSpace out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new hubSpace */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->userName = sqlStringComma(&s); ret->fileName = sqlStringComma(&s); ret->fileSize = sqlLongLongComma(&s); ret->fileType = sqlStringComma(&s); ret->creationTime = sqlStringComma(&s); ret->lastModified = sqlStringComma(&s); ret->db = sqlStringComma(&s); ret->location = sqlStringComma(&s); ret->md5sum = sqlStringComma(&s); ret->parentDir = sqlStringComma(&s); +ret->hubType = sqlStringComma(&s); *pS = s; return ret; } void hubSpaceFree(struct hubSpace **pEl) /* Free a single dynamically allocated hubSpace such as created * with hubSpaceLoad(). */ { struct hubSpace *el; if ((el = *pEl) == NULL) return; freeMem(el->userName); freeMem(el->fileName); freeMem(el->fileType); freeMem(el->creationTime); freeMem(el->lastModified); freeMem(el->db); freeMem(el->location); freeMem(el->md5sum); freeMem(el->parentDir); +freeMem(el->hubType); freez(pEl); } void hubSpaceFreeList(struct hubSpace **pList) /* Free a list of dynamically allocated hubSpace's */ { struct hubSpace *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; hubSpaceFree(&el); } *pList = NULL; } void hubSpaceOutput(struct hubSpace *el, FILE *f, char sep, char lastSep) /* Print out hubSpace. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->userName); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->fileName); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%lld", el->fileSize); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->fileType); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->creationTime); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->lastModified); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); -fputc(sep,f); -if (sep == ',') fputc('"',f); fprintf(f, "%s", el->db); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->location); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->md5sum); if (sep == ',') fputc('"',f); +fputc(sep,f); +if (sep == ',') fputc('"',f); fprintf(f, "%s", el->parentDir); if (sep == ',') fputc('"',f); +fputc(sep,f); +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->hubType ? el->hubType : "trackHub"); +if (sep == ',') fputc('"',f); fputc(lastSep,f); } void hubSpaceJsonOutput(struct hubSpace *el, FILE *f) /* Print out hubSpace in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"userName"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->userName); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"fileName"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->fileName); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"fileSize"); fputc('"',f); fputc(':',f); fprintf(f, "%lld", el->fileSize); fputc(',',f); fputc('"',f); fprintf(f,"fileType"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->fileType); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"creationTime"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->creationTime); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"lastModified"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->lastModified); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"db"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->db); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"location"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->location); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"md5sum"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->md5sum); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"hubNameList"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->parentDir); fputc('"',f); +fputc(',',f); +fputc('"',f); +fprintf(f,"hubType"); +fputc('"',f); +fputc(':',f); +fputc('"',f); +fprintf(f, "%s", el->hubType ? el->hubType : "trackHub"); +fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */