b02d20dede590203b6ecca10be473e7e22179a51 braney Thu Jan 12 13:12:53 2023 -0800 deal with some weird edge cases in hubs that Hiram's hub elicits. Don't read a single trackDb.txt file more than once even if it's referenced from multiple hubs. Also, only the first time a genome is defined does it get to populate its own groups. Subsequent examples get put into their own group, just like a normal track hub does. diff --git src/hg/lib/hubConnect.c src/hg/lib/hubConnect.c index b3d5891..c109cfa 100644 --- src/hg/lib/hubConnect.c +++ src/hg/lib/hubConnect.c @@ -368,31 +368,32 @@ trackHubAddDescription(hubGenome->trackDbFile, tdb); } struct trackDb *hubConnectAddHubForTrackAndFindTdb( char *database, char *trackName, struct trackDb **pTdbList, struct hash *trackHash) /* Go find hub for trackName (which will begin with hub_), and load the tracks * for it, appending to end of list and adding to trackHash. Return the * trackDb associated with trackName. This will also fill in the html fields, * but just for that track and it's parents. 
*/ { unsigned hubId = hubIdFromTrackName(trackName); struct hubConnectStatus *hub = hubFromId(hubId); struct trackHubGenome *hubGenome = trackHubFindGenome(hub->trackHub, database); if (hubGenome == NULL) errAbort("Cannot find genome %s in hub %s", database, hub->hubUrl); -struct trackDb *tdbList = trackHubTracksForGenome(hub->trackHub, hubGenome, NULL); +boolean foundFirstGenome = FALSE; +struct trackDb *tdbList = trackHubTracksForGenome(hub->trackHub, hubGenome, NULL, &foundFirstGenome); tdbList = trackDbLinkUpGenerations(tdbList); tdbList = trackDbPolishAfterLinkup(tdbList, database); //this next line causes warns to print outside of warn box on hgTrackUi //trackDbPrioritizeContainerItems(tdbList); trackHubPolishTrackNames(hub->trackHub, tdbList); char *fixTrackName = cloneString(trackName); trackHubFixName(fixTrackName); rAddTrackListToHash(trackHash, tdbList, NULL, FALSE); if (pTdbList != NULL) *pTdbList = slCat(*pTdbList, tdbList); struct trackDb *tdb = hashFindVal(trackHash, fixTrackName); if (tdb == NULL) // superTracks aren't in the hash... look in tdbList tdb = findSuperTrack(tdbList, fixTrackName); @@ -739,64 +740,71 @@ else if (tHub != NULL) { int dbCount = 0; char *dbList = getDbList(tHub, &dbCount); // users may include quotes in their hub names requiring escaping sqlSafef(query, sizeof(query), "update %s set shortLabel=\"%s\",longLabel=\"%s\",dbCount=\"%d\",dbList=\"%s\",errorMessage=\"\",lastOkTime=now() where id=%d", getHubStatusTableName(), tHub->shortLabel, tHub->longLabel, dbCount, dbList, hub->id); sqlUpdate(conn, query); } hDisconnectCentral(&conn); } -struct trackDb *hubAddTracks(struct hubConnectStatus *hub, char *database) -/* Load up stuff from data hub and return list. */ +struct trackDb *hubAddTracks(struct hubConnectStatus *hub, char *database, boolean *foundFirstGenome, struct hash *trackDbNameHash) +/* Load up stuff from data hub and append to list. The hubUrl points to + * a trackDb.ra format file. 
Only the first example of a genome gets to + * populate groups, the others get a group for the trackHub. A particular + * trackDb is only read once even if referenced from more than one hub. Returns NULL when the hub has no genome for database or its trackDb file is already in trackDbNameHash. */ { -/* Load trackDb.ra file and make it into proper trackDb tree */ struct trackDb *tdbList = NULL; struct trackHub *trackHub = hub->trackHub; if (trackHub != NULL) { struct trackHubGenome *hubGenome = trackHubFindGenome(trackHub, database); + if ((hubGenome != NULL) && hashLookup(trackDbNameHash, hubGenome->trackDbFile)) + hubGenome = NULL; // we already saw this trackDb, so ignore this stanza + else if (hubGenome != NULL) // hub may not define this database at all; never dereference a NULL genome + hashStore(trackDbNameHash, hubGenome->trackDbFile); + if (hubGenome != NULL) { boolean doCache = trackDbCacheOn(); if (doCache) { // we have to open the trackDb file to get the udc cache to check for an update struct udcFile *checkCache = udcFileMayOpen(hubGenome->trackDbFile, NULL); if (checkCache != NULL) { time_t time = udcUpdateTime(checkCache); udcFileClose(&checkCache); struct trackDb *cacheTdb = trackDbHubCache(hubGenome->trackDbFile, time); if (cacheTdb != NULL) return cacheTdb; } memCheckPoint(); // we want to know how much memory is used to build the tdbList } struct dyString *incFiles = dyStringNew(4096); - tdbList = trackHubTracksForGenome(trackHub, hubGenome, incFiles); + tdbList = trackHubTracksForGenome(trackHub, hubGenome, incFiles, foundFirstGenome); tdbList = trackDbLinkUpGenerations(tdbList); tdbList = trackDbPolishAfterLinkup(tdbList, database); trackDbPrioritizeContainerItems(tdbList); trackHubPolishTrackNames(trackHub, tdbList); if (doCache) trackDbHubCloneTdbListToSharedMem(hubGenome->trackDbFile, tdbList, memCheckPoint(), incFiles->string); } } return tdbList; } static struct grp *grpFromHub(struct hubConnectStatus *hub) /* Make up a grp structur from hub */ { @@ -814,39 +822,41 @@ * Make grp structures for each hub. Returned group list is reversed. 
*/ { // return the cached copy if it exists static struct trackDb *hubTrackDbs; static struct grp *hubGroups; if (hubTrackDbs != NULL) { if (pGroupList != NULL) *pGroupList = hubGroups; return hubTrackDbs; } struct hubConnectStatus *hub, *hubList = hubConnectGetHubs(); struct trackDb *tdbList = NULL; +boolean foundFirstGenome = FALSE; +struct hash *trackDbNameHash = newHash(5); for (hub = hubList; hub != NULL; hub = hub->next) { if (isEmpty(hub->errorMessage)) { /* error catching in so it won't just abort */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { - struct trackDb *thisList = hubAddTracks(hub, database); + struct trackDb *thisList = hubAddTracks(hub, database, &foundFirstGenome, trackDbNameHash); tdbList = slCat(tdbList, thisList); } errCatchEnd(errCatch); if (errCatch->gotError) { warn("%s", errCatch->message->string); hubUpdateStatus( errCatch->message->string, hub); } else { struct grp *grp = grpFromHub(hub); slAddHead(&hubGroups, grp); hubUpdateStatus(NULL, hub); }