a9fde73d32daf74780765442de44324061b01d66 markd Sun Jan 22 22:13:52 2023 -0800 Add URL resolver plugin functionality to allow an external program to convert cloud URLs (s3:, gs:, drs:, or really any non-HTTP URL) to http/https URLs. This can include signed URLs. The cloud URL is used to index the UDC cache rather than the resolved URL. This allows for re-resolving signed URLs if they time out. Joint work by Max and Markd diff --git src/hg/utils/hubCrawl/hubCrawl.c src/hg/utils/hubCrawl/hubCrawl.c index 3abf1f8..7b3f235 100644 --- src/hg/utils/hubCrawl/hubCrawl.c +++ src/hg/utils/hubCrawl/hubCrawl.c @@ -238,31 +238,31 @@ } struct hashEl *hel = NULL; if (genome->settingsHash && (hel = hashLookup(genome->settingsHash, "scientificName")) != NULL) { char *sciName = (char *)(hel->val); if (differentString(sciName, genome->name)) { genomeHst->text = cloneString(sciName); hubSearchTextTabOut(genomeHst, searchFp); } } if (genome->settingsHash && (hel = hashLookup(genome->settingsHash, "htmlPath")) != NULL) { char *htmlPath = (char *)(hel->val); genomeHst->textLength = hubSearchTextLong; - char *rawHtml = netReadTextFileIfExists(htmlPath); + char *rawHtml = udcFileReadAllIfExists(htmlPath, NULL, 0, NULL); genomeHst->text = cleanHubHtml(rawHtml); if (isNotEmpty(genomeHst->text)) hubSearchTextTabOut(genomeHst, searchFp); } /* Write out trackDb search text */ boolean foundFirstGenome = FALSE; struct trackDb *tdbList = trackHubTracksForGenome(hub, genome,NULL, &foundFirstGenome); tdbList = trackDbLinkUpGenerations(tdbList); tdbList = trackDbPolishAfterLinkup(tdbList, genome->name); trackHubPolishTrackNames(hub, tdbList); struct hash *visitedTracks = newHash(5); struct trackDb *tdb = NULL; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) @@ -302,31 +302,31 @@ hubHst->db = cloneString(""); hubHst->track = cloneString(""); hubHst->label = cloneString(""); hubHst->textLength = hubSearchTextShort; hubHst->text = cloneString(hub->shortLabel); hubHst->parents = cloneString(""); 
hubHst->parentTypes = cloneString(""); hubSearchTextTabOut(hubHst, searchFp); hubHst->text = cloneString(hub->longLabel); hubSearchTextTabOut(hubHst, searchFp); if (hub->descriptionUrl != NULL) { hubHst->textLength = hubSearchTextLong; - char *rawHtml = netReadTextFileIfExists(hub->descriptionUrl); + char *rawHtml = udcFileReadAllIfExists(hub->descriptionUrl, NULL, 0, NULL); hubHst->text = cleanHubHtml(rawHtml); if (isNotEmpty(hubHst->text)) hubSearchTextTabOut(hubHst, searchFp); } struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) trackHubCrawlGenome(genome, hub, searchFp); trackHubClose(&hub); return retVal; } int main(int argc, char *argv[])