f07f43476ae7ac72bdbd83603c63191bb699104a galt Tue Nov 4 12:05:12 2025 -0800 Allows local curated genomes by checking for /gbdb/$db. Does not allow those local bigDataUrl directories to be used for custom tracks. Allows those hub URLs with local paths to be loaded in the background for better speed. Allows for vcfTabix etc to work with hs1. Removes need for udc.localDir setting in hg.conf for hgwdev, hgwbeta, RR, and mirrors. Warning: do not set udc.localDir= to an empty string, since that is completely permissive: an empty prefix matches all URLs. fixes #rm34544 diff --git src/hg/lib/customFactory.c src/hg/lib/customFactory.c index ca3d34d79bb..0eea77e3d51 100644 --- src/hg/lib/customFactory.c +++ src/hg/lib/customFactory.c @@ -47,30 +47,34 @@ #include "bedTabix.h" #include "barChartBed.h" #include "barChartUi.h" #include "interact.h" #include "interactUi.h" #include "hic.h" #include "cgiApoptosis.h" #include "chromAlias.h" #include "bedMethyl.h" // placeholder when custom track uploaded file name is not known #define CT_NO_FILE_NAME "custom track" static boolean doExtraChecking = FALSE; +// SET to true to allow loading local URLs in custom tracks. +// We normally do NOT want local CTs. +static boolean allowLocalsForCTs = FALSE; + /*** Utility routines used by many factories. ***/ char *customFactoryNextTilTrack(struct customPp *cpp) /* Return next line. Return NULL at end of input or at line starting with * "track."
*/ { char *line = customPpNext(cpp); if (line != NULL && startsWithWord("track", line)) { customPpReuse(cpp, line); line = NULL; } return line; } @@ -148,79 +152,90 @@ { if (a == NULL && b == NULL) errAbort("sameType should not be called when both inputs are NULL."); else if (a == NULL || b == NULL) return FALSE; char *aCopy = cloneString(a); char *bCopy = cloneString(b); char *aWord = firstWordInLine(aCopy); char *bWord = firstWordInLine(bCopy); boolean same = sameString(aWord, bWord); freeMem(aCopy); freeMem(bCopy); return same; } -boolean isValidBigDataUrl(char *url, boolean doAbort) +boolean isValidBigDataUrl(char *url, boolean doAbort, char *db, boolean allowLocals) /* return True if the URL is a valid bigDataUrl. * It can be a local filename if this is allowed by udc.localDir + * or is localFile for curated genomes /gbdb/$db */ { if ((startsWith("http://", url) || startsWith("https://", url) || startsWith("ftp://", url))) return TRUE; // we allow bigDataUrl's to point to trash (or sessionDataDir, if configured) char *sessionDataDir = cfgOption("sessionDataDir"); char *sessionDataDirOld = cfgOption("sessionDataDirOld"); if (startsWith(trashDir(), url) || (isNotEmpty(sessionDataDir) && startsWith(sessionDataDir, url)) || (isNotEmpty(sessionDataDirOld) && startsWith(sessionDataDirOld, url))) return TRUE; if (udcIsResolvable(url)) return TRUE; +if (allowLocals) + { + char gbdbPrefix[256]; + safef(gbdbPrefix, sizeof gbdbPrefix, "/gbdb/%s/", trackHubSkipHubName(db)); + if (startsWith(gbdbPrefix, url)) + return TRUE; + } + char *prefix = cfgOption("udc.localDir"); if (prefix == NULL) { if (doAbort) errAbort("Only network protocols http, https, or ftp allowed in bigDataUrl: '%s', unless " \ "the udc.localDir variable is set to a prefix of the file's path in the " \ "cgi-bin/hg.conf of this UCSC Genome Browser", url); return FALSE; } else if (!startsWith(prefix, url)) { if (doAbort) errAbort("bigDataUrl '%s' is not an internet URL but udc.localDir is set in 
cgi-bin/hg.conf of this " \ "UCSC Genome Browser, so the bigDataUrl can be a file " \ "on the local hard disk of this UCSC Genome Browser instance. However, for such a file path to be acceptable from " "the local file system, bigDataUrl has to start with the prefix set by udc.localDir, which is '%s' on this Genome Browser.", url, prefix); return FALSE; } else return TRUE; } -static void checkAllowedBigDataUrlProtocols(char *url) +static void checkAllowedBigDataUrlProtocols(char *url, char *db, boolean allowLocals) /* Abort if url is not using one of the allowed bigDataUrl network protocols. * In particular, do not allow a local file reference, unless explicitely - * allowed by hg.conf's udc.localDir directive. */ + * allowed by hg.conf's udc.localDir directive + * or url begins with local path for curated genome /gbdb/$db/ + */ { -isValidBigDataUrl(url, TRUE); +isValidBigDataUrl(url, TRUE, db, allowLocals); } static char *bigDataDocPath(char *type) /* If type is a bigData type, provide a relative path to its custom track/format doc page. */ { char *docUrl = NULL; if (sameString(type, "bigWig")) docUrl = "../goldenPath/help/bigWig.html"; else if (sameString(type, "bigBed")) docUrl = "../goldenPath/help/bigBed.html"; else if (sameString(type, "bam")) docUrl = "../goldenPath/help/bam.html"; else if (sameString(type, "vcfTabix")) docUrl = "../goldenPath/help/vcf.html"; return docUrl; @@ -1810,31 +1825,31 @@ struct customPp *cpp, char *type, struct customTrack *track) /* Return TRUE if looks like we're handling a hic track */ { return (sameType(type, "hic")); } static struct customTrack *hicLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Load up hic data until get next track line. 
*/ { struct hash *settings = track->tdb->settingsHash; char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); if (doExtraChecking) { struct hicMeta *meta; char *hicErrMsg = hicLoadHeader(bigDataUrl, &meta, track->genomeDb); if (hicErrMsg != NULL) { track->networkErrMsg = cloneString(hicErrMsg); } } return track; } struct customFactory hicFactory = /* Factory for Hi-C tracks */ @@ -2840,31 +2855,31 @@ char text[1024]; safef(text, sizeof(text), "%f:%f", sum.minVal, sum.maxVal); hashAdd(settings, "viewLimits", cloneString(text)); } } static struct customTrack *bigWigLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Load up wiggle data until get next track line. */ { /* Not much to this. A bigWig has nothing here but a track line. */ struct hash *settings = track->tdb->settingsHash; char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { track->bbiFile = bigWigFileOpenAlias(bigDataUrl, chromAliasFindAliases); setBbiViewLimits(track); } errCatchEnd(errCatch); if (errCatch->gotError) { track->networkErrMsg = cloneString(errCatch->message->string); } errCatchFree(&errCatch); @@ -3008,31 +3023,31 @@ hashAdd(hash, "baseColorDefault", "diffBases"); } } static struct customTrack *bigBedLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Load up big bed data until get next track line. */ { /* Not much to this. 
A bigBed has nothing here but a track line. */ struct hash *settings = track->tdb->settingsHash; if (sameString(track->tdb->type, "bigPsl")) addSpecialSettings(settings); char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { track->bbiFile = bigBedFileOpenAlias(bigDataUrl, chromAliasFindAliases); } errCatchEnd(errCatch); if (errCatch->gotError) { track->networkErrMsg = cloneString(errCatch->message->string); return track; } errCatchFree(&errCatch); @@ -3066,31 +3081,31 @@ NULL, "bigChain", bigChainRecognizer, bigBedLoader, }; static struct customTrack *bedTabixLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Process the bedTabix track line. 
*/ { struct hash *settings = track->tdb->settingsHash; char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); struct dyString *dyErr = dyStringNew(0); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); if (doExtraChecking) { /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { struct bedTabixFile *btf = bedTabixFileMayOpen(bigDataUrl, NULL, 0, 0); if (btf == NULL) { dyStringPrintf(dyErr, "Unable to open %s's bigDataUrl %s", track->tdb->shortLabel, bigDataUrl); } else bedTabixFileClose(&btf); } @@ -3241,33 +3256,33 @@ return (sameType(type, "bam") || sameType(type, "cram")); } static struct customTrack *bamLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Process the bam track line. */ { struct hash *settings = track->tdb->settingsHash; char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); char *bigDataIndexUrl = hashFindVal(settings, "bigDataIndex"); struct dyString *dyErr = dyStringNew(0); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); if (bigDataIndexUrl != NULL) - checkAllowedBigDataUrlProtocols(bigDataIndexUrl); + checkAllowedBigDataUrlProtocols(bigDataIndexUrl, track->genomeDb, allowLocalsForCTs); if (doExtraChecking) { /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { bamFileAndIndexMustExist(bigDataUrl, bigDataIndexUrl); } errCatchEnd(errCatch); if (isNotEmpty(errCatch->message->string)) dyStringPrintf(dyErr, ": %s", errCatch->message->string); errCatchFree(&errCatch); } @@ -3294,33 +3309,33 @@ { return (sameType(type, "vcfTabix") || sameType(type, 
"vcfPhasedTrio")); } static struct customTrack *vcfTabixLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Process the vcfTabix track line. */ { struct hash *settings = track->tdb->settingsHash; char *bigDataUrl = hashFindVal(settings, "bigDataUrl"); char *bigDataIndexUrl = hashFindVal(settings, "bigDataIndex"); requireBigDataUrl(bigDataUrl, fac->name, track->tdb->shortLabel); struct dyString *dyErr = dyStringNew(0); -checkAllowedBigDataUrlProtocols(bigDataUrl); +checkAllowedBigDataUrlProtocols(bigDataUrl, track->genomeDb, allowLocalsForCTs); if (bigDataIndexUrl) - checkAllowedBigDataUrlProtocols(bigDataIndexUrl); + checkAllowedBigDataUrlProtocols(bigDataIndexUrl, track->genomeDb, allowLocalsForCTs); boolean isVcfPhasedTrio = sameString(hashFindVal(settings,"type"),"vcfPhasedTrio"); if (isVcfPhasedTrio) { char *reqSampleName = hashFindVal(settings, VCF_PHASED_CHILD_SAMPLE_SETTING); if (reqSampleName == NULL) errAbort("Missing required setting '%s' from track line", VCF_PHASED_CHILD_SAMPLE_SETTING); } if (doExtraChecking) { /* protect against temporary network error */ int vcfMaxErr = 100; struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) @@ -4306,45 +4321,45 @@ char temp[256]; safef(temp, sizeof temp, "Track timed out: %s took more than %d milliseconds to load. Zoom in or increase max load time via menu 'Genome Browser > Configure'", pfd->track->tdb->track, maxTimeInMilliseconds); pfd->track->networkErrMsg = cloneString(temp); ++errCount; } for (pfd = pfdDone; pfd; pfd = pfd->next) { // some done tracks may have errors if (pfd->track->networkErrMsg) ++errCount; } pthread_mutex_unlock( &pfdMutex ); return errCount; } -boolean customFactoryParallelLoad(char *bdu, char *type) +boolean customFactoryParallelLoad(char *bdu, char *type, char *db, boolean allowLocals) /* Is this a data type that should be loaded in parallel ? 
*/ { if ((type == NULL) || (bdu == NULL)) return FALSE; return (startsWith("big", type) || startsWithWord("mathWig" , type) || startsWithWord("bam" , type) || startsWithWord("halSnake", type) || startsWithWord("bigRmsk", type) || startsWithWord("bigLolly", type) || startsWithWord("vcfTabix", type)) // XX code-review: shouldn't we error abort if the URL is not valid? - && (bdu && isValidBigDataUrl(bdu, FALSE)) + && (bdu && isValidBigDataUrl(bdu, FALSE, db, allowLocals)) && !(containsStringNoCase(bdu, "dl.dropboxusercontent.com")) && (!startsWith("bigInteract", type)) && (!startsWith("bigMaf", type)); } static struct customTrack *customFactoryParseOptionalDb(char *genomeDb, char *text, boolean isFile, char *fileName, struct slName **retBrowserLines, boolean mustBeCurrentDb, boolean doParallelLoad) /* Parse text into a custom set of tracks. Text parameter is a * file name if 'isFile' is set. If mustBeCurrentDb, die if custom track * is for some database other than genomeDb. * If doParallelLoad is true, load the big tracks */ { struct customTrack *trackList = NULL, *track = NULL; char *line = NULL; @@ -4532,31 +4547,31 @@ #define TYPE_NOT_BIGDATAURL "Type '%s' is not a bigDataUrl type. Please see the documentation links above." if (lf) lineFileAbort(lf, TYPE_NOT_BIGDATAURL, type); else errAbort(TYPE_NOT_BIGDATAURL, type); } else { #define TYPE_UNRECOGNIZED "Unrecognized format 'type=%s'. Please see the documentation links above." 
if (lf) lineFileAbort(lf, TYPE_UNRECOGNIZED, type); else errAbort(TYPE_UNRECOGNIZED, type); } } - if (customFactoryParallelLoad(bigDataUrl, type) && (ptMax > 0)) // handle separately in parallel so long timeouts don't accrue serially + if (customFactoryParallelLoad(bigDataUrl, type, ctDb, allowLocalsForCTs) && (ptMax > 0)) // handle separately in parallel so long timeouts don't accrue serially // (unless ptMax == 0 which means turn parallel loading off) { if (doParallelLoad) { struct paraFetchData *pfd; AllocVar(pfd); pfd->track = track; // need pointer to be stable pfd->fac = fac; slAddHead(&pfdList, pfd); } oneList = track; } else oneList = fac->loader(fac, chromHash, cpp, track, dbTrack);