9440548e67d816a02d14939e7114ab57a12f6965 braney Sat Apr 12 15:35:48 2014 -0700 first cut at writing out search terms from public hubs #11633 diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c index 6e476ee..05d16cd 100644 --- src/hg/lib/trackHub.c +++ src/hg/lib/trackHub.c @@ -29,30 +29,32 @@ #include "hgBam.h" #include "bigWig.h" #include "bigBed.h" #include "hdb.h" #include "chromInfo.h" #include "grp.h" #include "twoBit.h" #include "dbDb.h" #include "net.h" #include "bbiFile.h" #include "bPlusTree.h" #include "hgFind.h" #include "hubConnect.h" #include "trix.h" #include "vcf.h" +#include "htmshell.h" +#include "hubConnect.h" static struct hash *hubCladeHash; // mapping of clade name to hub pointer static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct static struct hash *hubOrgHash; // mapping from organism name to hub pointer struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart char *trackHubRelativeUrl(char *hubUrl, char *path) /* Return full path (in URL form if it's a remote hub) given * path possibly relative to hubUrl. Do a freeMem of result * when done. */ { /* If path itself is a URL then just return a copy of it. */ if (hasProtocol(path)) return cloneString(path); @@ -724,30 +726,31 @@ /* Get list of tracks associated with genome. Check that it only is composed of legal * types. Do a few other quick checks to catch errors early. */ { struct lineFile *lf = udcWrapShortLineFile(genome->trackDbFile, NULL, 64*1024*1024); struct trackDb *tdbList = trackDbFromOpenRa(lf, NULL); lineFileClose(&lf); /* Make bigDataUrls more absolute rather than relative to genome.ra dir */ struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) expandBigDataUrl(hub, genome, tdb); validateTracks(hub, genome, tdbList); trackDbAddTableField(tdbList); +if (!isEmpty(hub->name)) trackHubAddNamePrefix(hub->name, tdbList); if (genome->twoBitPath == NULL) trackHubAddGroupName(hub->name, tdbList); for (tdb = tdbList; tdb != NULL; tdb = tdb->next) { trackDbFieldsFromSettings(tdb); trackDbPolish(tdb); } return tdbList; } static void reprefixString(char **pString, char *prefix) /* Replace *pString with prefix + *pString, freeing * whatever was in *pString before. */ { @@ -794,46 +797,96 @@ return name; return strchr(&name[4], '_') + 1; } void trackHubAddGroupName(char *hubName, struct trackDb *tdbList) /* Add group tag that references the hubs symbolic name. */ { struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) { tdb->grp = cloneString(hubName); hashReplace(tdb->settingsHash, "group", tdb->grp); } } +static void addOneDescription(char *trackDbFile, struct trackDb *tdb) +/* Fetch tdb->track's html description and store in tdb->html. */ +{ +/* html setting should always be set because we set it at load time */ +char *htmlName = trackDbSetting(tdb, "html"); +if (htmlName == NULL) + return; + +char *simpleName = hubConnectSkipHubPrefix(htmlName); +char *url = trackHubRelativeUrl(trackDbFile, simpleName); +char buffer[10*1024]; +safef(buffer, sizeof buffer, "%s.html", url); +tdb->html = netReadTextFileIfExists(buffer); +freez(&url); +} + +void trackHubAddDescription(char *trackDbFile, struct trackDb *tdb) +/* Fetch tdb->track's html description (or nearest ancestor's non-empty description) + * and store in tdb->html. */ +{ +addOneDescription(trackDbFile, tdb); +if (isEmpty(tdb->html)) + { + struct trackDb *parent; + for (parent = tdb->parent; isEmpty(tdb->html) && parent != NULL; parent = parent->parent) + { + addOneDescription(trackDbFile, parent); + if (isNotEmpty(parent->html)) + tdb->html = cloneString(parent->html); + } + } +} + static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, - struct trackDb *tdb, struct dyString *errors) + struct trackDb *tdb, struct dyString *errors, FILE *searchFp) /* Make sure that track is ok. */ { +int retVal = 0; struct errCatch *errCatch = errCatchNew(); + +if (errCatchStart(errCatch)) + { + if (searchFp != NULL) + { + addOneDescription(genome->trackDbFile, tdb); + if (tdb->html != NULL) + { + char *stripHtml =htmlTextStripTags(tdb->html); + strSwapChar(stripHtml, '\n', ' '); + strSwapChar(stripHtml, '\t', ' '); + fprintf(searchFp, "%s.%s\t%s\t%s\t%s\n",hub->url, tdb->track, + tdb->shortLabel, tdb->longLabel, stripHtml); + } + else + fprintf(searchFp, "%s.%s\t%s\t%s\n",hub->url, tdb->track, + tdb->shortLabel, tdb->longLabel); + } + else + { char *relativeUrl = trackDbSetting(tdb, "bigDataUrl"); -int retVal = 0; + char *type = trackDbRequiredSetting(tdb, "type"); if (relativeUrl != NULL) { - if (errCatchStart(errCatch)) - { char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl); - char *type = trackDbRequiredSetting(tdb, "type"); verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl); - if (startsWithWord("bigWig", type)) { /* Just open and close to verify file exists and is correct type. */ struct bbiFile *bbi = bigWigFileOpen(bigDataUrl); bbiFileClose(&bbi); } else if (startsWithWord("bigBed", type)) { /* Just open and close to verify file exists and is correct type. */ struct bbiFile *bbi = bigBedFileOpen(bigDataUrl); bbiFileClose(&bbi); } else if (startsWithWord("vcfTabix", type)) { /* Just open and close to verify file exists and is correct type. */ @@ -841,38 +894,39 @@ if (vcf == NULL) // Warnings already indicated whether the tabix file is missing etc. errAbort("Couldn't open %s and/or its tabix index (.tbi) file. " "See http://genome.ucsc.edu/goldenPath/help/vcf.html", bigDataUrl); vcfFileFree(&vcf); } else if (startsWithWord("bam", type)) { bamFileAndIndexMustExist(bigDataUrl); } else errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track); freez(&bigDataUrl); } + } + } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); - } return retVal; } void trackHubFixName(char *name) /* Change all characters other than alphanumeric, dash, and underbar * to underbar. */ { if (name == NULL) return; char *in = name; char c; for(; (c = *in) != 0; in++) @@ -918,93 +972,94 @@ for (tdb = tdbList; tdb != NULL; tdb = next) { if (tdb->parent != NULL) polishOneTrack(hub, tdb->parent, nameHash); next = tdb->next; polishOneTrack(hub, tdb, nameHash); if (tdb->subtracks != NULL) { trackHubPolishTrackNames(hub, tdb->subtracks); } } } static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome, - struct dyString *errors, boolean checkTracks) + struct dyString *errors, boolean checkTracks, FILE *searchFp) /* Check out genome within hub. */ { struct errCatch *errCatch = errCatchNew(); struct trackDb *tdbList = NULL; int retVal = 0; if (errCatchStart(errCatch)) { tdbList = trackHubTracksForGenome(hub, genome); trackHubPolishTrackNames(hub, tdbList); } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); if (!checkTracks) return retVal; struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) - retVal |= hubCheckTrack(hub, genome, tdb, errors); + retVal |= hubCheckTrack(hub, genome, tdb, errors, searchFp); verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name); return retVal; } -int trackHubCheck(char *hubUrl, struct dyString *errors, boolean checkTracks) +int trackHubCheck(char *hubUrl, struct dyString *errors, + boolean checkTracks, FILE *searchFp) /* hubCheck - Check a track data hub for integrity. Put errors in dyString. * return 0 if hub has no errors, 1 otherwise * if checkTracks is TRUE, individual tracks are checked */ { struct errCatch *errCatch = errCatchNew(); struct trackHub *hub = NULL; int retVal = 0; if (errCatchStart(errCatch)) hub = trackHubOpen(hubUrl, ""); errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); if (hub == NULL) return 1; verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel); verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList)); struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { - retVal |= hubCheckGenome(hub, genome, errors, checkTracks); + retVal |= hubCheckGenome(hub, genome, errors, checkTracks, searchFp); } trackHubClose(&hub); return retVal; } static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList, char *description) /* Given an open bigBed file, and an interval list, return a pointer to a list of hgPos structures. */ { struct hgPos *posList = NULL; char chromName[bbi->chromBpt->keySize+1]; int lastChromId = -1; struct bigBedInterval *interval; @@ -1126,15 +1181,16 @@ void trackHubFindPos(char *db, char *term, struct hgPositions *hgp) /* Look for term in track hubs. Update hgp if found */ { struct trackDb *tdbList = NULL; if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); tdbList = trackHubTracksForGenome(genome->trackHub, genome); } else tdbList = hubCollectTracks(db, NULL); findPosInTdbList(tdbList, term, hgp); } +