8bf8ee14d61c75fca3ea5e005b66b8c1b7ee3560 braney Thu Jan 4 15:20:27 2024 -0800 add a better error message when trying to use identifiers with whole genome regions in hgTables diff --git src/hg/hgTables/bigBed.c src/hg/hgTables/bigBed.c index 0feb564..68567b5 100644 --- src/hg/hgTables/bigBed.c +++ src/hg/hgTables/bigBed.c @@ -15,30 +15,31 @@ #include "bed.h" #include "hdb.h" #include "trackDb.h" #include "obscure.h" #include "hmmstats.h" #include "correlate.h" #include "asParse.h" #include "bbiFile.h" #include "bigBed.h" #include "hubConnect.h" #include "asFilter.h" #include "hgTables.h" #include "trackHub.h" #include "chromAlias.h" #include "bPlusTree.h" +#include "errCatch.h" boolean isBigBed(char *database, char *table, struct trackDb *parent, struct customTrack *(*ctLookupName)(char *table)) /* Local test to see if something is big bed. Handles hub tracks unlike hIsBigBed. */ { struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); // if "table" is explicitly listed, we're going to use that instead of any bigDataUrl if (tdb && hashLookup(tdb->settingsHash, "table")) return FALSE; if (tdb) return tdbIsBigBed(tdb); else return hIsBigBed(database, table, parent, ctLookupName); @@ -142,53 +143,74 @@ bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount); if (asFilterOnRow(filter, row)) { if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL)) continue; struct bed *bed = bedLoadN(row, bbi->definedFieldCount); struct bed *lmBed = lmCloneBed(bed, bedLm); slAddHead(pBedList, lmBed); bedFree(&bed); } } lmCleanup(&bbLm); } +static struct bptFile *getNameIndexOrDie(struct bbiFile *bbi, int *pFieldIndex) +/* Return the index on the 'name' field in the passed bbi. errAbort on failure. */ +{ +struct bptFile *bpt = NULL; +struct errCatch *errCatch = errCatchNew(); + +if (errCatchStart(errCatch)) + { + bpt = bigBedOpenExtraIndex(bbi, "name", pFieldIndex); + } +errCatchEnd(errCatch); +if (errCatch->gotError) + { + errAbort("Getting identifiers from whole genome regions requires an index on the name field of the bigBedFile %s", bbi->fileName); + } +errCatchFree(&errCatch); + +return bpt; +} + struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from bigBed, in all regions, that pass filtering. */ { /* Connect to big bed and get metadata and filter. */ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpenAlias(fileName, chromAliasFindAliases); struct asObject *as = bigBedAsOrDefault(bbi); struct asFilter *filter = asFilterFromCart(cart, db, table, as); struct bed *bedList = NULL; /* If we're doing a whole-genome query with a name index then use the name index to retrieve items * instead of iterating over regions. */ struct hash *idHash = NULL; if (bbi->definedFieldCount >= 4) idHash = identifierHash(db, table); int fieldIx; struct bptFile *bpt = NULL; struct lm *bbLm = NULL; struct bigBedInterval *ivList = NULL; if (idHash && isRegionWholeGenome()) - bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx); + bpt = getNameIndexOrDie(bbi, &fieldIx); + if (bpt != NULL) { struct slName *nameList = hashSlNameFromHash(idHash), *name; int count = slCount(nameList); char *names[count]; int ii; for (ii=0, name = nameList; ii < count; ii++, name = name->next) { names[ii] = name->name; } bbLm = lmInit(0); ivList = bigBedMultiNameQuery(bbi, bpt, fieldIx, names, count, bbLm); slNameFreeList(&nameList); } struct region *region; @@ -304,31 +326,31 @@ struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } struct bptFile *bpt = NULL; int fieldIx; if (idHash && isRegionWholeGenome()) - bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx); + bpt = getNameIndexOrDie(bbi, &fieldIx); char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; if (bpt) // if we have an index it means we're whole genome and don't need to filter based on regions { struct slName *nameList = hashSlNameFromHash(idHash); int count = slCount(nameList); char *names[count]; int ii; for (ii=0; ii < count; ii++) { names[ii] = nameList->name; nameList = nameList->next; }