src/hg/hgTables/bigBed.c 8bf8ee14d61c75fca3ea5e005b66b8c1b7ee3560

8bf8ee14d61c75fca3ea5e005b66b8c1b7ee3560
braney
  Thu Jan 4 15:20:27 2024 -0800
add a better error message when trying to use identifiers with whole
genome regions in hgTables

diff --git src/hg/hgTables/bigBed.c src/hg/hgTables/bigBed.c
index 0feb564..68567b5 100644
--- src/hg/hgTables/bigBed.c
+++ src/hg/hgTables/bigBed.c
@@ -15,30 +15,31 @@
 #include "bed.h"
 #include "hdb.h"
 #include "trackDb.h"
 #include "obscure.h"
 #include "hmmstats.h"
 #include "correlate.h"
 #include "asParse.h"
 #include "bbiFile.h"
 #include "bigBed.h"
 #include "hubConnect.h"
 #include "asFilter.h"
 #include "hgTables.h"
 #include "trackHub.h"
 #include "chromAlias.h"
 #include "bPlusTree.h"
+#include "errCatch.h"
 
 
 boolean isBigBed(char *database, char *table, struct trackDb *parent,
 	struct customTrack *(*ctLookupName)(char *table))
 /* Local test to see if something is big bed.  Handles hub tracks unlike hIsBigBed. */
 {
 struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
 
 // if "table" is explicitly listed, we're going to use that instead of any bigDataUrl
 if (tdb && hashLookup(tdb->settingsHash, "table"))
     return FALSE;
 if (tdb)
     return tdbIsBigBed(tdb);
 else
     return hIsBigBed(database, table, parent, ctLookupName);
@@ -142,53 +143,74 @@
     bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
     if (asFilterOnRow(filter, row))
         {
         if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
             continue;
 	struct bed *bed = bedLoadN(row, bbi->definedFieldCount);
 	struct bed *lmBed = lmCloneBed(bed, bedLm);
 	slAddHead(pBedList, lmBed);
 	bedFree(&bed);
 	}
     }
 
 lmCleanup(&bbLm);
 }
 
+static struct bptFile *getNameIndexOrDie(struct bbiFile *bbi, int *pFieldIndex)
+/* Open the extra index on the 'name' field of bbi and return it.  pFieldIndex is handed straight through to bigBedOpenExtraIndex (presumably filled in with the indexed field's number - confirm against bigBed.h).  If the open raises an error, errAbort with a message that names bbi->fileName. */
+{
+struct bptFile *bpt = NULL;
+struct errCatch *errCatch = errCatchNew();
+
+if (errCatchStart(errCatch))  // run the open under errCatch so its error can be replaced by our own message below
+    {
+    bpt = bigBedOpenExtraIndex(bbi, "name", pFieldIndex);
+    }
+errCatchEnd(errCatch);
+if (errCatch->gotError)  // every caught error is reported as a missing name index; the caught error text itself is not shown
+    {
+    errAbort("Getting identifiers from whole genome regions requires an index on the name field of the bigBedFile %s", bbi->fileName);
+    }
+errCatchFree(&errCatch);  // NOTE(review): skipped when gotError is set because errAbort is called first - presumably errAbort does not return; confirm
+
+return bpt;
+}
+
 struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn,
 	char *db, char *table, struct region *regionList, struct lm *lm,
 	int *retFieldCount)
 /* Get list of beds from bigBed, in all regions, that pass filtering. */
 {
 /* Connect to big bed and get metadata and filter. */
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 struct asFilter *filter = asFilterFromCart(cart, db, table, as);
 struct bed *bedList = NULL;
 
 /* If we're doing a whole-genome query with a name index then use the name index to retrieve items
  * instead of iterating over regions. */
 struct hash *idHash = NULL;
 if (bbi->definedFieldCount >= 4)
     idHash = identifierHash(db, table);
 int fieldIx;
 struct bptFile *bpt = NULL;
 struct lm *bbLm = NULL;
 struct bigBedInterval *ivList = NULL;
 if (idHash && isRegionWholeGenome())
-    bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
+    bpt = getNameIndexOrDie(bbi, &fieldIx);
+
 if (bpt != NULL)
     {
     struct slName *nameList = hashSlNameFromHash(idHash), *name;
     int count = slCount(nameList);
     char *names[count];
     int ii;
     for (ii=0, name = nameList; ii < count; ii++, name = name->next)
         {
         names[ii] = name->name;
         }
     bbLm = lmInit(0);
     ivList = bigBedMultiNameQuery(bbi, bpt, fieldIx, names, count, bbLm);
     slNameFreeList(&nameList);
     }
 struct region *region;
@@ -304,31 +326,31 @@
 struct asFilter *filter = NULL;
 
 if (anyFilter())
     {
     filter = asFilterFromCart(cart, db, table, as);
     if (filter)
         {
 	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
 	}
     }
 
 struct bptFile *bpt = NULL;
 int fieldIx;
 
 if (idHash && isRegionWholeGenome())
-    bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
+    bpt = getNameIndexOrDie(bbi, &fieldIx);
 
 char *row[bbi->fieldCount];
 char startBuf[16], endBuf[16];
 if (bpt) // if we have an index it means we're whole genome and don't need to filter based on regions
     {
     struct slName *nameList = hashSlNameFromHash(idHash);
     int count = slCount(nameList);
     char *names[count];
     int ii;
     for (ii=0; ii < count; ii++)
     {
         names[ii] = nameList->name;
         nameList = nameList->next;
     }