src/hg/hgTables/bigBed.c 7338fb10bb7ffdc0a75445a36cd128e6be593d36

7338fb10bb7ffdc0a75445a36cd128e6be593d36
braney
  Mon Feb 26 10:51:47 2024 -0800
fix a bunch of places that were printing out hub_#_db instead of just db

diff --git src/hg/hgTables/bigBed.c src/hg/hgTables/bigBed.c
index 68567b5..f0467fd 100644
--- src/hg/hgTables/bigBed.c
+++ src/hg/hgTables/bigBed.c
@@ -1,626 +1,626 @@
 /* bigBed - stuff to handle bigBed in the Table Browser. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "localmem.h"
 #include "jksql.h"
 #include "cheapcgi.h"
 #include "cart.h"
 #include "web.h"
 #include "bed.h"
 #include "hdb.h"
 #include "trackDb.h"
 #include "obscure.h"
 #include "hmmstats.h"
 #include "correlate.h"
 #include "asParse.h"
 #include "bbiFile.h"
 #include "bigBed.h"
 #include "hubConnect.h"
 #include "asFilter.h"
 #include "hgTables.h"
 #include "trackHub.h"
 #include "chromAlias.h"
 #include "bPlusTree.h"
 #include "errCatch.h"
 
 
 boolean isBigBed(char *database, char *table, struct trackDb *parent,
 	struct customTrack *(*ctLookupName)(char *table))
 /* Local test to see if something is big bed.  Handles hub tracks unlike hIsBigBed. */
 {
 struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
 
 // if "table" is explicitly listed, we're going to use that instead of any bigDataUrl
 if (tdb && hashLookup(tdb->settingsHash, "table"))
     return FALSE;
 if (tdb)
     return tdbIsBigBed(tdb);
 else
     return hIsBigBed(database, table, parent, ctLookupName);
 }
 
 static struct hash *asColumnHash(struct asObject *as)
 /* Return a hash full of the object's columns, keyed by colum name */
 {
 struct hash *hash = hashNew(6);
 struct asColumn *col;
 for (col = as->columnList; col != NULL; col = col->next)
     hashAdd(hash, col->name, col);
 return hash;
 }
 
 static void fillField(struct hash *colHash, char *key, char output[HDB_MAX_FIELD_STRING])
 /* If key is in colHash, then copy key to output. */
 {
 if (hashLookup(colHash, key))
     strncpy(output, key, HDB_MAX_FIELD_STRING-1);
 }
 
 struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn)
 /* Get fields of bigBed into hti structure. */
 {
 /* Get columns in asObject format. */
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 
 /* Allocate hTableInfo structure and fill in info about bed fields. */
 struct hash *colHash = asColumnHash(as);
 struct hTableInfo *hti;
 AllocVar(hti);
 hti->rootName = cloneString(table);
 hti->isPos= TRUE;
 fillField(colHash, "chrom", hti->chromField);
 fillField(colHash, "chromStart", hti->startField);
 fillField(colHash, "chromEnd", hti->endField);
 fillField(colHash, "name", hti->nameField);
 fillField(colHash, "score", hti->scoreField);
 fillField(colHash, "strand", hti->strandField);
 fillField(colHash, "thickStart", hti->cdsStartField);
 fillField(colHash, "thickEnd", hti->cdsEndField);
 fillField(colHash, "blockCount", hti->countField);
 fillField(colHash, "chromStarts", hti->startsField);
 fillField(colHash, "blockSizes", hti->endsSizesField);
 hti->hasCDS = (bbi->definedFieldCount >= 8);
 hti->hasBlocks = (bbi->definedFieldCount >= 12);
 char type[256];
 safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount,
 	(bbi->definedFieldCount == bbi->fieldCount ? '.' : '+'));
 hti->type = cloneString(type);
 
 freeMem(fileName);
 hashFree(&colHash);
 bbiFileClose(&bbi);
 return hti;
 }
 
 struct slName *bigBedGetFields(char *table, struct sqlConnection *conn)
 /* Get fields of bigBed as simple name list. */
 {
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 struct slName *names = asColNames(as);
 freeMem(fileName);
 bbiFileClose(&bbi);
 return names;
 }
 
 struct sqlFieldType *bigBedListFieldsAndTypes(struct trackDb *tdb, struct sqlConnection *conn)
 /* Get fields of bigBed as list of sqlFieldType. */
 {
 char *fileOrUrl = bigFileNameFromCtOrHub(tdb->table, conn);
 if (fileOrUrl == NULL)
     fileOrUrl = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileOrUrl, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 struct sqlFieldType *list = sqlFieldTypesFromAs(as);
 bbiFileClose(&bbi);
 return list;
 }
 
 
 static void addFilteredBedsOnRegion(struct bbiFile *bbi, struct region *region,
 	char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList)
 /* Add relevant beds in reverse order to pBedList */
 {
 struct lm *bbLm = lmInit(0);
 struct bigBedInterval *ivList = NULL, *iv;
 ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, bbLm);
 char *row[bbi->fieldCount];
 char startBuf[16], endBuf[16];
 struct hash *idHash = NULL;
 if (bbi->fieldCount >= 4)
     idHash = identifierHash(database, table);
 for (iv = ivList; iv != NULL; iv = iv->next)
     {
     bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
     if (asFilterOnRow(filter, row))
         {
         if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
             continue;
 	struct bed *bed = bedLoadN(row, bbi->definedFieldCount);
 	struct bed *lmBed = lmCloneBed(bed, bedLm);
 	slAddHead(pBedList, lmBed);
 	bedFree(&bed);
 	}
     }
 
 lmCleanup(&bbLm);
 }
 
 static struct bptFile *getNameIndexOrDie(struct bbiFile *bbi, int *pFieldIndex)
 /* Return the index on the 'name' field in the passed bbi. errAbort on failure. */
 {
 struct bptFile *bpt = NULL;
 struct errCatch *errCatch = errCatchNew();
 
 if (errCatchStart(errCatch))
     {
     bpt = bigBedOpenExtraIndex(bbi, "name", pFieldIndex);
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError)
     {
     errAbort("Getting identifiers from whole genome regions requires an index on the name field of the bigBedFile %s", bbi->fileName);
     }
 errCatchFree(&errCatch);
 
 return bpt;
 }
 
 struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn,
 	char *db, char *table, struct region *regionList, struct lm *lm,
 	int *retFieldCount)
 /* Get list of beds from bigBed, in all regions, that pass filtering. */
 {
 /* Connect to big bed and get metadata and filter. */
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 struct asFilter *filter = asFilterFromCart(cart, db, table, as);
 struct bed *bedList = NULL;
 
 /* If we're doing a whole-genome query with a name index then use the name index to retrieve items
  * instead of iterating over regions. */
 struct hash *idHash = NULL;
 if (bbi->definedFieldCount >= 4)
     idHash = identifierHash(db, table);
 int fieldIx;
 struct bptFile *bpt = NULL;
 struct lm *bbLm = NULL;
 struct bigBedInterval *ivList = NULL;
 if (idHash && isRegionWholeGenome())
     bpt = getNameIndexOrDie(bbi, &fieldIx);
 
 if (bpt != NULL)
     {
     struct slName *nameList = hashSlNameFromHash(idHash), *name;
     int count = slCount(nameList);
     char *names[count];
     int ii;
     for (ii=0, name = nameList; ii < count; ii++, name = name->next)
         {
         names[ii] = name->name;
         }
     bbLm = lmInit(0);
     ivList = bigBedMultiNameQuery(bbi, bpt, fieldIx, names, count, bbLm);
     slNameFreeList(&nameList);
     }
 struct region *region;
 for (region = regionList; region != NULL; region = region->next)
     {
     if (bpt)
         {
         /*** NOTE: it is inefficient to convert intervals from a name-index query to filtered bed
          * inside the loop on regionList.  However, bigBedGetFilteredBedsOnRegions is called by
          * getFilteredBeds on a "regionList" that has been doctored to one region at a time,
          * so we can do intersection one region at a time.  Since this is called once per region,
          * we really do need to restrict items to region->chrom, otherwise all items would be
          * returned for every region.  It is still much more efficient for large bigBeds to do
          * name-index queries when names are pasted/uploaded than to fetch all intervals in all
          * regions and then check names.  See MLQ #32625. */
         char chromBuf[4096];
         struct bigBedInterval *iv = NULL;
         char *displayChromName = NULL;
         int lastChromId = -1;
         for (iv = ivList; iv != NULL; iv = iv->next)
             {
             if (iv->chromId != lastChromId)
                 {
                 bptStringKeyAtPos(bbi->chromBpt, iv->chromId, chromBuf, sizeof chromBuf);
                 displayChromName = chromAliasGetDisplayChrom(database, cart, hgOfficialChromName(database, chromBuf));
                 }
             if (sameString(displayChromName, region->chrom))
                 {
                 char *row[bbi->fieldCount];
                 char startBuf[16], endBuf[16];
                 bigBedIntervalToRow(iv, displayChromName, startBuf, endBuf, row, bbi->fieldCount);
                 if (asFilterOnRow(filter, row))
                     {
                     struct bed *bed = bedLoadN(row, bbi->definedFieldCount);
                     struct bed *lmBed = lmCloneBed(bed, lm);
                     slAddHead(&bedList, lmBed);
                     bedFree(&bed);
                     }
                 }
             lastChromId = iv->chromId;
             }
         }
     else
         {
         /* Get beds a region at a time. */
         addFilteredBedsOnRegion(bbi, region, table, filter, lm, &bedList);
         }
     slReverse(&bedList);
     }
 
 /* Clean up and return. */
 if (retFieldCount != NULL)
     *retFieldCount = bbi->definedFieldCount;
 lmCleanup(&bbLm);
 hashFree(&idHash);
 bptFileDetach(&bpt);
 bbiFileClose(&bbi);
 freeMem(fileName);
 return bedList;
 }
 
 void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f, char outSep)
 /* Print out selected fields from Big Bed.  If fields is NULL, then print out all fields. */
 {
 if (f == NULL)
     f = stdout;
 
 /* Convert comma separated list of fields to array. */
 int fieldCount = chopByChar(fields, ',', NULL, 0);
 char **fieldArray;
 AllocArray(fieldArray, fieldCount);
 chopByChar(fields, ',', fieldArray, fieldCount);
 
 /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
  * column name. */
 struct hash *fieldHash = hashNew(0);
 struct slName *bb, *bbList = bigBedGetFields(table, conn);
 int i;
 for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
     hashAddInt(fieldHash, bb->name, i);
 
 // If bigBed has name column, look up pasted/uploaded identifiers if any:
 struct hash *idHash = NULL;
 if (slCount(bbList) >= 4)
     idHash = identifierHash(db, table);
 
 /* Create an array of column indexes corresponding to the selected field list. */
 int *columnArray;
 AllocArray(columnArray, fieldCount);
 for (i=0; i<fieldCount; ++i)
     {
     columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
     }
 
 /* Output row of labels */
 fprintf(f, "#");
 if (outSep == ',') fputc('"', f);
 fprintf(f, "%s", fieldArray[0]);
 if (outSep == ',') fputc('"', f);
 for (i=1; i<fieldCount; ++i)
     {
     fputc(outSep, f);
     if (outSep == ',') fputc('"', f);
     fprintf(f, "%s", fieldArray[i]);
     if (outSep == ',') fputc('"', f);
     }
 fprintf(f, "\n");
 
 /* Open up bigBed file. */
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct asObject *as = bigBedAsOrDefault(bbi);
 struct asFilter *filter = NULL;
 
 if (anyFilter())
     {
     filter = asFilterFromCart(cart, db, table, as);
     if (filter)
         {
 	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
 	}
     }
 
 struct bptFile *bpt = NULL;
 int fieldIx;
 
 if (idHash && isRegionWholeGenome())
     bpt = getNameIndexOrDie(bbi, &fieldIx);
 
 char *row[bbi->fieldCount];
 char startBuf[16], endBuf[16];
 if (bpt) // if we have an index it means we're whole genome and don't need to filter based on regions
     {
     struct slName *nameList = hashSlNameFromHash(idHash);
     int count = slCount(nameList);
     char *names[count];
     int ii;
     for (ii=0; ii < count; ii++)
     {
         names[ii] = nameList->name;
         nameList = nameList->next;
     }
 
     struct lm *lm = lmInit(0);
     struct bigBedInterval *ivList = bigBedMultiNameQuery(bbi, bpt, fieldIx, names, count, lm);
     char chromBuf[4096];
     struct bigBedInterval *interval, *prevInterval = NULL;
     char * displayChromName = NULL;
     for (interval = ivList; interval != NULL; prevInterval = interval, interval = interval->next)
         {       
         int lastChromId = (prevInterval == NULL ? -1 : prevInterval->chromId);
         if (interval->chromId != lastChromId)
             {
             bptStringKeyAtPos(bbi->chromBpt, interval->chromId, chromBuf, sizeof chromBuf);
             displayChromName = chromAliasGetDisplayChrom(database, cart, hgOfficialChromName(database, chromBuf));
             }
         bigBedIntervalToRow(interval, displayChromName, startBuf, endBuf, row, bbi->fieldCount);
         if (asFilterOnRow(filter, row))
             {
             int i;
             if (outSep == ',') fputc('"', f);
             fprintf(f, "%s", row[columnArray[0]]);
             if (outSep == ',') fputc('"', f);
             for (i=1; i<fieldCount; ++i)
                 {
                 fputc(outSep, f);
                 if (outSep == ',') fputc('"', f);
                 fprintf(f, "%s", row[columnArray[i]]);
                 if (outSep == ',') fputc('"', f);
                 }
             fprintf(f, "\n");
             }
         }
     }
 else 
     {
     /* Loop through outputting each region */
     struct region *region, *regionList = getRegions();
     for (region = regionList; region != NULL; region = region->next)
         {
         struct lm *lm = lmInit(0);
         struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom,
             region->start, region->end, 0, lm);
         char * displayChromName = chromAliasGetDisplayChrom(database, cart, region->chrom);
         for (iv = ivList; iv != NULL; iv = iv->next)
             {
             bigBedIntervalToRow(iv, displayChromName, startBuf, endBuf, row, bbi->fieldCount);
             if (asFilterOnRow(filter, row))
                 {
                 if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
                     continue;
                 int i;
                 if (outSep == ',') fputc('"', f);
                 fprintf(f, "%s", row[columnArray[0]]);
                 if (outSep == ',') fputc('"', f);
                 for (i=1; i<fieldCount; ++i)
                     {
                     fputc(outSep, f);
                     if (outSep == ',') fputc('"', f);
                     fprintf(f, "%s", row[columnArray[i]]);
                     if (outSep == ',') fputc('"', f);
                     }
                 fprintf(f, "\n");
                 }
             }
         lmCleanup(&lm);
         }
     }
 
 /* Clean up and exit. */
 bbiFileClose(&bbi);
 hashFree(&fieldHash);
 freeMem(fieldArray);
 freeMem(columnArray);
 }
 
 static unsigned slCountAtMost(const void *list, unsigned max)
 // return the length of the list, but only count up to max
 {
 struct slList *pt = (struct slList *)list;
 int len = 0;
 
 while (pt != NULL)
     {
     len += 1;
     pt = pt->next;
     if (len == max)
 	break;
     }
 return len;
 }
 
 static struct bigBedInterval *getNElements(struct bbiFile *bbi, struct bbiChromInfo *chromList,
 					   struct lm *lm, int n)
 // get up to n sample rows from the first chrom listed in the bigBed.
 // will return less than n if there are less than n on the first chrom.
 {
 struct bigBedInterval *ivList = NULL;
 // start out requesting only 10k bp so we don't hang if the bigBed is huge
 int currentLen = 10000;
 // look about 2/3 of the way through the chrom to avoid the telomeres
 // and the centromere
 int startAddr = 2 * chromList->size / 3;
 int endAddr;
 
 while ((slCountAtMost(ivList, n)) < n)
     {
     endAddr = startAddr + currentLen;
 
     // if we're pointing beyond the end of the chromosome
     if (endAddr > chromList->size)
 	{
 	// move the start address back
 	startAddr -= (endAddr - chromList->size);
 	endAddr = chromList->size;
 	}
 
     // if we're pointing to before the start of the chrom
     if (startAddr < 0)
 	startAddr = 0;
 
     // ask for n items
     ivList = bigBedIntervalQuery(bbi, chromList->name, startAddr, endAddr, n, lm);
     currentLen *= 2;
 
     if ((startAddr == 0) && (endAddr == chromList->size))
 	break;
     }
 
 return  ivList;
 }
 
 struct slName *randomBigBedIds(char *table, struct sqlConnection *conn, int count)
 /* Return some arbitrary IDs from a bigBed file. */
 {
 /* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
 struct slName *idList = NULL;
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct bbiChromInfo *chromList = bbiChromList(bbi);
 struct lm *lm = lmInit(0);
 int orderedCount = count * 4;
 if (orderedCount < 100)
     orderedCount = 100;
 struct bigBedInterval *iv, *ivList = getNElements(bbi, chromList, lm, orderedCount);
 shuffleList(&ivList);
 // Make a list of item names from intervals.
 int outCount = 0;
 for (iv = ivList;  iv != NULL && outCount < count;  iv = iv->next)
     {
     char *row[bbi->fieldCount];
     char startBuf[16], endBuf[16];
     bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
     if (idList == NULL || differentString(row[3], idList->name))
 	{
 	slAddHead(&idList, slNameNew(row[3]));
 	outCount++;
 	}
     }
 lmCleanup(&lm);
 bbiFileClose(&bbi);
 freeMem(fileName);
 return idList;
 }
 
 void showSchemaBigBed(char *table, struct trackDb *tdb)
 /* Show schema on bigBed. */
 {
 /* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
 struct sqlConnection *conn = NULL;
 if (!trackHubDatabase(database))
     conn = hAllocConn(database);
 char *fileName = bigBedFileName(table, conn);
 struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
 struct bbiChromInfo *chromList = bbiChromList(bbi);
 struct lm *lm = lmInit(0);
 struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10);
 time_t timep = bbiUpdateTime(bbi);
 
 /* Get description of columns, making it up from BED records if need be. */
 struct asObject *as = bigBedAsOrDefault(bbi);
 
-hPrintf("<B>Database:</B> %s", database);
+hPrintf("<B>Database:</B> %s", trackHubSkipHubName(database));
 hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>Primary Table:</B> %s ", table);
 printf("<B>Data last updated:&nbsp;</B>%s<BR>\n", firstWordInLine(sqlUnixTimeToDate(&timep, FALSE)));
 
 printDownloadLink("Big Bed", fileName);
 
 if (bbi->version >= 2)
     {
     hPrintf("<BR><B>Item Count:</B> ");
     printLongWithCommas(stdout, bigBedItemCount(bbi));
     }
 printTypeHelpDesc(tdb->type);
 hPrintf("<BR>\n");
 hPrintf("<B>Format description:</B> %s<BR>", as->comment);
 
 /* Put up table that describes fields. */
 hTableStart();
 hPrintf("<TR><TH>field</TH>");
 if (ivList != NULL)
     hPrintf("<TH>example</TH>");
 hPrintf("<TH>description</TH> ");
 puts("</TR>\n");
 struct asColumn *col;
 int colCount = 0;
 char *row[bbi->fieldCount];
 char startBuf[16], endBuf[16];
 if (ivList != NULL)
     {
     char *dupeRest = lmCloneString(lm, ivList->rest);	/* Manage rest-stomping side-effect */
     bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
     ivList->rest = dupeRest;
     }
 for (col = as->columnList; col != NULL; col = col->next)
     {
     hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
     if (ivList != NULL)
 	hPrintf("<TD>%s</TD>", row[colCount]);
     hPrintf("<TD>%s</TD></TR>", col->comment);
     ++colCount;
     }
 
 /* If more fields than descriptions put up minimally helpful info (at least has example). */
 for ( ; colCount < bbi->fieldCount; ++colCount)
     {
     hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1);
     if (ivList != NULL)
 	hPrintf("<TD>%s</TD>", row[colCount]);
     hPrintf("<TD>n/a</TD></TR>\n");
     }
 hTableEnd();
 
 
 if (ivList != NULL)
     {
     /* Put up another section with sample rows. */
     webNewSection("Sample Rows");
     hTableStart();
 
     /* Print field names as column headers for example */
     hPrintf("<TR>");
     int colIx = 0;
     for (col = as->columnList; col != NULL; col = col->next)
 	{
 	hPrintf("<TH>%s</TH>", col->name);
 	++colIx;
 	}
     for (; colIx < colCount; ++colIx)
 	hPrintf("<TH>column%d</TH>", colIx+1);
     hPrintf("</TR>\n");
 
     /* Print sample lines. */
     struct bigBedInterval *iv;
     for (iv=ivList; iv != NULL; iv = iv->next)
 	{
 	bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
 	hPrintf("<TR>");
 	for (colIx=0; colIx<colCount; ++colIx)
 	    {
 	    writeHtmlCell(row[colIx]);
 	    }
 	hPrintf("</TR>\n");
 	}
     hTableEnd();
     }
 printTrackHtml(tdb);
 /* Clean up and go home. */
 lmCleanup(&lm);
 bbiFileClose(&bbi);
 freeMem(fileName);
 hFreeConn(&conn);
 }