4b2e806a0d57f97bbadf9a762778965ccd36989d angie Fri Dec 21 13:57:15 2012 -0800 Bug #9860 (Table Browser paste identifiers button not working for 1000 genomes ph1 Accessible Regions): getExamples() was unaware of bigBed, so an illegal mysql query was constructed. Fix: added randomBigBedIds(). While in there, enhanced randomVcfIds() to ignore runs of identical IDs because those are placeholders, and might otherwise crowd out a few meaningful IDs (#8886 note 28). diff --git src/hg/hgTables/bigBed.c src/hg/hgTables/bigBed.c index ac61621..54b2271 100644 --- src/hg/hgTables/bigBed.c +++ src/hg/hgTables/bigBed.c @@ -248,80 +248,113 @@ // return the length of the list, but only count up to max { struct slList *pt = (struct slList *)list; int len = 0; while (pt != NULL) { len += 1; pt = pt->next; if (len == max) break; } return len; } -static struct bigBedInterval *getTenElements(struct bbiFile *bbi, - struct bbiChromInfo *chromList, struct lm *lm) -// get up to ten sample rows from the first chrom listed in the bigBed. -// will return less than ten if there are less than ten on the first chrom. +static struct bigBedInterval *getNElements(struct bbiFile *bbi, struct bbiChromInfo *chromList, + struct lm *lm, int n) +// get up to n sample rows from the first chrom listed in the bigBed. +// will return less than n if there are less than n on the first chrom. 
{ struct bigBedInterval *ivList = NULL; // start out requesting only 10k bp so we don't hang if the bigBed is huge int currentLen = 10000; // look about 2/3 of the way through the chrom to avoid the telomeres // and the centromere int startAddr = 2 * chromList->size / 3; int endAddr; -while ((slCountAtMost(ivList,10)) < 10) +while ((slCountAtMost(ivList, n)) < n) { endAddr = startAddr + currentLen; // if we're pointing beyond the end of the chromosome if (endAddr > chromList->size) { // move the start address back startAddr -= (endAddr - chromList->size); endAddr = chromList->size; } // if we're pointing to before the start of the chrom if (startAddr < 0) startAddr = 0; - // ask for ten items - ivList = bigBedIntervalQuery(bbi, chromList->name, startAddr, endAddr, 10, lm); + // ask for n items + ivList = bigBedIntervalQuery(bbi, chromList->name, startAddr, endAddr, n, lm); currentLen *= 2; if ((startAddr == 0) && (endAddr == chromList->size)) break; } return ivList; } +struct slName *randomBigBedIds(char *table, struct sqlConnection *conn, int count) +/* Return some arbitrary IDs from a bigBed file. */ +{ +/* Figure out bigBed file name and open it. Get contents for first chromosome as an example. */ +struct slName *idList = NULL; +char *fileName = bigBedFileName(table, conn); +struct bbiFile *bbi = bigBedFileOpen(fileName); +struct bbiChromInfo *chromList = bbiChromList(bbi); +struct lm *lm = lmInit(0); +int orderedCount = count * 4; +if (orderedCount < 100) + orderedCount = 100; +struct bigBedInterval *iv, *ivList = getNElements(bbi, chromList, lm, orderedCount); +shuffleList(&ivList, 1); +// Make a list of item names from intervals. 
+int outCount = 0; +for (iv = ivList; iv != NULL && outCount < count; iv = iv->next) + { + char *row[bbi->fieldCount]; + char startBuf[16], endBuf[16]; + bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount); + if (idList == NULL || differentString(row[3], idList->name)) + { + slAddHead(&idList, slNameNew(row[3])); + outCount++; + } + } +lmCleanup(&lm); +bbiFileClose(&bbi); +freeMem(fileName); +return idList; +} + void showSchemaBigBed(char *table, struct trackDb *tdb) /* Show schema on bigBed. */ { /* Figure out bigBed file name and open it. Get contents for first chromosome as an example. */ struct sqlConnection *conn = hAllocConn(database); char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct bbiChromInfo *chromList = bbiChromList(bbi); struct lm *lm = lmInit(0); -struct bigBedInterval *ivList = getTenElements(bbi, chromList, lm); +struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10); /* Get description of columns, making it up from BED records if need be. */ struct asObject *as = bigBedAsOrDefault(bbi); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>Big Bed File:</B> %s", fileName); if (bbi->version >= 2) { hPrintf("<BR><B>Item Count:</B> "); printLongWithCommas(stdout, bigBedItemCount(bbi)); } hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment);