4b2e806a0d57f97bbadf9a762778965ccd36989d angie Fri Dec 21 13:57:15 2012 -0800 Bug #9860 (Table Browser paste identifiers button not working for 1000 genomes ph1 Accessible Regions): getExamples() was unaware of bigBed, so an illegal mysql query was constructed. Fix: added randomBigBedIds(). While in there, enhanced randomVcfIds() to ignore runs of identical IDs because those are placeholders, and might otherwise crowd out a few meaningful IDs (#8886 note 28). diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c index e084e16..6588348 100644 --- src/hg/hgTables/vcf.c +++ src/hg/hgTables/vcf.c @@ -341,65 +341,71 @@ /* Get list of beds from VCF, in all regions, that pass filtering. */ { int maxOut = bigFileMaxOutput(); /* Figure out vcf file name get column info and filter. */ struct asObject *as = vcfAsObj(); struct asFilter *filter = asFilterFromCart(cart, db, table, as); struct hash *idHash = identifierHash(db, table); /* Get beds a region at a time. */ struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) { char *fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, region->chrom); + if (fileName == NULL) + continue; addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut); freeMem(fileName); if (maxOut <= 0) { warn("Reached output limit of %d data values, please make region smaller,\n" "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); break; } } slReverse(&bedList); return bedList; } struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count) /* Return some semi-random IDs from a VCF file. */ { /* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. 
*/ struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); char *fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, hDefaultChrom(database)); struct lineFile *lf = lineFileTabixMayOpen(fileName, TRUE); if (lf == NULL) noWarnAbort(); int orderedCount = count * 4; -if (orderedCount < 10000) - orderedCount = 10000; +if (orderedCount < 100) + orderedCount = 100; struct slName *idList = NULL; char *words[4]; int i; for (i = 0; i < orderedCount && lineFileChop(lf, words); i++) + { + // compress runs of identical ID, in case most are placeholder + if (i == 0 || !sameString(words[2], idList->name)) slAddHead(&idList, slNameNew(words[2])); + } lineFileClose(&lf); /* Shuffle list and trim it to count if necessary. */ shuffleList(&idList, 1); struct slName *sl; -for (sl = idList, i = 0; sl != NULL; i++, sl = sl->next, i++) +for (sl = idList, i = 0; sl != NULL; sl = sl->next, i++) { if (i+1 >= count) { slNameFreeList(&(sl->next)); break; } } freez(&fileName); return idList; } #define VCF_MAX_SCHEMA_COLS 20 void showSchemaVcf(char *table, struct trackDb *tdb) /* Show schema on vcf. */