97d9ef321d57124ba40826c384e6a50a48e844dd angie Wed Dec 7 14:44:01 2011 -0800 Thanks Brian for catching the "need to warn here" loose end in code review --now there is an actual warning. I also updated bam.c to have the same maxOut improvements as vcf.c. Unfortunately, we still have the problem that several layers of code will have to be modified in order to have genome-wide maxOut instead of per-region, for non-wiggle data types. diff --git src/hg/hgTables/bam.c src/hg/hgTables/bam.c index b45ee7b..346bdea 100644 --- src/hg/hgTables/bam.c +++ src/hg/hgTables/bam.c @@ -228,76 +228,87 @@ case 'N': // long deletion from query (intron as opposed to small del) tLength += n; break; case 'S': // skipped query bases at beginning or end ("soft clipping") case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("cigarWidth: unrecognized CIGAR op %c -- update me", op); } } return tLength; } static void addFilteredBedsOnRegion(char *fileName, struct region *region, - char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList, struct hash *idHash) + char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList, + struct hash *idHash, int *pMaxOut) /* Add relevant beds in reverse order to pBedList */ { struct lm *lm = lmInit(0); struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom, region->start, region->end, lm); char *row[SAMALIGNMENT_NUM_COLS]; char numBuf[BAM_NUM_BUF_SIZE]; for (sam = samList; sam != NULL; sam = sam->next) { samAlignmentToRow(sam, numBuf, row); if (asFilterOnRow(filter, row)) { if ((idHash != NULL) && (hashLookup(idHash, sam->qName) == NULL)) continue; struct bed *bed; lmAllocVar(bedLm, bed); bed->chrom = lmCloneString(bedLm, sam->rName); bed->chromStart = sam->pos - 1; bed->chromEnd = bed->chromStart + cigarWidth(sam->cigar, strlen(sam->cigar)); bed->name = lmCloneString(bedLm, sam->qName); slAddHead(pBedList, bed); } + (*pMaxOut)--; + if (*pMaxOut <= 0) + break; } lmCleanup(&lm); } struct bed *bamGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from BAM, in all regions, that pass filtering. */ { +int maxOut = bigFileMaxOutput(); /* Figure out bam file name get column info and filter. */ struct asObject *as = bamAsObj(); struct asFilter *filter = asFilterFromCart(cart, db, table, as); struct hash *idHash = identifierHash(db, table); /* Get beds a region at a time. */ struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) { char *fileName = bamFileName(table, conn, region->chrom); - addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash); + addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut); freeMem(fileName); + if (maxOut <= 0) + { + warn("Reached output limit of %d data values, please make region smaller,\n" + "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); + break; + } } slReverse(&bedList); return bedList; } struct slName *randomBamIds(char *table, struct sqlConnection *conn, int count) /* Return some semi-random qName based IDs from a BAM file. */ { /* Read 10000 items from bam file, or if they ask for a big list, then 4x what they ask for. */ char *fileName = bamFileName(table, conn, NULL); samfile_t *fh = bamOpen(fileName, NULL); struct lm *lm = lmInit(0); int orderedCount = count * 4; if (orderedCount < 10000) orderedCount = 10000;