12d0bf30d4a9fbe7aee60fdf9e7f8827116ba400 markd Wed Jan 27 17:49:12 2021 -0800 allow overlap select to ignore non-standard columns in BED as well as genePred and PSL, as often come with bigBed diff --git src/hg/utils/overlapSelect/chromAnn.c src/hg/utils/overlapSelect/chromAnn.c index f1da034..a7d1056 100644 --- src/hg/utils/overlapSelect/chromAnn.c +++ src/hg/utils/overlapSelect/chromAnn.c @@ -148,31 +148,31 @@ } if (start < end) chromAnnBlkNew(ca, start, end); } } static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car) /* read next BED and convert to a chromAnn */ { struct rowReader *rr = car->data; if (!rowReaderNext(rr)) return NULL; rowReaderExpectAtLeast(rr, 3); char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL; -struct bed *bed = bedLoadN(rr->row, rr->numCols); +struct bed *bed = bedLoadN(rr->row, min(rr->numCols, rr->maxParsedCols)); struct chromAnn *ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, rawCols, strVectorWrite, strVectorFree); if ((bed->blockCount == 0) || (car->opts & chromAnnRange)) { if (car->opts & chromAnnCds) { if (bed->thickStart < bed->thickEnd) chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd); } else chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd); } else addBedBlocks(ca, car->opts, bed); @@ -182,39 +182,40 @@ return ca; } static void chromAnnBedReaderFree(struct chromAnnReader **carPtr) /* free object */ { struct chromAnnReader *car = *carPtr; if (car != NULL) { struct rowReader *rr = car->data; rowReaderFree(&rr); freez(carPtr); } } -struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts) +struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts, + unsigned maxParsedCols) /* construct a reader for a BED file */ { struct chromAnnReader *car; AllocVar(car); car->caRead = chromAnnBedReaderRead; car->carFree = chromAnnBedReaderFree; car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); +car->data = rowReaderOpen(fileName, maxParsedCols, FALSE); return car; } static void addGenePredBlocks(struct chromAnn* ca, unsigned opts, struct genePred* gp) /* add blocks from a genePred */ { int iExon; for (iExon = 0; iExon < gp->exonCount; iExon++) { int start = gp->exonStarts[iExon]; int end = gp->exonEnds[iExon]; if ((opts & chromAnnCds) && (gp->cdsStart > start)) start = gp->cdsStart; if ((opts & chromAnnCds) && (gp->cdsEnd < end)) end = gp->cdsEnd; @@ -264,31 +265,31 @@ { struct rowReader *rr = car->data; rowReaderFree(&rr); freez(carPtr); } } struct chromAnnReader *chromAnnGenePredReaderNew(char *fileName, unsigned opts) /* construct a reader for a genePred file */ { struct chromAnnReader *car; AllocVar(car); car->caRead = chromAnnGenePredReaderRead; car->carFree = chromAnnGenePredReaderFree; car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); +car->data = rowReaderOpen(fileName, GENEPREDX_NUM_COLS, FALSE); return car; } static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl) /* add blocks from a psl */ { boolean blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl); int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize; unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts; boolean blkSizeMult = pslIsProtein(psl) ? 3 : 1; int iBlk; for (iBlk = 0; iBlk < psl->blockCount; iBlk++) { int start = blocks[iBlk]; int end = start + (blkSizeMult * psl->blockSizes[iBlk]); @@ -369,58 +370,58 @@ { struct rowReader *rr = car->data; rowReaderFree(&rr); freez(carPtr); } } struct chromAnnReader *chromAnnPslReaderNew(char *fileName, unsigned opts) /* construct a reader for a PSL file */ { struct chromAnnReader *car; AllocVar(car); car->caRead = chromAnnPslReaderRead; car->carFree = chromAnnPslReaderFree; car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); +car->data = rowReaderOpen(fileName, PSL_NUM_COLS, FALSE); return car; } static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain) /* add query blocks from a chain */ { struct cBlock *blk; for (blk = chain->blockList; blk != NULL; blk = blk->next) { int start = blk->qStart; int end = blk->qEnd; if (chain->qStrand == '-') reverseIntRange(&start, &end, chain->qSize); chromAnnBlkNew(ca, start, end); } } static void addChainTBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain) /* add target blocks from a chain */ { struct cBlock *blk; for (blk = chain->blockList; blk != NULL; blk = blk->next) chromAnnBlkNew(ca, blk->tStart, blk->tEnd); } struct chromAnnChainReader -/* reader data for tab files */ +/* reader data for chain files */ { struct lineFile *lf; }; static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term) /* write a chromAnn that is chain */ { struct chain *chain = ca->rec; assert(term == '\n'); chainWrite(chain, fh); } static void chainRecFree(struct chromAnn *ca) /* free chromAnn chain data */ { @@ -526,31 +527,31 @@ if (car != NULL) { struct chromAnnTabReader *catr = car->data; rowReaderFree(&catr->rr); freeMem(catr); freez(carPtr); } } struct chromAnnReader *chromAnnTabReaderNew(char *fileName, struct coordCols* cols, unsigned opts) /* construct a reader for an arbitrary tab file. */ { struct chromAnnTabReader *catr; AllocVar(catr); catr->cols = *cols; -catr->rr = rowReaderOpen(fileName, FALSE); +catr->rr = rowReaderOpen(fileName, 0, FALSE); struct chromAnnReader *car; AllocVar(car); car->caRead = chromAnnTabReaderRead; car->carFree = chromAnnTabReaderFree; car->opts = opts; car->data = catr; return car; } int chromAnnRefLocCmp(const void *va, const void *vb) /* Compare location of two chromAnnRef objects. */ { const struct chromAnnRef *a = *((struct chromAnnRef **)va); const struct chromAnnRef *b = *((struct chromAnnRef **)vb);