b95ff3835509b242bd9007ba55c4f60c1022da47 markd Thu Dec 30 14:58:16 2010 -0800 moved to programs to hg/utils, fixed build of distributed utilities diff --git src/hg/overlapSelect/chromAnn.c src/hg/overlapSelect/chromAnn.c deleted file mode 100644 index 6bb0b8c..0000000 --- src/hg/overlapSelect/chromAnn.c +++ /dev/null @@ -1,561 +0,0 @@ -/* chromAnn - chomosome annotations, generic object to store annotations from - * other formats */ -#include "common.h" -#include "chromAnn.h" -#include "binRange.h" -#include "rowReader.h" -#include "psl.h" -#include "bed.h" -#include "chain.h" -#include "genePred.h" -#include "coordCols.h" -#include "verbose.h" - -static char const rcsid[] = "$Id: chromAnn.c,v 1.16 2010/01/30 01:13:59 markd Exp $"; - -static struct chromAnnBlk* chromAnnBlkNew(struct chromAnn *ca, int start, int end) -/* create new block object and add to chromAnn object */ -{ -struct chromAnnBlk* caBlk; -AllocVar(caBlk); -if (end < start) - errAbort("invalid block coordinates for %s: start=%d end=%d", ca->name, start, end); - -caBlk->ca = ca;; -caBlk->start = start; -caBlk->end = end; - -if (ca->blocks == NULL) - { - ca->start = start; - ca->end = end; - } -else - { - ca->start = min(ca->start, start); - ca->end = max(ca->end, end); - } -ca->totalSize += (end - start); -slAddHead(&ca->blocks, caBlk); -return caBlk; -} - -static void chromAnnBlkFreeList(struct chromAnnBlk *blks) -/* free list of objects */ -{ -struct chromAnnBlk *blk; -while ((blk = slPopHead(&blks)) != NULL) - freeMem(blk); -} - -static struct chromAnn* chromAnnNew(char* chrom, char strand, char* name, void *rec, - void (*recWrite)(struct chromAnn*, FILE *, char), - void (*recFree)(struct chromAnn *)) -/* create new object, ownership of rawCols is passed */ -{ -struct chromAnn* ca; -AllocVar(ca); -ca->chrom = cloneString(chrom); -ca->strand = strand; -if (name != NULL) - ca->name = cloneString(name); -ca->start = 0; -ca->end = 0; -ca->rec = rec; -ca->recWrite = recWrite; -ca->recFree = recFree; -return ca; -} - -static int chromAnnBlkCmp(const void *va, const void *vb) -/* sort compare of two chromAnnBlk objects */ -{ -const struct chromAnnBlk *a = *((struct chromAnnBlk **)va); -const struct chromAnnBlk *b = *((struct chromAnnBlk **)vb); -int diff = a->start - b->start; -if (diff == 0) - diff = a->end - b->end; -return diff; -} - -static void chromAnnFinish(struct chromAnn* ca) -/* finish creation of a chromAnn after all blocks are added */ -{ -slSort(&ca->blocks, chromAnnBlkCmp); -} - -void chromAnnFree(struct chromAnn **caPtr) -/* free an object */ -{ -struct chromAnn *ca = *caPtr; -if (ca != NULL) - { - ca->recFree(ca); - freeMem(ca->chrom); - freeMem(ca->name); - chromAnnBlkFreeList(ca->blocks); - freez(caPtr); - } -} - -int chromAnnTotalBlockSize(struct chromAnn* ca) -/* count the total bases in the blocks of a chromAnn */ -{ -int bases = 0; -struct chromAnnBlk *cab; -for (cab = ca->blocks; cab != NULL; cab = cab->next) - bases += (cab->end - cab->start); -return bases; -} - -static void strVectorWrite(struct chromAnn *ca, FILE *fh, char term) -/* write a chromAnn that is represented as a vector of strings */ -{ -char **cols = ca->rec; -assert(cols != NULL); -int i; -for (i = 0; cols[i] != NULL; i++) - { - if (i > 0) - putc_unlocked('\t', fh); - fputs(cols[i], fh); - } -putc_unlocked(term, fh); -} - -static void strVectorFree(struct chromAnn *ca) -/* free chromAnn data that is represented as a vector of strings */ -{ -freez(&ca->rec); -} - -static void addBedBlocks(struct chromAnn* ca, unsigned opts, struct bed* bed) -/* add blocks from a bed */ -{ -int iBlk; -for (iBlk = 0; iBlk < bed->blockCount; iBlk++) - { - int start = bed->chromStart + bed->chromStarts[iBlk]; - int end = start + bed->blockSizes[iBlk]; - if (opts & chromAnnCds) - { - if (start < bed->thickStart) - start = bed->thickStart; - if (end > bed->thickEnd) - end = bed->thickEnd; - } - if (start < end) - chromAnnBlkNew(ca, start, end); - } -} - -static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car) -/* read next BED and convert to a chromAnn */ -{ -struct rowReader *rr = car->data; -if (!rowReaderNext(rr)) - return NULL; -rowReaderExpectAtLeast(rr, 3); - -char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL; -struct bed *bed = bedLoadN(rr->row, rr->numCols); -struct chromAnn *ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, rawCols, - strVectorWrite, strVectorFree); - -if ((bed->blockCount == 0) || (car->opts & chromAnnRange)) - { - if (car->opts & chromAnnCds) - { - if (bed->thickStart < bed->thickEnd) - chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd); - } - else - chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd); - } -else - addBedBlocks(ca, car->opts, bed); - -chromAnnFinish(ca); -bedFree(&bed); -return ca; -} - -static void chromAnnBedReaderFree(struct chromAnnReader **carPtr) -/* free object */ -{ -struct chromAnnReader *car = *carPtr; -if (car != NULL) - { - struct rowReader *rr = car->data; - rowReaderFree(&rr); - freez(carPtr); - } -} - -struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts) -/* construct a reader for a BED file */ -{ -struct chromAnnReader *car; -AllocVar(car); -car->caRead = chromAnnBedReaderRead; -car->carFree = chromAnnBedReaderFree; -car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); -return car; -} - -static void addGenePredBlocks(struct chromAnn* ca, unsigned opts, struct genePred* gp) -/* add blocks from a genePred */ -{ -int iExon; -for (iExon = 0; iExon < gp->exonCount; iExon++) - { - int start = gp->exonStarts[iExon]; - int end = gp->exonEnds[iExon]; - if ((opts & chromAnnCds) && (gp->cdsStart > start)) - start = gp->cdsStart; - if ((opts & chromAnnCds) && (gp->cdsEnd < end)) - end = gp->cdsEnd; - if (start < end) - chromAnnBlkNew(ca, start, end); - } -} - -static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car) -/* Read the next genePred row and create a chromAnn object row read from a - * GenePred file or table. If there is no CDS, and chromAnnCds is specified, - * it will return a record with zero-length range.*/ -{ -struct rowReader *rr = car->data; -if (!rowReaderNext(rr)) - return NULL; -rowReaderExpectAtLeast(rr, GENEPRED_NUM_COLS); - -char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL; -struct genePred *gp = genePredLoad(rr->row); -struct chromAnn* ca = chromAnnNew(gp->chrom, gp->strand[0], gp->name, rawCols, - strVectorWrite, strVectorFree); - -if (car->opts & chromAnnRange) - { - if (car->opts & chromAnnCds) - { - if (gp->cdsStart < gp->cdsEnd) - chromAnnBlkNew(ca, gp->cdsStart, gp->cdsEnd); - } - else - chromAnnBlkNew(ca, gp->txStart, gp->txEnd); - } -else - addGenePredBlocks(ca, car->opts, gp); - -chromAnnFinish(ca); -genePredFree(&gp); -return ca; -} - -static void chromAnnGenePredReaderFree(struct chromAnnReader **carPtr) -/* free object */ -{ -struct chromAnnReader *car = *carPtr; -if (car != NULL) - { - struct rowReader *rr = car->data; - rowReaderFree(&rr); - freez(carPtr); - } -} - -struct chromAnnReader *chromAnnGenePredReaderNew(char *fileName, unsigned opts) -/* construct a reader for a genePred file */ -{ -struct chromAnnReader *car; -AllocVar(car); -car->caRead = chromAnnGenePredReaderRead; -car->carFree = chromAnnGenePredReaderFree; -car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); -return car; -} - -static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl) -/* add blocks from a psl */ -{ -boolean blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl); -int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize; -unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts; -boolean blkSizeMult = pslIsProtein(psl) ? 3 : 1; -int iBlk; -for (iBlk = 0; iBlk < psl->blockCount; iBlk++) - { - int start = blocks[iBlk]; - int end = start + (blkSizeMult * psl->blockSizes[iBlk]); - if (blkStrand == '-') - reverseIntRange(&start, &end, size); - chromAnnBlkNew(ca, start, end); - } -} - -static char getPslTSideStrand(struct psl *psl) -/* get the strand to use for a PSL when doing target side overlaps */ -{ -if (psl->strand[1] != '\0') - { - // translated - char strand = pslTStrand(psl); - if (pslQStrand(psl) == '-') - strand = (strand == '-') ? '+' : '-'; // query reverse complemented - return strand; - } -else - return pslQStrand(psl); // untranslated -} - -static char getPslQSideStrand(struct psl *psl) -/* get the strand to use for a PSL when doing query side overlaps */ -{ -if (psl->strand[1] != '\0') - { - // translated - char strand = pslQStrand(psl); - if (pslTStrand(psl) == '-') - strand = (strand == '-') ? '+' : '-'; // query reverse complemented - return strand; - } -else - return pslTStrand(psl); // untranslated -} - -static struct chromAnn* chromAnnPslReaderRead(struct chromAnnReader *car) -/* read next chromAnn from a PSL file */ -{ -struct rowReader *rr = car->data; -if (!rowReaderNext(rr)) - return NULL; -rowReaderExpectAtLeast(rr, PSL_NUM_COLS); - -char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL; - -struct psl *psl = pslLoad(rr->row); -struct chromAnn* ca; -if (car->opts & chromAnnUseQSide) - ca = chromAnnNew(psl->qName, getPslQSideStrand(psl), psl->tName, rawCols, - strVectorWrite, strVectorFree); -else - ca = chromAnnNew(psl->tName, getPslTSideStrand(psl), psl->qName, rawCols, - strVectorWrite, strVectorFree); - -if (car->opts & chromAnnRange) - { - if (car->opts & chromAnnUseQSide) - chromAnnBlkNew(ca, psl->qStart, psl->qEnd); - else - chromAnnBlkNew(ca, psl->tStart, psl->tEnd); - } -else - addPslBlocks(ca, car->opts, psl); -chromAnnFinish(ca); -pslFree(&psl); -return ca; -} - -static void chromAnnPslReaderFree(struct chromAnnReader **carPtr) -/* free object */ -{ -struct chromAnnReader *car = *carPtr; -if (car != NULL) - { - struct rowReader *rr = car->data; - rowReaderFree(&rr); - freez(carPtr); - } -} - -struct chromAnnReader *chromAnnPslReaderNew(char *fileName, unsigned opts) -/* construct a reader for a PSL file */ -{ -struct chromAnnReader *car; -AllocVar(car); -car->caRead = chromAnnPslReaderRead; -car->carFree = chromAnnPslReaderFree; -car->opts = opts; -car->data = rowReaderOpen(fileName, FALSE); -return car; -} - -static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain) -/* add query blocks from a chain */ -{ -struct cBlock *blk; -for (blk = chain->blockList; blk != NULL; blk = blk->next) - { - int start = blk->qStart; - int end = blk->qEnd; - if (chain->qStrand == '-') - reverseIntRange(&start, &end, chain->qSize); - chromAnnBlkNew(ca, start, end); - } -} - -static void addChainTBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain) -/* add target blocks from a chain */ -{ -struct cBlock *blk; -for (blk = chain->blockList; blk != NULL; blk = blk->next) - chromAnnBlkNew(ca, blk->tStart, blk->tEnd); -} - -struct chromAnnChainReader -/* reader data for tab files */ -{ - struct lineFile *lf; -}; - -static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term) -/* write a chromAnn that is chain */ -{ -struct chain *chain = ca->rec; -assert(term == '\n'); -chainWrite(chain, fh); -} - -static void chainRecFree(struct chromAnn *ca) -/* free chromAnn chain data */ -{ -chainFree((struct chain**)&ca->rec); -} - -static struct chromAnn* chromAnnChainReaderRead(struct chromAnnReader *car) -/* read a chromAnn object from a tab file or table */ -{ -struct chromAnnChainReader *carr = car->data; -struct chain *chain = chainRead(carr->lf); -if (chain == NULL) - return NULL; - -struct chromAnn* ca; -if (car->opts & chromAnnUseQSide) - ca = chromAnnNew(chain->qName, '+', chain->tName, - ((car->opts & chromAnnSaveLines) ? chain : NULL), - chainRecWrite, chainRecFree); -else - ca = chromAnnNew(chain->tName, chain->qStrand, chain->qName, - ((car->opts & chromAnnSaveLines) ? chain : NULL), - chainRecWrite, chainRecFree); - -if (car->opts & chromAnnRange) - { - if (car->opts & chromAnnUseQSide) - chromAnnBlkNew(ca, chain->qStart, chain->qEnd); - else - chromAnnBlkNew(ca, chain->tStart, chain->tEnd); - } -else - { - if (car->opts & chromAnnUseQSide) - addChainQBlocks(ca, car->opts, chain); - else - addChainTBlocks(ca, car->opts, chain); - } -chromAnnFinish(ca); -if (!(car->opts & chromAnnSaveLines)) - chainFree(&chain); -return ca; -} - -static void chromAnnChainReaderFree(struct chromAnnReader **carPtr) -/* free object */ -{ -struct chromAnnReader *car = *carPtr; -if (car != NULL) - { - struct chromAnnChainReader *carr = car->data; - lineFileClose(&carr->lf); - freeMem(carr); - freez(carPtr); - } -} - -struct chromAnnReader *chromAnnChainReaderNew(char *fileName, unsigned opts) -/* construct a reader for an arbitrary tab file. */ -{ -struct chromAnnChainReader *carr; -AllocVar(carr); -carr->lf = lineFileOpen(fileName, TRUE); - -struct chromAnnReader *car; -AllocVar(car); -car->caRead = chromAnnChainReaderRead; -car->carFree = chromAnnChainReaderFree; -car->opts = opts; -car->data = carr; -return car; -} - -struct chromAnnTabReader -/* reader data for tab files */ -{ - struct coordCols cols; // column spec - struct rowReader *rr; // tab row reader -}; - -static struct chromAnn* chromAnnTabReaderRead(struct chromAnnReader *car) -/* read a chromAnn object from a tab file or table */ -{ -struct chromAnnTabReader *catr = car->data; -if (!rowReaderNext(catr->rr)) - return NULL; -rowReaderExpectAtLeast(catr->rr, catr->cols.minNumCols); - -char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(catr->rr) : NULL; -struct coordColVals colVals = coordColParseRow(&catr->cols, catr->rr); - -struct chromAnn *ca = chromAnnNew(colVals.chrom, colVals.strand, colVals.name, rawCols, - strVectorWrite, strVectorFree); -chromAnnBlkNew(ca, colVals.start, colVals.end); -coordColsValsRelease(&colVals); -return ca; -} - -static void chromAnnTabReaderFree(struct chromAnnReader **carPtr) -/* free object */ -{ -struct chromAnnReader *car = *carPtr; -if (car != NULL) - { - struct chromAnnTabReader *catr = car->data; - rowReaderFree(&catr->rr); - freeMem(catr); - freez(carPtr); - } -} - -struct chromAnnReader *chromAnnTabReaderNew(char *fileName, struct coordCols* cols, unsigned opts) -/* construct a reader for an arbitrary tab file. */ -{ -struct chromAnnTabReader *catr; -AllocVar(catr); -catr->cols = *cols; -catr->rr = rowReaderOpen(fileName, FALSE); - -struct chromAnnReader *car; -AllocVar(car); -car->caRead = chromAnnTabReaderRead; -car->carFree = chromAnnTabReaderFree; -car->opts = opts; -car->data = catr; -return car; -} - -int chromAnnRefLocCmp(const void *va, const void *vb) -/* Compare location of two chromAnnRef objects. */ -{ -const struct chromAnnRef *a = *((struct chromAnnRef **)va); -const struct chromAnnRef *b = *((struct chromAnnRef **)vb); -int diff = strcmp(a->ref->chrom, b->ref->chrom); -if (diff == 0) - diff = a->ref->start - b->ref->start; -if (diff == 0) - diff = a->ref->end - b->ref->end; -return diff; -}