src/hg/overlapSelect/chromAnn.c 1.15
1.15 2009/07/31 18:09:23 markd
added rcsids
Index: src/hg/overlapSelect/chromAnn.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/overlapSelect/chromAnn.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -b -B -U 1000000 -r1.14 -r1.15
--- src/hg/overlapSelect/chromAnn.c 22 Jul 2009 07:01:28 -0000 1.14
+++ src/hg/overlapSelect/chromAnn.c 31 Jul 2009 18:09:23 -0000 1.15
@@ -1,558 +1,560 @@
/* chromAnn - chromosome annotations, generic object to store annotations from
* other formats */
#include "common.h"
#include "chromAnn.h"
#include "binRange.h"
#include "rowReader.h"
#include "psl.h"
#include "bed.h"
#include "chain.h"
#include "genePred.h"
#include "coordCols.h"
#include "verbose.h"
+static char const rcsid[] = "$Id$";
+
static struct chromAnnBlk* chromAnnBlkNew(struct chromAnn *ca, int start, int end)
/* Create a new block spanning start to end and add it to the chromAnn object.
 * The first block sets ca->start and ca->end; later blocks widen them as
 * needed.  ca->totalSize grows by (end - start).  The block is pushed onto
 * the head of ca->blocks, so the list is not in coordinate order until it is
 * sorted.  Aborts with an error message if end < start. */
{
struct chromAnnBlk* caBlk;
/* validate before allocating, so nothing is allocated on the failure path */
if (end < start)
    {
    /* ca->name is NULL for annotations created without a name; never hand a
     * NULL pointer to a %s conversion */
    errAbort("invalid block coordinates for %s: start=%d end=%d",
             ((ca->name != NULL) ? ca->name : "(unnamed)"), start, end);
    }
AllocVar(caBlk);
caBlk->ca = ca;
caBlk->start = start;
caBlk->end = end;
if (ca->blocks == NULL)
    {
    ca->start = start;
    ca->end = end;
    }
else
    {
    ca->start = min(ca->start, start);
    ca->end = max(ca->end, end);
    }
ca->totalSize += (end - start);
slAddHead(&ca->blocks, caBlk);
return caBlk;
}
static void chromAnnBlkFreeList(struct chromAnnBlk *blks)
/* free list of objects */
{
struct chromAnnBlk *blk;
while ((blk = slPopHead(&blks)) != NULL)
freeMem(blk);
}
static struct chromAnn* chromAnnNew(char* chrom, char strand, char* name, void *rec,
                                    void (*recWrite)(struct chromAnn*, FILE *, char),
                                    void (*recFree)(struct chromAnn *))
/* Allocate a chromAnn that has no blocks yet.  chrom is copied, and name is
 * copied when it is not NULL.  rec is stored as given, together with the
 * functions used to write it (recWrite) and to release it (recFree); the
 * new object takes over responsibility for rec. */
{
struct chromAnn* newCa;
AllocVar(newCa);
newCa->rec = rec;
newCa->recWrite = recWrite;
newCa->recFree = recFree;
newCa->strand = strand;
newCa->chrom = cloneString(chrom);
if (name != NULL)
    newCa->name = cloneString(name);
/* range stays empty until blocks are added */
newCa->start = 0;
newCa->end = 0;
return newCa;
}
static int chromAnnBlkCmp(const void *va, const void *vb)
/* Sort comparison for two chromAnnBlk objects: order by start, and by end
 * when the starts are equal.  va and vb each point at a block pointer. */
{
const struct chromAnnBlk *const *blkA = va;
const struct chromAnnBlk *const *blkB = vb;
if ((*blkA)->start != (*blkB)->start)
    return (*blkA)->start - (*blkB)->start;
return (*blkA)->end - (*blkB)->end;
}
static void chromAnnFinish(struct chromAnn* ca)
/* Finish creation of a chromAnn after all blocks are added.  Blocks are
 * pushed onto the head of the list as they are created, so this sorts
 * ca->blocks with chromAnnBlkCmp (by start, then end). */
{
slSort(&ca->blocks, chromAnnBlkCmp);
}
void chromAnnFree(struct chromAnn **caPtr)
/* Free a chromAnn and everything it holds: the attached record (through its
 * recFree callback), the chrom and name strings, and the block list.  Does
 * nothing when *caPtr is NULL. */
{
struct chromAnn *doomed = *caPtr;
if (doomed == NULL)
    return;
doomed->recFree(doomed);
freeMem(doomed->chrom);
freeMem(doomed->name);
chromAnnBlkFreeList(doomed->blocks);
freez(caPtr);
}
int chromAnnTotalBlockSize(struct chromAnn* ca)
/* Sum (end - start) over every block of a chromAnn and return the total. */
{
int total = 0;
struct chromAnnBlk *blk = ca->blocks;
while (blk != NULL)
    {
    total += (blk->end - blk->start);
    blk = blk->next;
    }
return total;
}
static void strVectorWrite(struct chromAnn *ca, FILE *fh, char term)
/* Write a chromAnn whose record is a NULL-terminated vector of strings.  The
 * strings are written tab-separated and followed by the term character. */
{
char **cols = ca->rec;
char **col;
assert(cols != NULL);
for (col = cols; *col != NULL; col++)
    {
    /* separator goes before every column except the first */
    if (col != cols)
        putc_unlocked('\t', fh);
    fputs(*col, fh);
    }
putc_unlocked(term, fh);
}
static void strVectorFree(struct chromAnn *ca)
/* Free chromAnn data that is represented as a vector of strings.  The vector
 * held in ca->rec is released with a single freez call; ca->rec is NULL when
 * the reader was not asked to save the input columns. */
{
freez(&ca->rec);
}
static void addBedBlocks(struct chromAnn* ca, unsigned opts, struct bed* bed)
/* Add one chromAnn block per BED block.  When chromAnnCds is set in opts,
 * each block is clipped to the thickStart..thickEnd range; blocks that end
 * up empty are not added. */
{
int clipToThick = (opts & chromAnnCds) != 0;
int i;
for (i = 0; i < bed->blockCount; i++)
    {
    int blkStart = bed->chromStart + bed->chromStarts[i];
    int blkEnd = blkStart + bed->blockSizes[i];
    if (clipToThick)
        {
        if (blkStart < bed->thickStart)
            blkStart = bed->thickStart;
        if (blkEnd > bed->thickEnd)
            blkEnd = bed->thickEnd;
        }
    if (!(blkStart < blkEnd))
        continue;  /* nothing left of this block */
    chromAnnBlkNew(ca, blkStart, blkEnd);
    }
}
static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car)
/* Read the next BED row and convert it to a chromAnn; returns NULL when
 * rowReaderNext reports no row.  Rows must have at least 3 columns.  The raw
 * columns are attached to the result only when chromAnnSaveLines is set. */
{
struct rowReader *reader = car->data;
char **savedCols = NULL;
struct bed *bed;
struct chromAnn *ca;

if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, 3);
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);
bed = bedLoadN(reader->row, reader->numCols);
ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, savedCols,
                 strVectorWrite, strVectorFree);

if ((bed->blockCount != 0) && !(car->opts & chromAnnRange))
    addBedBlocks(ca, car->opts, bed);   /* per-block annotation */
else if (!(car->opts & chromAnnCds))
    chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd);   /* whole range */
else if (bed->thickStart < bed->thickEnd)
    chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd);   /* thick range only */

chromAnnFinish(ca);
bedFree(&bed);
return ca;
}
static void chromAnnBedReaderFree(struct chromAnnReader **carPtr)
/* Free a BED reader: releases its rowReader and then the reader itself.
 * Does nothing when *carPtr is NULL. */
{
struct chromAnnReader *reader = *carPtr;
if (reader == NULL)
    return;
struct rowReader *rows = reader->data;
rowReaderFree(&rows);
freez(carPtr);
}
struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts)
/* Construct a chromAnnReader that produces chromAnn objects from the rows of
 * a BED file opened with rowReaderOpen. */
{
struct chromAnnReader *reader;
AllocVar(reader);
reader->opts = opts;
reader->caRead = chromAnnBedReaderRead;
reader->carFree = chromAnnBedReaderFree;
reader->data = rowReaderOpen(fileName, FALSE);
return reader;
}
static void addGenePredBlocks(struct chromAnn* ca, unsigned opts, struct genePred* gp)
/* Add one chromAnn block per genePred exon.  When chromAnnCds is set in opts,
 * each exon is clipped to cdsStart..cdsEnd; exons that end up empty are not
 * added. */
{
int clipToCds = (opts & chromAnnCds) != 0;
int i;
for (i = 0; i < gp->exonCount; i++)
    {
    int blkStart = gp->exonStarts[i];
    int blkEnd = gp->exonEnds[i];
    if (clipToCds)
        {
        if (gp->cdsStart > blkStart)
            blkStart = gp->cdsStart;
        if (gp->cdsEnd < blkEnd)
            blkEnd = gp->cdsEnd;
        }
    if (!(blkStart < blkEnd))
        continue;  /* exon lies entirely outside the kept range */
    chromAnnBlkNew(ca, blkStart, blkEnd);
    }
}
static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car)
/* Read the next genePred row and build a chromAnn from it; returns NULL when
 * rowReaderNext reports no row.  When chromAnnCds is requested and the record
 * has no CDS, no blocks are added and the result keeps a zero-length range. */
{
struct rowReader *reader = car->data;
char **savedCols = NULL;
struct genePred *gp;
struct chromAnn *ca;

if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, GENEPRED_NUM_COLS);
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);
gp = genePredLoad(reader->row);
ca = chromAnnNew(gp->chrom, gp->strand[0], gp->name, savedCols,
                 strVectorWrite, strVectorFree);

if (!(car->opts & chromAnnRange))
    addGenePredBlocks(ca, car->opts, gp);   /* per-exon annotation */
else if (!(car->opts & chromAnnCds))
    chromAnnBlkNew(ca, gp->txStart, gp->txEnd);   /* whole transcript range */
else if (gp->cdsStart < gp->cdsEnd)
    chromAnnBlkNew(ca, gp->cdsStart, gp->cdsEnd);   /* CDS range only */

chromAnnFinish(ca);
genePredFree(&gp);
return ca;
}
static void chromAnnGenePredReaderFree(struct chromAnnReader **carPtr)
/* Free a genePred reader: releases its rowReader and then the reader itself.
 * Does nothing when *carPtr is NULL. */
{
struct chromAnnReader *reader = *carPtr;
if (reader == NULL)
    return;
struct rowReader *rows = reader->data;
rowReaderFree(&rows);
freez(carPtr);
}
struct chromAnnReader *chromAnnGenePredReaderNew(char *fileName, unsigned opts)
/* Construct a chromAnnReader that produces chromAnn objects from the rows of
 * a genePred file opened with rowReaderOpen. */
{
struct chromAnnReader *reader;
AllocVar(reader);
reader->opts = opts;
reader->caRead = chromAnnGenePredReaderRead;
reader->carFree = chromAnnGenePredReaderFree;
reader->data = rowReaderOpen(fileName, FALSE);
return reader;
}
static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl)
/* Add one chromAnn block per PSL alignment block.  The query side is used
 * when chromAnnUseQSide is set in opts, otherwise the target side.  Block
 * sizes are multiplied by 3 when pslIsProtein() is true.  Blocks on a '-'
 * strand are converted with reverseIntRange() using the sequence size of the
 * selected side. */
{
/* The strand is a character and the multiplier is a count, so they are held
 * in char/int rather than boolean; a genuinely boolean type would collapse
 * 3 to 1 and could never compare equal to '-'. */
char blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl);
int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize;
unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts;
/* NOTE(review): the x3 multiplier is applied to whichever side is selected;
 * confirm this is intended when chromAnnUseQSide is set on a protein PSL. */
int blkSizeMult = pslIsProtein(psl) ? 3 : 1;
int iBlk;
for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
    {
    int start = blocks[iBlk];
    int end = start + (blkSizeMult * psl->blockSizes[iBlk]);
    if (blkStrand == '-')
        reverseIntRange(&start, &end, size);
    chromAnnBlkNew(ca, start, end);
    }
}
static char getPslTSideStrand(struct psl *psl)
/* Get the strand to use for a PSL when doing target side overlaps.  When the
 * PSL has only one strand character the query strand is used.  When it has
 * two, the target strand is used and flipped if the query strand is '-'. */
{
if (psl->strand[1] == '\0')
    return pslQStrand(psl);  // untranslated

// translated
char tStrand = pslTStrand(psl);
if (pslQStrand(psl) != '-')
    return tStrand;
return (tStrand == '-') ? '+' : '-';  // query reverse complemented
}
static char getPslQSideStrand(struct psl *psl)
/* Get the strand to use for a PSL when doing query side overlaps.  When the
 * PSL has only one strand character the target strand is used.  When it has
 * two, the query strand is used and flipped if the target strand is '-'. */
{
if (psl->strand[1] == '\0')
    return pslTStrand(psl);  // untranslated

// translated
char qStrand = pslQStrand(psl);
if (pslTStrand(psl) != '-')
    return qStrand;
return (qStrand == '-') ? '+' : '-';  // target strand is '-'
}
static struct chromAnn* chromAnnPslReaderRead(struct chromAnnReader *car)
/* Read the next PSL row and convert it to a chromAnn; returns NULL when
 * rowReaderNext reports no row.  With chromAnnUseQSide the query name is the
 * chromAnn's chrom and the target name is its name; otherwise the roles are
 * swapped.  The raw columns are attached only when chromAnnSaveLines is set. */
{
struct rowReader *reader = car->data;
char **savedCols = NULL;
struct psl *psl;
struct chromAnn *ca;
int useQSide = (car->opts & chromAnnUseQSide) != 0;

if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, PSL_NUM_COLS);
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);
psl = pslLoad(reader->row);

if (useQSide)
    ca = chromAnnNew(psl->qName, getPslQSideStrand(psl), psl->tName, savedCols,
                     strVectorWrite, strVectorFree);
else
    ca = chromAnnNew(psl->tName, getPslTSideStrand(psl), psl->qName, savedCols,
                     strVectorWrite, strVectorFree);

if (!(car->opts & chromAnnRange))
    addPslBlocks(ca, car->opts, psl);   /* per-block annotation */
else if (useQSide)
    chromAnnBlkNew(ca, psl->qStart, psl->qEnd);
else
    chromAnnBlkNew(ca, psl->tStart, psl->tEnd);

chromAnnFinish(ca);
pslFree(&psl);
return ca;
}
static void chromAnnPslReaderFree(struct chromAnnReader **carPtr)
/* Free a PSL reader: releases its rowReader and then the reader itself.
 * Does nothing when *carPtr is NULL. */
{
struct chromAnnReader *reader = *carPtr;
if (reader == NULL)
    return;
struct rowReader *rows = reader->data;
rowReaderFree(&rows);
freez(carPtr);
}
struct chromAnnReader *chromAnnPslReaderNew(char *fileName, unsigned opts)
/* Construct a chromAnnReader that produces chromAnn objects from the rows of
 * a PSL file opened with rowReaderOpen. */
{
struct chromAnnReader *reader;
AllocVar(reader);
reader->opts = opts;
reader->caRead = chromAnnPslReaderRead;
reader->carFree = chromAnnPslReaderFree;
reader->data = rowReaderOpen(fileName, FALSE);
return reader;
}
static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* Add one chromAnn block per chain block using query coordinates.  When the
 * chain's query strand is '-', each range is converted with reverseIntRange()
 * using the query size.  opts is not used. */
{
int qIsMinus = (chain->qStrand == '-');
struct cBlock *cb = chain->blockList;
while (cb != NULL)
    {
    int qs = cb->qStart;
    int qe = cb->qEnd;
    if (qIsMinus)
        reverseIntRange(&qs, &qe, chain->qSize);
    chromAnnBlkNew(ca, qs, qe);
    cb = cb->next;
    }
}
static void addChainTBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* Add one chromAnn block per chain block using target coordinates as they
 * are stored in the chain.  opts is not used. */
{
struct cBlock *cb = chain->blockList;
while (cb != NULL)
    {
    chromAnnBlkNew(ca, cb->tStart, cb->tEnd);
    cb = cb->next;
    }
}
struct chromAnnChainReader
/* reader data for chain files; stored in the data field of a chromAnnReader */
{
struct lineFile *lf;  /* open chain file that chainRead() reads from */
};
static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term)
/* Write a chromAnn whose record is a chain, using chainWrite().  Only a
 * newline terminator is accepted; this is checked with an assert. */
{
assert(term == '\n');
struct chain *rec = ca->rec;
chainWrite(rec, fh);
}
static void chainRecFree(struct chromAnn *ca)
/* Free chromAnn chain data.  ca->rec holds the chain only when the reader
 * was asked to save records; otherwise it is NULL and is passed to
 * chainFree() as such. */
{
chainFree((struct chain**)&ca->rec);
}
static struct chromAnn* chromAnnChainReaderRead(struct chromAnnReader *car)
/* Read the next chain and convert it to a chromAnn; returns NULL when
 * chainRead() returns NULL.  With chromAnnSaveLines the chain itself becomes
 * the chromAnn's record and is freed with it; otherwise the chain is freed
 * here once the blocks have been added. */
{
struct chromAnnChainReader *reader = car->data;
struct chain *chain = chainRead(reader->lf);
if (chain == NULL)
    return NULL;

int keepChain = (car->opts & chromAnnSaveLines) != 0;
int useQSide = (car->opts & chromAnnUseQSide) != 0;
struct chain *rec = keepChain ? chain : NULL;
struct chromAnn *ca;

if (useQSide)
    ca = chromAnnNew(chain->qName, '+', chain->tName, rec,
                     chainRecWrite, chainRecFree);
else
    ca = chromAnnNew(chain->tName, chain->qStrand, chain->qName, rec,
                     chainRecWrite, chainRecFree);

if (car->opts & chromAnnRange)
    {
    /* single block covering the whole aligned range */
    if (useQSide)
        chromAnnBlkNew(ca, chain->qStart, chain->qEnd);
    else
        chromAnnBlkNew(ca, chain->tStart, chain->tEnd);
    }
else if (useQSide)
    addChainQBlocks(ca, car->opts, chain);
else
    addChainTBlocks(ca, car->opts, chain);

chromAnnFinish(ca);
if (!keepChain)
    chainFree(&chain);
return ca;
}
static void chromAnnChainReaderFree(struct chromAnnReader **carPtr)
/* Free a chain reader: closes its lineFile, releases the chain reader data,
 * then the reader itself.  Does nothing when *carPtr is NULL. */
{
struct chromAnnReader *reader = *carPtr;
if (reader == NULL)
    return;
struct chromAnnChainReader *chainData = reader->data;
lineFileClose(&chainData->lf);
freeMem(chainData);
freez(carPtr);
}
struct chromAnnReader *chromAnnChainReaderNew(char *fileName, unsigned opts)
/* Construct a chromAnnReader that produces chromAnn objects from a chain
 * file opened with lineFileOpen. */
{
struct chromAnnChainReader *chainData;
struct chromAnnReader *reader;
AllocVar(chainData);
chainData->lf = lineFileOpen(fileName, TRUE);
AllocVar(reader);
reader->opts = opts;
reader->data = chainData;
reader->caRead = chromAnnChainReaderRead;
reader->carFree = chromAnnChainReaderFree;
return reader;
}
struct chromAnnTabReader
/* reader data for tab files; stored in the data field of a chromAnnReader */
{
struct coordCols cols; // column spec, copied from the caller's spec
struct rowReader *rr; // tab row reader
};
static struct chromAnn* chromAnnTabReaderRead(struct chromAnnReader *car)
/* read a chromAnn object from a tab file or table */
{
struct chromAnnTabReader *catr = car->data;
if (!rowReaderNext(catr->rr))
return NULL;
rowReaderExpectAtLeast(catr->rr, catr->cols.minNumCols);
char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(catr->rr) : NULL;
struct coordColVals colVals = coordColParseRow(&catr->cols, catr->rr);
struct chromAnn *ca = chromAnnNew(colVals.chrom, colVals.strand, NULL, rawCols,
strVectorWrite, strVectorFree);
chromAnnBlkNew(ca, colVals.start, colVals.end);
return ca;
}
static void chromAnnTabReaderFree(struct chromAnnReader **carPtr)
/* Free a tab reader: releases its rowReader, the tab reader data, then the
 * reader itself.  Does nothing when *carPtr is NULL. */
{
struct chromAnnReader *reader = *carPtr;
if (reader == NULL)
    return;
struct chromAnnTabReader *tabData = reader->data;
rowReaderFree(&tabData->rr);
freeMem(tabData);
freez(carPtr);
}
struct chromAnnReader *chromAnnTabReaderNew(char *fileName, struct coordCols* cols, unsigned opts)
/* Construct a chromAnnReader for an arbitrary tab file.  The column spec in
 * cols is copied into the reader, and the file is opened with rowReaderOpen. */
{
struct chromAnnTabReader *tabData;
struct chromAnnReader *reader;
AllocVar(tabData);
tabData->cols = *cols;
tabData->rr = rowReaderOpen(fileName, FALSE);
AllocVar(reader);
reader->opts = opts;
reader->data = tabData;
reader->caRead = chromAnnTabReaderRead;
reader->carFree = chromAnnTabReaderFree;
return reader;
}
int chromAnnRefLocCmp(const void *va, const void *vb)
/* Compare the locations of two chromAnnRef objects: by chrom name, then by
 * start, then by end of the referenced annotation.  va and vb each point at
 * a chromAnnRef pointer. */
{
const struct chromAnnRef *const *refA = va;
const struct chromAnnRef *const *refB = vb;
int diff = strcmp((*refA)->ref->chrom, (*refB)->ref->chrom);
if (diff != 0)
    return diff;
diff = (*refA)->ref->start - (*refB)->ref->start;
if (diff != 0)
    return diff;
return (*refA)->ref->end - (*refB)->ref->end;
}