src/hg/overlapSelect/chromAnn.c 1.15

1.15 2009/07/31 18:09:23 markd
added rcsids
Index: src/hg/overlapSelect/chromAnn.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/overlapSelect/chromAnn.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -b -B -U 1000000 -r1.14 -r1.15
--- src/hg/overlapSelect/chromAnn.c	22 Jul 2009 07:01:28 -0000	1.14
+++ src/hg/overlapSelect/chromAnn.c	31 Jul 2009 18:09:23 -0000	1.15
@@ -1,558 +1,560 @@
 /* chromAnn - chromosome annotations, generic object to store annotations from
  * other formats */
 #include "common.h"
 #include "chromAnn.h"
 #include "binRange.h"
 #include "rowReader.h"
 #include "psl.h"
 #include "bed.h"
 #include "chain.h"
 #include "genePred.h"
 #include "coordCols.h"
 #include "verbose.h"
 
+static char const rcsid[] = "$Id$";
+
 static struct chromAnnBlk* chromAnnBlkNew(struct chromAnn *ca, int start, int end)
 /* Create a new block spanning start..end and push it onto the head of
  * ca->blocks.  The first block sets ca->start/ca->end; later blocks widen
  * them.  ca->totalSize grows by (end - start).  Aborts via errAbort when
  * end < start.  Returns the new block. */
 {
 struct chromAnnBlk* caBlk;
 
 /* Validate before allocating.  ca->name is left NULL by chromAnnNew when no
  * name is supplied, so never hand it directly to a %s conversion. */
 if (end < start)
     errAbort("invalid block coordinates for %s: start=%d end=%d",
              ((ca->name != NULL) ? ca->name : "(unnamed)"), start, end);
 
 AllocVar(caBlk);
 caBlk->ca = ca;
 caBlk->start = start;
 caBlk->end = end;
 
 if (ca->blocks == NULL)
     {
     /* first block defines the initial bounds */
     ca->start = start;
     ca->end = end;
     }
 else
     {
     /* subsequent blocks can only widen the bounds */
     ca->start = min(ca->start, start);
     ca->end = max(ca->end, end);
     }
 ca->totalSize += (end - start);
 slAddHead(&ca->blocks, caBlk);
 return caBlk;
 }
 
 static void chromAnnBlkFreeList(struct chromAnnBlk *blks)
 /* free list of objects */
 {
 struct chromAnnBlk *blk;
 while ((blk = slPopHead(&blks)) != NULL)
     freeMem(blk);
 }
 
 static struct chromAnn* chromAnnNew(char* chrom, char strand, char* name, void *rec,
                                     void (*recWrite)(struct chromAnn*, FILE *, char),
                                     void (*recFree)(struct chromAnn *))
 /* Create a new chromAnn with no blocks.  chrom is cloned, as is name when it
  * is not NULL.  The rec pointer is stored as-is together with the recWrite
  * and recFree callbacks; chromAnnFree later calls recFree on the object. */
 {
 struct chromAnn* newCa;
 AllocVar(newCa);
 
 /* callbacks and the opaque record they operate on */
 newCa->rec = rec;
 newCa->recWrite = recWrite;
 newCa->recFree = recFree;
 
 /* location; the range stays 0..0 until blocks are added */
 newCa->chrom = cloneString(chrom);
 newCa->strand = strand;
 newCa->start = 0;
 newCa->end = 0;
 
 if (name != NULL)
     newCa->name = cloneString(name);
 return newCa;
 }
 
 static int chromAnnBlkCmp(const void *va, const void *vb)
 /* Sort comparison of two chromAnnBlk objects: order by start, breaking ties
  * by end.  va and vb each point to a chromAnnBlk pointer. */
 {
 const struct chromAnnBlk *blkA = *((struct chromAnnBlk **)va);
 const struct chromAnnBlk *blkB = *((struct chromAnnBlk **)vb);
 if (blkA->start != blkB->start)
     return blkA->start - blkB->start;
 return blkA->end - blkB->end;
 }
 
 static void chromAnnFinish(struct chromAnn* ca)
 /* Finish creation of a chromAnn after all blocks are added.  Sorts
  * ca->blocks with chromAnnBlkCmp (by start, then by end); chromAnnBlkNew
  * pushes each block onto the head of the list, so the list is not otherwise
  * kept in coordinate order. */
 {
 slSort(&ca->blocks, chromAnnBlkCmp);
 }
 
 void chromAnnFree(struct chromAnn **caPtr)
 /* Free a chromAnn: its record (through the recFree callback), chrom, name,
  * and block list, then the object itself via freez(caPtr).  Does nothing
  * when *caPtr is NULL. */
 {
 struct chromAnn *doomed = *caPtr;
 if (doomed == NULL)
     return;
 
 doomed->recFree(doomed);
 freeMem(doomed->chrom);
 freeMem(doomed->name);
 chromAnnBlkFreeList(doomed->blocks);
 freez(caPtr);
 }
 
 int chromAnnTotalBlockSize(struct chromAnn* ca)
 /* Sum (end - start) over all blocks of a chromAnn and return the total. */
 {
 int total = 0;
 struct chromAnnBlk *blk = ca->blocks;
 while (blk != NULL)
     {
     total += (blk->end - blk->start);
     blk = blk->next;
     }
 return total;
 }
 
 static void strVectorWrite(struct chromAnn *ca, FILE *fh, char term)
 /* Write a chromAnn whose rec is a NULL-terminated vector of strings: the
  * strings are written to fh separated by tabs and followed by term. */
 {
 char **cols = ca->rec;
 assert(cols != NULL);
 char **col;
 for (col = cols; *col != NULL; col++)
     {
     if (col != cols)
         putc_unlocked('\t', fh);  /* separator before every column but the first */
     fputs(*col, fh);
     }
 putc_unlocked(term, fh);
 }
 
 static void strVectorFree(struct chromAnn *ca)
 /* Free chromAnn data that is represented as a vector of strings.  Releases
  * ca->rec with freez; installed as the recFree callback by the readers that
  * save their rows as column vectors. */
 {
 freez(&ca->rec);
 }
 
 static void addBedBlocks(struct chromAnn* ca, unsigned opts, struct bed* bed)
 /* add blocks from a bed */
 {
 int iBlk;
 for (iBlk = 0; iBlk < bed->blockCount; iBlk++)
     {
     int start = bed->chromStart + bed->chromStarts[iBlk];
     int end = start + bed->blockSizes[iBlk];
     if (opts & chromAnnCds)
         {
         if (start < bed->thickStart)
             start = bed->thickStart;
         if (end > bed->thickEnd)
             end = bed->thickEnd;
         }
     if (start < end)
         chromAnnBlkNew(ca, start, end);
     }
 }
 
 static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car)
 /* read next BED and convert to a chromAnn */
 {
 struct rowReader *rr = car->data;
 if (!rowReaderNext(rr))
     return NULL;
 rowReaderExpectAtLeast(rr, 3);
 
 char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;
 struct bed *bed = bedLoadN(rr->row, rr->numCols);
 struct chromAnn *ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, rawCols,
                                   strVectorWrite, strVectorFree);
 
 if ((bed->blockCount == 0) || (car->opts & chromAnnRange))
     {
     if (car->opts & chromAnnCds)
         {
         if (bed->thickStart < bed->thickEnd)
             chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd);
         }
     else
         chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd);
     }
 else
     addBedBlocks(ca, car->opts, bed);
 
 chromAnnFinish(ca);
 bedFree(&bed);
 return ca;
 }
 
 static void chromAnnBedReaderFree(struct chromAnnReader **carPtr)
 /* Free a BED reader: releases the row reader held in car->data, then the
  * reader object itself.  Does nothing when *carPtr is NULL. */
 {
 struct chromAnnReader *reader = *carPtr;
 if (reader == NULL)
     return;
 struct rowReader *rows = reader->data;
 rowReaderFree(&rows);
 freez(carPtr);
 }
 
 struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts)
 /* Construct a chromAnnReader for a BED file.  The reader holds a rowReader
  * opened on fileName and is released through its carFree callback. */
 {
 struct chromAnnReader *reader;
 AllocVar(reader);
 reader->opts = opts;
 reader->caRead = chromAnnBedReaderRead;
 reader->carFree = chromAnnBedReaderFree;
 reader->data = rowReaderOpen(fileName, FALSE);
 return reader;
 }
 
 static void addGenePredBlocks(struct chromAnn* ca, unsigned opts, struct genePred* gp)
 /* add blocks from a genePred */
 {
 int iExon;
 for (iExon = 0; iExon < gp->exonCount; iExon++)
     {
     int start = gp->exonStarts[iExon];
     int end = gp->exonEnds[iExon];
     if ((opts & chromAnnCds) && (gp->cdsStart > start))
         start = gp->cdsStart;
     if ((opts & chromAnnCds) && (gp->cdsEnd < end))
         end = gp->cdsEnd;
     if (start < end)
         chromAnnBlkNew(ca, start, end);
     }
 }
 
 static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car)
 /* Read the next genePred row and build a chromAnn from it; returns NULL when
  * the row reader has no more rows.  With chromAnnRange a single block is
  * used (cdsStart..cdsEnd under chromAnnCds, txStart..txEnd otherwise);
  * without it the exons are added by addGenePredBlocks.  If chromAnnCds is
  * set and there is no CDS, no block is added and the returned record has a
  * zero-length range. */
 {
 struct rowReader *reader = car->data;
 if (!rowReaderNext(reader))
     return NULL;
 rowReaderExpectAtLeast(reader, GENEPRED_NUM_COLS);
 
 char **savedCols = NULL;
 if (car->opts & chromAnnSaveLines)
     savedCols = rowReaderCloneColumns(reader);
 struct genePred *gp = genePredLoad(reader->row);
 struct chromAnn* ca = chromAnnNew(gp->chrom, gp->strand[0], gp->name, savedCols,
                                   strVectorWrite, strVectorFree);
 
 if (!(car->opts & chromAnnRange))
     addGenePredBlocks(ca, car->opts, gp);
 else if (!(car->opts & chromAnnCds))
     chromAnnBlkNew(ca, gp->txStart, gp->txEnd);
 else if (gp->cdsStart < gp->cdsEnd)
     chromAnnBlkNew(ca, gp->cdsStart, gp->cdsEnd);
 
 chromAnnFinish(ca);
 genePredFree(&gp);
 return ca;
 }
 
 static void chromAnnGenePredReaderFree(struct chromAnnReader **carPtr)
 /* Free a genePred reader: releases the row reader held in car->data, then
  * the reader object itself.  Does nothing when *carPtr is NULL. */
 {
 struct chromAnnReader *reader = *carPtr;
 if (reader == NULL)
     return;
 struct rowReader *rows = reader->data;
 rowReaderFree(&rows);
 freez(carPtr);
 }
 
 struct chromAnnReader *chromAnnGenePredReaderNew(char *fileName, unsigned opts)
 /* Construct a chromAnnReader for a genePred file.  The reader holds a
  * rowReader opened on fileName and is released through its carFree
  * callback. */
 {
 struct chromAnnReader *reader;
 AllocVar(reader);
 reader->opts = opts;
 reader->caRead = chromAnnGenePredReaderRead;
 reader->carFree = chromAnnGenePredReaderFree;
 reader->data = rowReaderOpen(fileName, FALSE);
 return reader;
 }
 
 static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl)
 /* Add one block to ca for each block of a psl, taken from the query side
  * when chromAnnUseQSide is set in opts and from the target side otherwise.
  * Block sizes are multiplied by 3 when pslIsProtein reports a protein psl.
  * Blocks on a '-' strand side are passed through reverseIntRange with that
  * side's sequence size before being added. */
 {
 /* the strand is a '+'/'-' character and the multiplier is a count; neither
  * is a flag, so neither is held in a boolean */
 char blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl);
 int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize;
 unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts;
 /* NOTE(review): the multiplier is applied to whichever side is selected;
  * confirm that scaling is also intended for the query side. */
 int blkSizeMult = pslIsProtein(psl) ? 3 : 1;
 int iBlk;
 for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
     {
     int start = blocks[iBlk];
     int end = start + (blkSizeMult * psl->blockSizes[iBlk]);
     if (blkStrand == '-')
         reverseIntRange(&start, &end, size);
     chromAnnBlkNew(ca, start, end);
     }
 }
 
 static char getPslTSideStrand(struct psl *psl)
 /* Get the strand to use for a PSL when doing target side overlaps.  With a
  * one-character strand field this is the query strand; with a second strand
  * character it is the target strand, flipped when the query strand is '-'. */
 {
 if (psl->strand[1] == '\0')
     return pslQStrand(psl);  // untranslated
 
 // translated
 char tStrand = pslTStrand(psl);
 if (pslQStrand(psl) != '-')
     return tStrand;
 return (tStrand == '-') ? '+' : '-'; // query reverse complemented
 }
 
 static char getPslQSideStrand(struct psl *psl)
 /* Get the strand to use for a PSL when doing query side overlaps.  With a
  * one-character strand field this is the target strand; with a second strand
  * character it is the query strand, flipped when the target strand is '-'. */
 {
 if (psl->strand[1] == '\0')
     return pslTStrand(psl);  // untranslated
 
 // translated
 char qStrand = pslQStrand(psl);
 if (pslTStrand(psl) != '-')
     return qStrand;
 return (qStrand == '-') ? '+' : '-'; // target reverse complemented
 }
 
 static struct chromAnn* chromAnnPslReaderRead(struct chromAnnReader *car)
 /* read next chromAnn from a PSL file  */
 {
 struct rowReader *rr = car->data;
 if (!rowReaderNext(rr))
     return NULL;
 rowReaderExpectAtLeast(rr, PSL_NUM_COLS);
 
 char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;
 
 struct psl *psl = pslLoad(rr->row);
 struct chromAnn* ca;
 if (car->opts & chromAnnUseQSide)
     ca = chromAnnNew(psl->qName, getPslQSideStrand(psl), psl->tName, rawCols,
                      strVectorWrite, strVectorFree);
 else
     ca = chromAnnNew(psl->tName, getPslTSideStrand(psl), psl->qName, rawCols,
                      strVectorWrite, strVectorFree);
 
 if (car->opts & chromAnnRange)
     {
     if (car->opts & chromAnnUseQSide)
         chromAnnBlkNew(ca, psl->qStart, psl->qEnd);
     else
         chromAnnBlkNew(ca, psl->tStart, psl->tEnd);
     }
 else    
     addPslBlocks(ca, car->opts, psl);
 chromAnnFinish(ca);
 pslFree(&psl);
 return ca;
 }
 
 static void chromAnnPslReaderFree(struct chromAnnReader **carPtr)
 /* Free a PSL reader: releases the row reader held in car->data, then the
  * reader object itself.  Does nothing when *carPtr is NULL. */
 {
 struct chromAnnReader *reader = *carPtr;
 if (reader == NULL)
     return;
 struct rowReader *rows = reader->data;
 rowReaderFree(&rows);
 freez(carPtr);
 }
 
 struct chromAnnReader *chromAnnPslReaderNew(char *fileName, unsigned opts)
 /* Construct a chromAnnReader for a PSL file.  The reader holds a rowReader
  * opened on fileName and is released through its carFree callback. */
 {
 struct chromAnnReader *reader;
 AllocVar(reader);
 reader->opts = opts;
 reader->caRead = chromAnnPslReaderRead;
 reader->carFree = chromAnnPslReaderFree;
 reader->data = rowReaderOpen(fileName, FALSE);
 return reader;
 }
 
 static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
 /* add query blocks from a chain */
 {
 struct cBlock *blk;
 for (blk = chain->blockList; blk != NULL; blk = blk->next)
     {
     int start = blk->qStart;
     int end = blk->qEnd;
     if (chain->qStrand == '-')
         reverseIntRange(&start, &end, chain->qSize);
     chromAnnBlkNew(ca, start, end);
     }
 }
 
 static void addChainTBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
 /* Add the target side (tStart..tEnd) of every block of a chain to ca.
  * opts is not used. */
 {
 struct cBlock *cb = chain->blockList;
 while (cb != NULL)
     {
     chromAnnBlkNew(ca, cb->tStart, cb->tEnd);
     cb = cb->next;
     }
 }
 
 struct chromAnnChainReader
 /* reader data for chain files */
 {
     struct lineFile *lf;  // line file that chains are read from with chainRead
 };
 
 static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term)
 /* Write a chromAnn whose rec is a chain, using chainWrite.  term is asserted
  * to be a newline and is not otherwise used. */
 {
 struct chain *savedChain = ca->rec;
 assert(term == '\n');
 chainWrite(savedChain, fh);
 }
 
 static void chainRecFree(struct chromAnn *ca)
 /* Free chromAnn chain data: hands the address of ca->rec, viewed as a
  * struct chain pointer, to chainFree.  Installed as the recFree callback by
  * the chain reader. */
 {
 chainFree((struct chain**)&ca->rec);
 }
 
 static struct chromAnn* chromAnnChainReaderRead(struct chromAnnReader *car)
 /* read a chromAnn object from a tab file or table */
 {
 struct chromAnnChainReader *carr = car->data;
 struct chain *chain = chainRead(carr->lf);
 if (chain == NULL)
     return NULL;
 
 struct chromAnn* ca;
 if (car->opts & chromAnnUseQSide)
     ca = chromAnnNew(chain->qName, '+', chain->tName,
                      ((car->opts & chromAnnSaveLines) ? chain : NULL),
                      chainRecWrite, chainRecFree);
 else
     ca = chromAnnNew(chain->tName, chain->qStrand, chain->qName,
                      ((car->opts & chromAnnSaveLines) ? chain : NULL),
                      chainRecWrite, chainRecFree);
 
 if (car->opts & chromAnnRange)
     {
     if (car->opts & chromAnnUseQSide)
         chromAnnBlkNew(ca, chain->qStart, chain->qEnd);
     else
         chromAnnBlkNew(ca, chain->tStart, chain->tEnd);
     }
 else    
     {
     if (car->opts & chromAnnUseQSide)
         addChainQBlocks(ca, car->opts, chain);
     else
         addChainTBlocks(ca, car->opts, chain);
     }
 chromAnnFinish(ca);
 if (!(car->opts & chromAnnSaveLines))
     chainFree(&chain);
 return ca;
 }
 
 static void chromAnnChainReaderFree(struct chromAnnReader **carPtr)
 /* Free a chain reader: closes its line file, frees the chain reader data,
  * then the reader object itself.  Does nothing when *carPtr is NULL. */
 {
 struct chromAnnReader *reader = *carPtr;
 if (reader == NULL)
     return;
 struct chromAnnChainReader *chainReader = reader->data;
 lineFileClose(&chainReader->lf);
 freeMem(chainReader);
 freez(carPtr);
 }
 
 struct chromAnnReader *chromAnnChainReaderNew(char *fileName, unsigned opts)
 /* Construct a chromAnnReader for a chain file.  The reader holds a
  * chromAnnChainReader whose line file is opened on fileName, and is released
  * through its carFree callback. */
 {
 struct chromAnnChainReader *chainReader;
 AllocVar(chainReader);
 chainReader->lf = lineFileOpen(fileName, TRUE);
 
 struct chromAnnReader *reader;
 AllocVar(reader);
 reader->data = chainReader;
 reader->opts = opts;
 reader->caRead = chromAnnChainReaderRead;
 reader->carFree = chromAnnChainReaderFree;
 return reader;
 }
 
 struct chromAnnTabReader
 /* reader data for tab files: which columns hold the coordinates, plus the
  * row reader the rows come from */
 {
     struct coordCols  cols;  // column spec, copied from the caller's spec
     struct rowReader *rr;    // tab row reader
 };
 
 static struct chromAnn* chromAnnTabReaderRead(struct chromAnnReader *car)
 /* read a chromAnn object from a tab file or table */
 {
 struct chromAnnTabReader *catr = car->data;
 if (!rowReaderNext(catr->rr))
     return NULL;
 rowReaderExpectAtLeast(catr->rr, catr->cols.minNumCols);
 
 char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(catr->rr) : NULL;
 struct coordColVals colVals = coordColParseRow(&catr->cols, catr->rr);
 
 struct chromAnn *ca = chromAnnNew(colVals.chrom, colVals.strand, NULL, rawCols,
                                   strVectorWrite, strVectorFree);
 chromAnnBlkNew(ca, colVals.start, colVals.end);
 return ca;
 }
 
 static void chromAnnTabReaderFree(struct chromAnnReader **carPtr)
 /* Free a tab reader: releases its row reader, frees the tab reader data,
  * then the reader object itself.  Does nothing when *carPtr is NULL. */
 {
 struct chromAnnReader *reader = *carPtr;
 if (reader == NULL)
     return;
 struct chromAnnTabReader *tabReader = reader->data;
 rowReaderFree(&tabReader->rr);
 freeMem(tabReader);
 freez(carPtr);
 }
 
 struct chromAnnReader *chromAnnTabReaderNew(char *fileName, struct coordCols* cols, unsigned opts)
 /* Construct a chromAnnReader for an arbitrary tab file.  The column spec in
  * cols is copied into the reader and a rowReader is opened on fileName.  The
  * reader is released through its carFree callback. */
 {
 struct chromAnnTabReader *tabReader;
 AllocVar(tabReader);
 tabReader->cols = *cols;
 tabReader->rr = rowReaderOpen(fileName, FALSE);
 
 struct chromAnnReader *reader;
 AllocVar(reader);
 reader->data = tabReader;
 reader->opts = opts;
 reader->caRead = chromAnnTabReaderRead;
 reader->carFree = chromAnnTabReaderFree;
 return reader;
 }
 
 int chromAnnRefLocCmp(const void *va, const void *vb)
 /* Compare the locations of two chromAnnRef objects: by chrom (strcmp), then
  * by start, then by end of the referenced annotations.  va and vb each point
  * to a chromAnnRef pointer. */
 {
 const struct chromAnnRef *refA = *((struct chromAnnRef **)va);
 const struct chromAnnRef *refB = *((struct chromAnnRef **)vb);
 int chromDiff = strcmp(refA->ref->chrom, refB->ref->chrom);
 if (chromDiff != 0)
     return chromDiff;
 if (refA->ref->start != refB->ref->start)
     return refA->ref->start - refB->ref->start;
 return refA->ref->end - refB->ref->end;
 }