src/hg/overlapSelect/selectTable.c 1.31

1.31 2009/07/31 18:09:24 markd
added rcsids
Index: src/hg/overlapSelect/selectTable.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/overlapSelect/selectTable.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -b -B -U 1000000 -r1.30 -r1.31
--- src/hg/overlapSelect/selectTable.c	14 Jan 2009 18:43:35 -0000	1.30
+++ src/hg/overlapSelect/selectTable.c	31 Jul 2009 18:09:24 -0000	1.31
@@ -1,328 +1,330 @@
 /* selectTable - module that contains ranges use to select.  This module
  * functions as a global object. */
 
 #include "common.h"
 #include "selectTable.h"
 #include "rowReader.h"
 #include "bits.h"
 #include "chromAnn.h"
 #include "chromAnnMap.h"
 #include "verbose.h"
 
+static char const rcsid[] = "$Id$";
+
 /* Module-level map holding the select annotations.  It stays NULL until
  * selectMapEnsure() creates it with chromAnnMapNew(); selectTableFree()
  * hands it to chromAnnMapFree(). */
 static struct chromAnnMap* selectMap = NULL; // select object map
 
 static void selectMapEnsure()
 /* Lazily build the module's select map: allocate it on first use and
  * leave an already existing map untouched. */
 {
 if (selectMap != NULL)
     return;
 selectMap = chromAnnMapNew();
 }
 
 struct chromAnnMapIter selectTableFirst()
 /* iterator over select table */
 {
 selectMapEnsure();
 return chromAnnMapFirst(selectMap);
 }
 
 void selectTableFree()
 /* Release the select table by passing the module's map to
  * chromAnnMapFree(). */
 {
 struct chromAnnMap **mapRef = &selectMap;
 chromAnnMapFree(mapRef);
 }
 
 static void selectDumpChromAnn(struct chromAnn *ca, char *label)
 /* Trace a chromAnn through verbose(): one summary line when the verbose
  * level is 2 or more, followed by one line per block when it is 3 or
  * more.  A zero strand is printed as '?'. */
 {
 struct chromAnnBlk *blk;
 int strandCh;
 
 if (verboseLevel() < 2)
     return;
 strandCh = (ca->strand == 0) ? '?' : ca->strand;
 verbose(2, "%s: %s: %s %c %d-%d\n", label, ca->name, ca->chrom,
         strandCh, ca->start, ca->end);
 
 if (verboseLevel() < 3)
     return;
 for (blk = ca->blocks; blk != NULL; blk = blk->next)
     verbose(3, "    blk: %d-%d\n", blk->start, blk->end);
 }
 
 void selectTableAddRecords(struct chromAnnReader *car)
 /* Drain the reader: every chromAnn returned by car->caRead() is traced
  * and added to the select table, until the reader returns NULL. */
 {
 struct chromAnn *rec;
 
 selectMapEnsure();
 for (rec = car->caRead(car); rec != NULL; rec = car->caRead(car))
     {
     selectDumpChromAnn(rec, "selectAddChromAnn");
     chromAnnMapAdd(selectMap, rec);
     }
 }
 
 static boolean isSelfMatch(unsigned opts, struct chromAnn *inCa, struct chromAnn* selCa)
 /* Decide whether selCa is the same record as inCa.  They are considered
  * the same when the overall ranges are equal, the names are equal (names
  * are only compared when both are non-NULL) and the block lists are
  * identical.  Chromosome and strand are not examined here; the caller is
  * expected to have dealt with those already. */
 {
 struct chromAnnBlk *inBlk = inCa->blocks;
 struct chromAnnBlk *selBlk = selCa->blocks;
 boolean bothNamed = (inCa->name != NULL) && (selCa->name != NULL);
 
 if (inCa->start != selCa->start)
     return FALSE;
 if (inCa->end != selCa->end)
     return FALSE;
 if (bothNamed && !sameString(inCa->name, selCa->name))
     return FALSE;
 
 /* walk both block lists in lock step, comparing coordinates */
 while ((inBlk != NULL) && (selBlk != NULL))
     {
     if ((inBlk->start != selBlk->start) || (inBlk->end != selBlk->end))
         return FALSE;
     inBlk = inBlk->next;
     selBlk = selBlk->next;
     }
 
 /* both lists must be exhausted together, otherwise the block counts differ */
 return (inBlk == NULL) && (selBlk == NULL);
 }
 
 static boolean passCriteria(unsigned opts, struct chromAnn *inCa, struct chromAnn* selCa)
 /* See if the global (non-threshold) criteria for pairing inCa with selCa
  * are satisfied.  Returns FALSE as soon as one enabled option rejects the
  * pair:
  *   selStrand         - strands must be equal
  *   selOppositeStrand - strands must differ
  *   selExcludeSelf    - selCa must not be the same record as inCa
  *   selIdMatch        - names must be equal; only enforced when both
  *                       names are non-NULL
  * Returns TRUE when no enabled option rejects the pair. */
 {
 if ((opts & selStrand) && (inCa->strand != selCa->strand))
     return FALSE;
 if ((opts & selOppositeStrand) && (inCa->strand == selCa->strand))
     return FALSE;
 if ((opts & selExcludeSelf) && isSelfMatch(opts, inCa, selCa))
     return FALSE;
 /* NOTE(review): this test previously rejected pairs whose names were
  * identical, which is the opposite of what the selIdMatch name indicates;
  * it now rejects pairs whose names differ.  Confirm against the option's
  * documented meaning. */
 if ((opts & selIdMatch) && (inCa->name != NULL) && (selCa->name != NULL)
     && !sameString(inCa->name, selCa->name))
     return FALSE;
 return TRUE;
 }
 
 int selectOverlapBases(struct chromAnn *ca1, struct chromAnn *ca2)
 /* Count the bases shared by two annotations: every block of ca1 is
  * intersected with every block of ca2 and the positive intersection
  * lengths are summed. */
 {
 struct chromAnnBlk *blkA, *blkB;
 int total = 0;
 
 for (blkA = ca1->blocks; blkA != NULL; blkA = blkA->next)
     for (blkB = ca2->blocks; blkB != NULL; blkB = blkB->next)
         {
         int span = min(blkA->end, blkB->end) - max(blkA->start, blkB->start);
         if (span <= 0)
             continue;  /* this pair of blocks does not intersect */
         total += span;
         }
 return total;
 }
 
 float selectFracOverlap(struct chromAnn *ca, int overBases)
 /* Fraction of ca that is overlapped: overBases divided by ca->totalSize,
  * computed in single precision. */
 {
 float covered = (float)overBases;
 float size = (float)ca->totalSize;
 return covered / size;
 }
 
 float selectFracSimilarity(struct chromAnn *ca1, struct chromAnn *ca2,
                            int overBases)
 /* Fraction similarity between two annotations, given the number of
  * overlapped bases: twice overBases divided by the sum of the two
  * totalSize values. */
 {
 float numer = (float)(2*overBases);
 float denom = (float)(ca1->totalSize+ca2->totalSize);
 return numer / denom;
 }
 
 static boolean isOverlapped(unsigned opts, struct chromAnn *inCa, struct chromAnn* selCa,
                             struct overlapCriteria *criteria)
 /* Decide whether inCa and selCa overlap according to the thresholds in
  * criteria.
  *
  * Positive criteria (meeting any enabled one selects):
  *   bases      - enabled when >= 0; overlapped bases must reach it
  *   similarity - enabled when > 0.0; fraction similarity must reach it
  *   threshold  - enabled when > 0.0; fraction of inCa overlapped must reach it
  * When none of these is enabled, any overlap of at least one base selects.
  *
  * Ceilings (enabled when <= 1.0): reaching thresholdCeil or similarityCeil
  * vetoes the selection regardless of the positive criteria. */
 {
 unsigned overBases = selectOverlapBases(inCa, selCa);
 boolean haveCriteria = FALSE;
 boolean accepted = FALSE;
 boolean vetoed = FALSE;
 
 /* base-count criterion */
 if (criteria->bases >= 0)
     {
     haveCriteria = TRUE;
     if (overBases >= criteria->bases)
         accepted = TRUE;
     }
 /* bi-directional similarity criterion */
 if (criteria->similarity > 0.0)
     {
     haveCriteria = TRUE;
     if (selectFracSimilarity(inCa, selCa, overBases) >= criteria->similarity)
         accepted = TRUE;
     }
 /* uni-directional criterion: fraction of inCa covered */
 if (criteria->threshold > 0.0)
     {
     haveCriteria = TRUE;
     if (selectFracOverlap(inCa, overBases) >= criteria->threshold)
         accepted = TRUE;
     }
 /* with nothing enabled, a single shared base is enough */
 if (!haveCriteria)
     accepted = (overBases > 0);
 
 /* ceilings: reaching either one rejects the pair */
 if ((criteria->thresholdCeil <= 1.0)
     && (selectFracOverlap(inCa, overBases) >= criteria->thresholdCeil))
     vetoed = TRUE;
 if ((criteria->similarityCeil <= 1.0)
     && (selectFracSimilarity(inCa, selCa, overBases) >= criteria->similarityCeil))
     vetoed = TRUE;
 
 return accepted && !vetoed;
 }
 
 static void addOverlapRecs(struct chromAnnRef **overlappingRecs, struct chromAnnRef *newRecs)
 /* Add every record referenced by newRecs to *overlappingRecs using
  * chromAnnRefAdd() (which presumably skips records already present;
  * confirm against chromAnnRefAdd). */
 {
 struct chromAnnRef *cur = newRecs;
 while (cur != NULL)
     {
     chromAnnRefAdd(overlappingRecs, cur->ref);
     cur = cur->next;
     }
 }
 static boolean selectOverlappingEntry(unsigned opts, struct chromAnn *inCa,
                                       struct chromAnn* selCa, struct overlapCriteria *criteria)
 /* Evaluate inCa against one select chromAnn.  The pair must first pass
  * the global criteria and then the overlap thresholds.  Entry and the
  * outcome are traced at verbose level 2. */
 {
 verbose(2, "\toverlapping: enter %s: %d-%d, %c\n", selCa->name, selCa->start, selCa->end,
         ((selCa->strand == '\0') ? '?' : selCa->strand));
 
 if (!passCriteria(opts, inCa, selCa))
     {
     verbose(2, "\toverlapping: leave %s: fail criteria\n", selCa->name);
     return FALSE;
     }
 
 boolean hit = isOverlapped(opts, inCa, selCa, criteria);
 verbose(2, "\toverlapping: leave %s: %s\n", selCa->name,
         (hit ? "yes" : "no"));
 return hit;
 }
 
 static boolean selectWithOverlapping(unsigned opts, struct chromAnn *inCa,
                                      struct chromAnnRef* overlapping,
                                      struct overlapCriteria *criteria,
                                      struct chromAnnRef **overlappingRecs)
 /* Given the list of candidate select records, determine whether inCa is
  * selected.  Each selecting record is flagged as used.  When
  * overlappingRecs is non-NULL all selecting records are added to it;
  * otherwise the scan stops at the first hit.  Returns TRUE if at least
  * one record selected inCa. */
 {
 boolean wantRecs = (overlappingRecs != NULL);
 boolean found = FALSE;
 struct chromAnnRef *pending = NULL;  /* hits collected here, published at the end */
 struct chromAnnRef *cand;
 
 for (cand = overlapping; cand != NULL; cand = cand->next)
     {
     if (!selectOverlappingEntry(opts, inCa, cand->ref, criteria))
         continue;
     found = TRUE;
     cand->ref->used = TRUE;
     if (!wantRecs)
         break;  /* caller only needs a yes/no answer */
     chromAnnRefAdd(&pending, cand->ref);
     }
 
 /* n.b. hits are published only after the scan so that a minCoverage
  * style check could some day be applied first */
 if (wantRecs)
     {
     if (found)
         addOverlapRecs(overlappingRecs, pending);
     slFreeList(&pending);
     }
 return found;
 }
 
 boolean selectIsOverlapped(unsigned opts, struct chromAnn *inCa,
                            struct overlapCriteria *criteria,
                            struct chromAnnRef **overlappingRecs)
 /* Determine if a range is overlapped.  If overlappingRecs is not null, a list
  * of the of selected records is returned.  Free with slFreelList. */
 {
 selectMapEnsure();
 selectDumpChromAnn(inCa, "selectIsOverlapped");
 boolean hit = FALSE;
 struct chromAnnRef *overlapping = chromAnnMapFindOverlap(selectMap, inCa);
 if (overlapping != NULL)
     {
     hit = selectWithOverlapping(opts, inCa, overlapping, criteria, overlappingRecs);
     slFreeList(&overlapping);
     }
 verbose(2, "selectIsOverlapped: leave %s %s\n", inCa->name, (hit ? "yes" : "no"));
 return hit;
 }
 
 static void addToAggregateMap(Bits *overMap, int mapOff, struct chromAnn *ca1,
                               struct chromAnn *ca2)
 /* Mark in overMap the positions where blocks of ca1 and ca2 intersect.
  * Bit indexes are coordinates minus mapOff. */
 {
 struct chromAnnBlk *blkA, *blkB;
 
 for (blkA = ca1->blocks; blkA != NULL; blkA = blkA->next)
     for (blkB = ca2->blocks; blkB != NULL; blkB = blkB->next)
         {
         if ((blkA->start >= blkB->end) || (blkA->end <= blkB->start))
             continue;  /* the two blocks are disjoint */
         int lo = max(blkA->start, blkB->start);
         int hi = min(blkA->end, blkB->end);
         bitSetRange(overMap, lo-mapOff, hi-lo);
         }
 }
 
 static void computeAggregateOverlap(unsigned opts, struct chromAnn *inCa,
                                     struct chromAnnRef* overlapping,
                                     struct overlapAggStats *stats)
 /* Compute the aggregate overlap of inCa with all candidate records that
  * pass the global criteria.  A bit map covering inCa's range collects the
  * overlapped positions, so a base covered by several records is counted
  * once.  Fills in stats->inOverBases and stats->inOverlap; stats is left
  * untouched when inCa has a zero-length range. */
 {
 int mapOff = inCa->start;
 int mapLen = (inCa->end - inCa->start);
 struct chromAnnRef *cand;
 Bits *covered;
 
 assert(mapLen >= 0);
 if (mapLen == 0)
     return;  /* empty range (e.g. no CDS); nothing can overlap */
 
 covered = bitAlloc(mapLen);
 for (cand = overlapping; cand != NULL; cand = cand->next)
     {
     if (!passCriteria(opts, inCa, cand->ref))
         continue;
     addToAggregateMap(covered, mapOff, inCa, cand->ref);
     }
 stats->inOverBases = bitCountRange(covered, 0, mapLen);
 stats->inOverlap = ((float)stats->inOverBases) / ((float)inCa->totalSize);
 bitFree(&covered);
 }
 
 struct overlapAggStats selectAggregateOverlap(unsigned opts, struct chromAnn *inCa)
 /* Compute the aggregate overlap statistics of inCa against the select
  * table.  The returned stats start out zeroed, with inBases set to
  * inCa->totalSize; the overlap fields are filled in by
  * computeAggregateOverlap(). */
 {
 struct overlapAggStats stats;
 struct chromAnnRef *candidates;
 
 selectMapEnsure();
 ZeroVar(&stats);
 stats.inBases = inCa->totalSize;
 
 candidates = chromAnnMapFindOverlap(selectMap, inCa);
 computeAggregateOverlap(opts, inCa, candidates, &stats);
 slFreeList(&candidates);
 
 verbose(2, "selectAggregateOverlap: %s: %s %d-%d, %c => %0.3g\n", inCa->name, inCa->chrom, inCa->start, inCa->end,
         ((inCa->strand == '\0') ? '?' : inCa->strand), stats.inOverlap);
 return stats;
 }