src/hg/hgTables/intersect.c 1.45
1.45 2009/03/12 16:45:15 kent
Making filter and non-base-pair intersections work with bigWig. (Base-by-base intersections work 80%, but going to have to change design to get last 20% working.) Fixed bug I introduced in correlation of regular wig that Kayla spotted. Moving bigWig stuff to it's own module.
Index: src/hg/hgTables/intersect.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/intersect.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -b -B -U 4 -r1.44 -r1.45
--- src/hg/hgTables/intersect.c 2 Oct 2008 21:10:43 -0000 1.44
+++ src/hg/hgTables/intersect.c 12 Mar 2009 16:45:15 -0000 1.45
@@ -198,10 +198,12 @@
if (!wigOptions)
{
+ boolean bigWig = isBigWig(curTable);
hPrintf("<H4>Intersect bases covered by %s and/or %s:</H4>\n",
name, iName);
+ if (!bigWig)
hPrintf("These combinations will discard the names and "
"gene/alignment structure (if any) of %s and produce a simple "
"list of position ranges.<P>\n",
name);
@@ -214,11 +216,14 @@
hPrintf("Check the following boxes to complement one or both tables. "
"To complement a table means to include a base pair in the "
"intersection/union if it is <I>not</I> included in the table."
"<P>\n");
+ if (!bigWig)
+ {
jsMakeTrackingCheckBox(cart, hgtaNextInvertTable, "invertTable", FALSE);
printf("Complement %s before base-pair-wise intersection/union <BR>\n",
name);
+ }
jsMakeTrackingCheckBox(cart, hgtaNextInvertTable2, "invertTable2", FALSE);
printf("Complement %s before base-pair-wise intersection/union <P>\n",
iName);
}
@@ -336,9 +341,9 @@
int chromSize)
/* Return the number of bases belonging to bedItem covered by bits. */
{
int count = 0;
-int i, j;
+int i;
if (bedItem->chromStart < 0 || bedItem->chromEnd > chromSize)
errAbort("Item out of range [0,%d): %s %s:%d-%d",
chromSize, (bedItem->name ? bedItem->name : ""),
@@ -355,21 +360,16 @@
{
for (i=0; i < bedItem->blockCount; i++)
{
int start = bedItem->chromStart + bedItem->chromStarts[i];
- int end = start + bedItem->blockSizes[i];
- for (j=start; j < end; j++)
- if (bitReadOne(bits, j))
- count++;
+ count += bitCountRange(bits, start, bedItem->blockSizes[i]);
}
}
else
{
- for (i=bedItem->chromStart; i < bedItem->chromEnd; i++)
- if (bitReadOne(bits, i))
- count++;
+ count = bitCountRange(bits, bedItem->chromStart, bedItem->chromEnd - bedItem->chromStart);
}
- return(count);
+return(count);
}
static struct bed *bitsToBed4List(Bits *bits, int bitSize,
char *chrom, int minSize, int rangeStart, int rangeEnd,
@@ -409,11 +409,21 @@
slReverse(&bedList);
return(bedList);
}
+boolean intersectOverlapFilter(char *op, double moreThresh, double lessThresh, double overlap)
+/* Return TRUE if have enough (or not too much) overlap according to thresholds and op. */
+{
+return ((sameString("any", op) && (overlap > 0.0)) ||
+ (sameString("none", op) && (overlap <= 0.0)) ||
+ (sameString("more", op) && (overlap >= moreThresh)) ||
+ (sameString("less", op) && (overlap <= lessThresh)));
+}
+
static struct bed *filterBedByOverlap(struct bed *bedListIn, boolean hasBlocks,
- char *op, int moreThresh, int lessThresh,
+ char *op, double moreThresh, double lessThresh,
Bits *bits, int chromSize)
+/* Return list of beds that pass overlap filter. */
{
struct bed *intersectedBedList = NULL;
struct bed *bed = NULL, *nextBed = NULL;
/* Loop through primary bed list seeing if each one intersects
@@ -434,14 +444,9 @@
length = (bed->chromEnd - bed->chromStart);
if (length == 0)
length = 2;
pctBasesOverlap = ((numBasesOverlap * 100.0) / length);
- if ((sameString("any", op) && (numBasesOverlap > 0)) ||
- (sameString("none", op) && (numBasesOverlap == 0)) ||
- (sameString("more", op) &&
- (pctBasesOverlap >= moreThresh)) ||
- (sameString("less", op) &&
- (pctBasesOverlap <= lessThresh)))
+ if (intersectOverlapFilter(op, moreThresh, lessThresh, pctBasesOverlap))
{
slAddHead(&intersectedBedList, bed);
}
}
@@ -472,8 +477,44 @@
}
}
}
+Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region, int chromSize,
+ boolean isBpWise)
+/* Get a bitmap that corresponds to the table we are intersecting with.
+ * Consult CGI vars to figure out what table it is. */
+{
+boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
+char *table2 = cartString(cart, hgtaIntersectTable);
+struct hTableInfo *hti2 = getHti(database, table2);
+struct lm *lm2 = lmInit(64*1024);
+Bits *bits2 = bitAlloc(chromSize+8);
+struct bed *bedList2 = getFilteredBeds(conn, table2, region, lm2, NULL);
+if (!isBpWise)
+ expandZeroSize(bedList2, hti2->hasBlocks, chromSize);
+bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0);
+if (invTable2)
+ bitNot(bits2, chromSize);
+lmCleanup(&lm2);
+return bits2;
+}
+
+char *intersectOp()
+/* Get intersect op from CGI var and make sure it's ok. */
+{
+char *op = cartString(cart, hgtaIntersectOp);
+if ((!sameString("any", op)) &&
+ (!sameString("none", op)) &&
+ (!sameString("more", op)) &&
+ (!sameString("less", op)) &&
+ (!sameString("and", op)) &&
+ (!sameString("or", op)))
+ {
+ errAbort("Invalid value \"%s\" of CGI variable %s", op, hgtaIntersectOp);
+ }
+return op;
+}
+
static struct bed *intersectOnRegion(
struct sqlConnection *conn, /* Open connection to database. */
struct region *region, /* Region to work inside */
char *table1, /* Table input list is from. */
@@ -486,43 +527,21 @@
* which is independent from input. This potentially will
* chew up bedList1. */
{
/* Grab parameters for intersection from cart. */
-int moreThresh = cartCgiUsualInt(cart, hgtaMoreThreshold, 0);
-int lessThresh = cartCgiUsualInt(cart, hgtaLessThreshold, 100);
+double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0);
+double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100);
boolean invTable = cartCgiUsualBoolean(cart, hgtaInvertTable, FALSE);
-boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
-char *op = cartString(cart, hgtaIntersectOp);
-char *table2 = cartString(cart, hgtaIntersectTable);
+char *op = intersectOp();
/* --- TODO MIKE - replace bedList2, bits2 with baseMask stuff. */
/* Load up intersecting bedList2 (to intersect with) */
-struct hTableInfo *hti2 = getHti(database, table2);
-struct lm *lm2 = lmInit(64*1024);
-struct bed *bedList2 = getFilteredBeds(conn, table2, region, lm2, NULL);
-/* Set up some other local vars. */
-struct hTableInfo *hti1 = getHti(database, table1);
int chromSize = hChromSize(database, region->chrom);
-Bits *bits2 = bitAlloc(chromSize+8);
boolean isBpWise = (sameString("and", op) || sameString("or", op));
+Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
+/* Set up some other local vars. */
+struct hTableInfo *hti1 = getHti(database, table1);
struct bed *intersectedBedList = NULL;
-/* Sanity check on intersect op. */
-if ((!sameString("any", op)) &&
- (!sameString("none", op)) &&
- (!sameString("more", op)) &&
- (!sameString("less", op)) &&
- (!sameString("and", op)) &&
- (!sameString("or", op)))
- {
- errAbort("Invalid value \"%s\" of CGI variable %s", op, hgtaIntersectOp);
- }
-
-
-/* Load intersecting track into a bitmap. */
-if (!isBpWise)
- expandZeroSize(bedList2, hti2->hasBlocks, chromSize);
-bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0);
-
/* Produce intersectedBedList. */
if (isBpWise)
{
/* --- TODO MIKE - replace, bits1 with baseMask stuff. */
@@ -534,10 +553,8 @@
bedOrBits(bits1, chromSize, bedList1, hasBlocks, 0);
/* invert inputs if necessary */
if (invTable)
bitNot(bits1, chromSize);
- if (invTable2)
- bitNot(bits2, chromSize);
/* do the intersection/union */
if (sameString("and", op))
bitAnd(bits1, bits2, chromSize);
else
@@ -558,9 +575,8 @@
intersectedBedList = filterBedByOverlap(bedList1, hti1->hasBlocks, op,
moreThresh, lessThresh, bits2,
chromSize);
bitFree(&bits2);
-lmCleanup(&lm2);
return intersectedBedList;
}
static char *getPrimaryType(char *primarySubtrack, struct trackDb *tdb)
@@ -604,10 +620,10 @@
struct trackDb *subtrack = NULL;
char *primaryType = getPrimaryType(table, curTrack);
char *op = cartString(cart, hgtaSubtrackMergeOp);
boolean isBpWise = (sameString(op, "and") || sameString(op, "or"));
- int moreThresh = cartInt(cart, hgtaSubtrackMergeMoreThreshold);
- int lessThresh = cartInt(cart, hgtaSubtrackMergeLessThreshold);
+ double moreThresh = cartDouble(cart, hgtaSubtrackMergeMoreThreshold);
+ double lessThresh = cartDouble(cart, hgtaSubtrackMergeLessThreshold);
boolean firstTime = TRUE;
if (sameString(op, "cat"))
{
struct bed *bedList = getRegionAsBed(db, table, region, filter,