src/hg/hgTables/intersect.c 1.45

1.45 2009/03/12 16:45:15 kent
Making filter and non-base-pair intersections work with bigWig. (Base-by-base intersections work 80%, but going to have to change design to get last 20% working.) Fixed bug I introduced in correlation of regular wig that Kayla spotted. Moving bigWig stuff to it's own module.
Index: src/hg/hgTables/intersect.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/intersect.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -b -B -U 4 -r1.44 -r1.45
--- src/hg/hgTables/intersect.c	2 Oct 2008 21:10:43 -0000	1.44
+++ src/hg/hgTables/intersect.c	12 Mar 2009 16:45:15 -0000	1.45
@@ -198,10 +198,12 @@
 
 
 if (!wigOptions)
     {
+    boolean bigWig = isBigWig(curTable);
     hPrintf("<H4>Intersect bases covered by %s and/or %s:</H4>\n",
 	    name, iName);
+    if (!bigWig)
     hPrintf("These combinations will discard the names and "
 	    "gene/alignment structure (if any) of %s and produce a simple "
 	    "list of position ranges.<P>\n",
 	    name);
@@ -214,11 +216,14 @@
     hPrintf("Check the following boxes to complement one or both tables.  "
 	    "To complement a table means to include a base pair in the "
 	    "intersection/union if it is <I>not</I> included in the table."
 	    "<P>\n");
+    if (!bigWig)
+	{
     jsMakeTrackingCheckBox(cart, hgtaNextInvertTable, "invertTable", FALSE);
     printf("Complement %s before base-pair-wise intersection/union <BR>\n",
 	   name);
+	}
     jsMakeTrackingCheckBox(cart, hgtaNextInvertTable2, "invertTable2", FALSE);
     printf("Complement %s before base-pair-wise intersection/union <P>\n",
 	   iName);
     }
@@ -336,9 +341,9 @@
 			     int chromSize)
 /* Return the number of bases belonging to bedItem covered by bits. */
 {
 int count = 0;
-int i, j;
+int i;
 
 if (bedItem->chromStart < 0 || bedItem->chromEnd > chromSize)
     errAbort("Item out of range [0,%d): %s %s:%d-%d",
 	     chromSize, (bedItem->name ? bedItem->name : ""),
@@ -355,21 +360,16 @@
     {
     for (i=0;  i < bedItem->blockCount;  i++)
 	{
 	int start = bedItem->chromStart + bedItem->chromStarts[i];
-	int end   = start + bedItem->blockSizes[i];
-	for (j=start;  j < end;  j++)
-	    if (bitReadOne(bits, j))
-		count++;
+	count += bitCountRange(bits, start, bedItem->blockSizes[i]);
 	}
     }
 else
     {
-    for (i=bedItem->chromStart;  i < bedItem->chromEnd;  i++)
-	if (bitReadOne(bits, i))
-	    count++;
+    count = bitCountRange(bits, bedItem->chromStart, bedItem->chromEnd - bedItem->chromStart);
     }
-    return(count);
+return(count);
 }
 
 static struct bed *bitsToBed4List(Bits *bits, int bitSize, 
 	char *chrom, int minSize, int rangeStart, int rangeEnd,
@@ -409,11 +409,21 @@
 slReverse(&bedList);
 return(bedList);
 }
 
+boolean intersectOverlapFilter(char *op, double moreThresh, double lessThresh, double overlap)
+/* Return TRUE if have enough (or not too much) overlap according to thresholds and op. */
+{
+return ((sameString("any", op) && (overlap > 0.0)) ||
+    (sameString("none", op) && (overlap <= 0.0)) ||
+    (sameString("more", op) && (overlap >= moreThresh)) ||
+    (sameString("less", op) && (overlap <= lessThresh)));
+}
+
 static struct bed *filterBedByOverlap(struct bed *bedListIn, boolean hasBlocks,
-				      char *op, int moreThresh, int lessThresh,
+				      char *op, double moreThresh, double lessThresh,
 				      Bits *bits, int chromSize)
+/* Return list of beds that pass overlap filter. */
 {
 struct bed *intersectedBedList = NULL;
 struct bed *bed = NULL, *nextBed = NULL;
 /* Loop through primary bed list seeing if each one intersects
@@ -434,14 +444,9 @@
 	length = (bed->chromEnd - bed->chromStart);
     if (length == 0)
 	length = 2;
     pctBasesOverlap = ((numBasesOverlap * 100.0) / length);
-    if ((sameString("any", op) && (numBasesOverlap > 0)) ||
-	(sameString("none", op) && (numBasesOverlap == 0)) ||
-	(sameString("more", op) &&
-	 (pctBasesOverlap >= moreThresh)) ||
-	(sameString("less", op) &&
-	 (pctBasesOverlap <= lessThresh)))
+    if (intersectOverlapFilter(op, moreThresh, lessThresh, pctBasesOverlap))
 	{
 	slAddHead(&intersectedBedList, bed);
 	}
     }
@@ -472,8 +477,44 @@
 	    }
 	}
 }
 
+Bits *bitsForIntersectingTable(struct sqlConnection *conn, struct region *region, int chromSize,
+	boolean isBpWise)
+/* Get a bitmap that corresponds to the table we are intersecting with. 
+ * Consult CGI vars to figure out what table it is. */
+{
+boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
+char *table2 = cartString(cart, hgtaIntersectTable);
+struct hTableInfo *hti2 = getHti(database, table2);
+struct lm *lm2 = lmInit(64*1024);
+Bits *bits2 = bitAlloc(chromSize+8);
+struct bed *bedList2 = getFilteredBeds(conn, table2, region, lm2, NULL);
+if (!isBpWise)
+    expandZeroSize(bedList2, hti2->hasBlocks, chromSize);
+bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0);
+if (invTable2)
+    bitNot(bits2, chromSize);
+lmCleanup(&lm2);
+return bits2;
+}
+
+char *intersectOp()
+/* Get intersect op from CGI var and make sure it's ok. */
+{
+char *op = cartString(cart, hgtaIntersectOp);
+if ((!sameString("any", op)) &&
+    (!sameString("none", op)) &&
+    (!sameString("more", op)) &&
+    (!sameString("less", op)) &&
+    (!sameString("and", op)) &&
+    (!sameString("or", op)))
+    {
+    errAbort("Invalid value \"%s\" of CGI variable %s", op, hgtaIntersectOp);
+    }
+return op;
+}
+
 static struct bed *intersectOnRegion(
 	struct sqlConnection *conn,	/* Open connection to database. */
 	struct region *region, 		/* Region to work inside */
 	char *table1,			/* Table input list is from. */
@@ -486,43 +527,21 @@
  * which is independent from input.  This potentially will
  * chew up bedList1. */
 {
 /* Grab parameters for intersection from cart. */
-int moreThresh = cartCgiUsualInt(cart, hgtaMoreThreshold, 0);
-int lessThresh = cartCgiUsualInt(cart, hgtaLessThreshold, 100);
+double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0);
+double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100);
 boolean invTable = cartCgiUsualBoolean(cart, hgtaInvertTable, FALSE);
-boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
-char *op = cartString(cart, hgtaIntersectOp);
-char *table2 = cartString(cart, hgtaIntersectTable);
+char *op = intersectOp();
 /* --- TODO MIKE - replace bedList2, bits2 with baseMask stuff. */
 /* Load up intersecting bedList2 (to intersect with) */
-struct hTableInfo *hti2 = getHti(database, table2);
-struct lm *lm2 = lmInit(64*1024);
-struct bed *bedList2 = getFilteredBeds(conn, table2, region, lm2, NULL);
-/* Set up some other local vars. */
-struct hTableInfo *hti1 = getHti(database, table1);
 int chromSize = hChromSize(database, region->chrom);
-Bits *bits2 = bitAlloc(chromSize+8);
 boolean isBpWise = (sameString("and", op) || sameString("or", op));
+Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise);
+/* Set up some other local vars. */
+struct hTableInfo *hti1 = getHti(database, table1);
 struct bed *intersectedBedList = NULL;
 
-/* Sanity check on intersect op. */
-if ((!sameString("any", op)) &&
-    (!sameString("none", op)) &&
-    (!sameString("more", op)) &&
-    (!sameString("less", op)) &&
-    (!sameString("and", op)) &&
-    (!sameString("or", op)))
-    {
-    errAbort("Invalid value \"%s\" of CGI variable %s", op, hgtaIntersectOp);
-    }
-
-
-/* Load intersecting track into a bitmap. */
-if (!isBpWise)
-    expandZeroSize(bedList2, hti2->hasBlocks, chromSize);
-bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0);
-
 /* Produce intersectedBedList. */
 if (isBpWise)
     {
 /* --- TODO MIKE - replace, bits1 with baseMask stuff. */
@@ -534,10 +553,8 @@
     bedOrBits(bits1, chromSize, bedList1, hasBlocks, 0);
     /* invert inputs if necessary */
     if (invTable)
 	bitNot(bits1, chromSize);
-    if (invTable2)
-	bitNot(bits2, chromSize);
     /* do the intersection/union */
     if (sameString("and", op))
 	bitAnd(bits1, bits2, chromSize);
     else
@@ -558,9 +575,8 @@
     intersectedBedList = filterBedByOverlap(bedList1, hti1->hasBlocks, op,
 					    moreThresh, lessThresh, bits2,
 					    chromSize);
 bitFree(&bits2);
-lmCleanup(&lm2);
 return intersectedBedList;
 }
 
 static char *getPrimaryType(char *primarySubtrack, struct trackDb *tdb)
@@ -604,10 +620,10 @@
     struct trackDb *subtrack = NULL;
     char *primaryType = getPrimaryType(table, curTrack);
     char *op = cartString(cart, hgtaSubtrackMergeOp);
     boolean isBpWise = (sameString(op, "and") || sameString(op, "or"));
-    int moreThresh = cartInt(cart, hgtaSubtrackMergeMoreThreshold);
-    int lessThresh = cartInt(cart, hgtaSubtrackMergeLessThreshold);
+    double moreThresh = cartDouble(cart, hgtaSubtrackMergeMoreThreshold);
+    double lessThresh = cartDouble(cart, hgtaSubtrackMergeLessThreshold);
     boolean firstTime = TRUE;
     if (sameString(op, "cat"))
 	{
 	struct bed *bedList = getRegionAsBed(db, table, region, filter,