src/hg/hgTables/wiggle.c 1.75

1.75 2009/07/20 23:33:13 angie
In doOutWig, when handling a composite's subtrack table: instead of applying the appropriate subtrack info only to shortLabel and longLabel, keep the whole subtrack tdb. Its type is used below, and we can no longer assume that composite type = subtrack type.
Index: src/hg/hgTables/wiggle.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/wiggle.c,v
retrieving revision 1.74
retrieving revision 1.75
diff -b -B -U 1000000 -r1.74 -r1.75
--- src/hg/hgTables/wiggle.c	29 May 2009 22:07:01 -0000	1.74
+++ src/hg/hgTables/wiggle.c	20 Jul 2009 23:33:13 -0000	1.75
@@ -1,1243 +1,1240 @@
 /* wiggle - stuff to process wiggle tracks.
  * Much of this is lifted from hgText/hgWigText.c */
 
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "localmem.h"
 #include "portable.h"
 #include "obscure.h"
 #include "jksql.h"
 #include "cheapcgi.h"
 #include "cart.h"
 #include "web.h"
 #include "bed.h"
 #include "hdb.h"
 #include "hui.h"
 #include "hgColors.h"
 #include "trackDb.h"
 #include "customTrack.h"
 #include "wiggle.h"
 #include "hmmstats.h"
 #include "correlate.h"
 #include "hgTables.h"
 
 static char const rcsid[] = "$Id$";
 
 extern char *maxOutMenu[];
 
 
 /*	a common set of stuff is accumulating in the various
  *	routines that call getData.  For the moment make it a macro,
  *	perhaps later it can turn into a routine of its own.
  */
 #define WIG_INIT \
 if (isCustomTrack(table)) \
     { \
     ct = ctLookupName(table); \
     isCustom = TRUE; \
     if (! ct->wiggle) \
 	errAbort("called to work on a custom track '%s' that isn't wiggle data ?", table); \
  \
     if (ct->dbTrack) \
 	safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", \
 		ct->dbTableName); \
     else \
 	safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", \
 		ct->wigFile); \
     hasConstraint = checkWigDataFilter("ct", table, &dataConstraint, &ll, &ul); \
     } \
 else \
     hasConstraint = checkWigDataFilter(database, table, &dataConstraint, \
 			&ll, &ul); \
 \
 wds = wiggleDataStreamNew(); \
 \
 if (anyIntersection()) \
     { \
     char *t2 = cartString(cart, hgtaIntersectTable); \
     if (table && t2 && differentWord(t2,table)) \
 	table2 = t2; \
     } \
 \
 if (hasConstraint)\
     wds->setDataConstraint(wds, dataConstraint, ll, ul);
 
 boolean checkWigDataFilter(char *db, char *table,
 	char **constraint, double *ll, double *ul)
 /*	check if filter exists, return its values, call with db="ct" for
  *	custom tracks	*/
 {
 char varPrefix[128];
 struct hashEl *varList, *var;
 char *pat = NULL;
 char *cmp = NULL;
 if (constraint != NULL)
     *constraint = NULL;	// Make sure return variable gets set to something at least.
 
 if (isCustomTrack(table))
     db = "ct";
 
 safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table);
 
 varList = cartFindPrefix(cart, varPrefix);
 if (varList == NULL)
     return FALSE;
 
 /*	check varList, look for dataValue.pat and dataValue.cmp	*/
 
 for (var = varList; var != NULL; var = var->next)
     {
     if (endsWith(var->name, ".pat"))
 	{
 	char *name;
 	name = cloneString(var->name);
 	tolowers(name);
 	/*	make sure we are actually looking at datavalue	*/
 	if (stringIn("datavalue", name) || stringIn("score", name))
 	    {
 	    pat = cloneString(var->val);
 	    }
 	freeMem(name);
 	}
     if (endsWith(var->name, ".cmp"))
 	{
 	char *name;
 	name = cloneString(var->name);
 	tolowers(name);
 	/*	make sure we are actually looking at datavalue	*/
 	if (stringIn("datavalue", name) || stringIn("score", name))
 	    {
 	    cmp = cloneString(var->val);
 	    tolowers(cmp);
 	    if (stringIn("ignored", cmp))
 		freez(&cmp);
 	    }
 	freeMem(name);
 	}
     }
 
 /*	Must get them both for this to work	*/
 if (cmp && pat)
     {
     int wordCount = 0;
     char *words[2];
     char *dupe = cloneString(pat);
 
     wordCount = chopString(dupe, ", \t\n", words, ArraySize(words));
     switch (wordCount)
 	{
 	case 2: if (ul) *ul = sqlDouble(words[1]);
 	case 1: if (ll) *ll = sqlDouble(words[0]);
 		break;
 	default:
 	    warn("can not understand numbers input for dataValue filter");
 	    errAbort(
 	    "dataValue filter must be one or two numbers (two for 'in range')");
 	}
     if (sameWord(cmp,"in range") && (wordCount != 2))
 	errAbort("'in range' dataValue filter must have two numbers input\n");
 
     if (constraint)
 	*constraint = cmp;
 
     return TRUE;
     }
 else
     return FALSE;
 }	/*	static boolean checkWigDataFilter()	*/
 
 
 static void wigDataHeader(char *name, char *description, char *visibility,
 	enum wigOutputType wigOutType)
 /* Write out custom track header for this wiggle region. */
 {
 switch (wigOutType)
     {
     case wigOutBed:
 	hPrintf("track ");
         break;
     default:
     case wigOutData:
 	hPrintf("track type=wiggle_0");
         break;
     };
 
 if (name != NULL)
     hPrintf(" name=\"%s\"", name);
 if (description != NULL)
     hPrintf(" description=\"%s\"", description);
 if (visibility != NULL)
     hPrintf(" visibility=%s", visibility);
 hPrintf("\n");
 }
 
 static void addBedElement(struct bed **bedList, char *chrom,
 	unsigned start, unsigned end, unsigned count, struct lm *lm)
 {
 struct bed *bed;
 char name[128];
 
 lmAllocVar(lm, bed);
 bed->chrom = lmCloneString(lm, chrom);
 bed->chromStart = start;
 bed->chromEnd = end;
 safef(name,ArraySize(name), "%s.%u", bed->chrom, count);
 bed->name = lmCloneString(lm, name);
 slAddHead(bedList, bed);
 }
 
 static struct bed *invertBedList(struct bed *bedList, struct lm *lm,
 	char *chrom, unsigned chromStart, unsigned chromEnd, unsigned chromSize)
 /*	bed list result is everything NOT in the given bedList	*/
 {
 unsigned elCount = 1;
 unsigned start = 0;				/*	start == end	*/
 unsigned end = 0;				/*	means no element yet */
 unsigned lastEnd = 0;			/*	in the bedList	*/
 struct bed *inverseBedList = NULL;		/*	new list	*/
 struct bed *el;
 
 slSort(&bedList, bedLineCmp);	/* make sure it is sorted */
 
 for (el = bedList; el != NULL; el=el->next)
     {
     /*	past end of chrom, bad bed list	*/
     if (el->chromStart >= chromSize)
 	{
 	end = chromSize;
 	break;
 	}
     if (el->chromStart > end)
 	end = el->chromStart + 1;
     if (end > start)			/* do we have an element */
 	{
 	addBedElement(&inverseBedList, chrom, start, end, elCount++, lm);
 	start = el->chromEnd - 1;	/*	reset to no element yet */
 	end = start;
 	}
     else if (el->chromEnd > start)
 	{
 	start = el->chromEnd - 1;	/*	reset to no element yet */
 	end = start;
 	}
     lastEnd = max(lastEnd,el->chromEnd);
     }
 
 /*	A last element ?	*/
 if (lastEnd < chromSize)
     end = chromSize;
 
 if (end > start)			/* potential last element	*/
     addBedElement(&inverseBedList, chrom, start, end, elCount++, lm);
 
 slSort(&inverseBedList, bedLineCmp);	/* make sure it is sorted */
 
 return inverseBedList;
 }
 
 static struct bed *bedTable2(struct sqlConnection *conn,
 	struct region *region, char *table2)
 /*	get a bed list, possibly complement, for table2	*/
 {
 /* This use of bedTable rather than a bitmap is not really working. The
  * rest of the table browser does intersection at the exon level, while
  * the wig code, which this is part of, does it at the gene level.  I
  * noticed it while working on the corresponding routines for bigWig,
  * which I'm building to work with bitmaps at the exon level.  I'm not
  * sure it's worth fixing this code since nobody has complained, and we're
  * probably going to be doing mostly bigWig rather than wig in the future.
  *    -JK */
 boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
 char *op = cartString(cart, hgtaIntersectOp);
 struct bed *bedList = NULL;
 struct lm *lm1 = lmInit(64*1024);
 
 /*	fetch table 2 as a bed list	*/
 bedList = getFilteredBeds(conn, table2, region, lm1, NULL);
 
 /*	If table 2 bed list needs to be complemented (!table2), then do so */
 if (invTable2 || sameString("none", op))
     {
     unsigned chromStart = 0;		/*	start == end == 0	*/
     unsigned chromEnd = 0;		/*	means do full chrom	*/
     unsigned chromSize = hChromSize(database, region->chrom);
     struct lm *lm2 = lmInit(64*1024);
     struct bed *inverseBedList = NULL;		/*	new list	*/
 
     if ((region->start != 0) || (region->end != 0))
 	{
 	chromStart = region->start;
 	chromEnd = region->end;
 	}
 
     if ((struct bed *)NULL == bedList)
 	{
 	if (0 == region->end)
 	    chromEnd = chromSize;
 	addBedElement(&inverseBedList, region->chrom, chromStart, chromEnd,
 		1, lm2);
 	}
     else
 	inverseBedList=invertBedList(bedList, lm2, region->chrom, chromStart,
 	    chromEnd, chromSize);
 
     lmCleanup(&lm1);			/*	== bedFreeList(&bedList) */
 
     return inverseBedList;
     }
 else
     return bedList;
 }
 
 static unsigned long long getWigglePossibleIntersection(
     struct wiggleDataStream *wds, struct region *region, char *db,
 	char *table2, struct bed **intersectBedList,
 	    char splitTableOrFileName[256], int operations)
 {
 unsigned long long valuesMatched = 0;
 
 /*	Intersection is either type "any" or "none"
  *	The "none" case is already taken care of during the loading of
  *	table 2 since it was then inverted at that time so it is already
  *	"none" of itself.
  */
 
 
 /* If table2 is NULL, it means that the WIG_INIT macro recognized that we
  * are working on table2 now, so we should not use intersectBedList
  * (in fact we may be trying to compute it here). */
 if ((table2 != NULL) && anyIntersection())
     {
     if (*intersectBedList)
 	{
 	valuesMatched = wds->getDataViaBed(wds, db, splitTableOrFileName,
 	    operations, intersectBedList);
 	}
     }
 else
     {
     valuesMatched = wds->getData(wds, db, splitTableOrFileName, operations);
     }
 
 return valuesMatched;
 }
 
 static void intersectDataVector(char *table, struct dataVector *dataVector1,
 			struct region *region, struct sqlConnection *conn)
 /* Perform intersection (if specified) on dataVector. */
 {
 /* If table is type wig (not bedGraph), then intersection has already been
  * performed on each input (other selected subtracks must be the same type
  * as table).
  * Otherwise, handle intersection here. */
 if (anyIntersection() && !isWiggle(database, table) && !isBigWig(table))
     {
     char *track2 = cartString(cart, hgtaIntersectTrack);
     char *table2 = cartString(cart, hgtaIntersectTable);
     if (table2 && differentWord(table2, table))
 	{
 	struct trackDb *tdb2 = findTrack(track2, fullTrackList);
 	struct trackTable *tt2 = trackTableNew(tdb2, table2, conn);
 	struct dataVector *dataVector2 = dataVectorFetchOneRegion(tt2, region,
 								  conn);
 	char *op = cartString(cart, hgtaIntersectOp);
 	boolean dv2IsWiggle = (isWiggle(database, table2) || isBigWig(table2) ||
 			       isBedGraph(table2));
 	dataVectorIntersect(dataVector1, dataVector2,
 			    dv2IsWiggle, sameString(op, "none"));
 	dataVectorFree(&dataVector2);
 	}
     }
 }
 
 int wigPrintDataVectorOut(struct dataVector *dataVectorList,
 			      enum wigOutputType wigOutType, int maxOut,
 			      char *description)
 /* Print out bed or data points from list of dataVectors. */
 {
 int count = 0;
 switch (wigOutType)
     {
     case wigOutBed:
 	count = dataVectorWriteBed(dataVectorList, "stdout", maxOut,
 				   description);
 	break;
     case wigDataNoPrint:
 	break;
     case wigOutData:
     default:
 	count = dataVectorWriteWigAscii(dataVectorList, "stdout", maxOut,
 					description);
 	break;
     };
 return count;
 }
 
 struct dataVector *mergedWigDataVector(char *table,
 	struct sqlConnection *conn, struct region *region)
 /* Perform the specified subtrack merge wiggle-operation on table and
  * all other selected subtracks and intersect if necessary. */
 {
 struct trackDb *tdb1 = hTrackDbForTrack(database, table);
 struct trackTable *tt1 = trackTableNew(tdb1, table, conn);
 struct dataVector *dataVector1 = dataVectorFetchOneRegion(tt1, region, conn);
 struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb1);
 struct trackDb *sTdb = NULL;
 int numSubtracks = 1;
 char *op = cartString(cart, hgtaSubtrackMergeWigOp);
 boolean requireAll = cartBoolean(cart, hgtaSubtrackMergeRequireAll);
 boolean useMinScore = cartBoolean(cart, hgtaSubtrackMergeUseMinScore);
 float minScore = atof(cartString(cart, hgtaSubtrackMergeMinScore));
 
 if (cTdb == NULL)
     errAbort("mergedWigDataVector: could not find parent/composite trackDb "
 	     "entry for subtrack %s", table);
 
 if (dataVector1 == NULL)
     {
     return NULL;
     }
 
 for (sTdb = cTdb->subtracks;  sTdb != NULL;  sTdb = sTdb->next)
     {
     if (isSubtrackMerged(sTdb->tableName) &&
 	! sameString(tdb1->tableName, sTdb->tableName) &&
 	hSameTrackDbType(tdb1->type, sTdb->type))
 	{
 	struct trackTable *tt2 = trackTableNew(sTdb, sTdb->tableName, conn);
 	struct dataVector *dataVector2 = dataVectorFetchOneRegion(tt2, region,
 								  conn);
 	numSubtracks++;
 	if (dataVector2 == NULL)
 	    {
 	    if (requireAll)
 		{
 		freeDataVector(&dataVector1);
 		return NULL;
 		}
 	    continue;
 	    }
 	if (sameString(op, "average") || sameString(op, "sum"))
 	    dataVectorSum(dataVector1, dataVector2, requireAll);
 	else if (sameString(op, "product"))
 	    dataVectorProduct(dataVector1, dataVector2, requireAll);
 	else if (sameString(op, "min"))
 	    dataVectorMin(dataVector1, dataVector2, requireAll);
 	else if (sameString(op, "max"))
 	    dataVectorMax(dataVector1, dataVector2, requireAll);
 	else
 	    errAbort("mergedWigOutRegion: unknown WigOp %s", op);
 	dataVectorFree(&dataVector2);
 	}
     }
 if (sameString(op, "average"))
     dataVectorNormalize(dataVector1, numSubtracks);
 if (useMinScore)
     dataVectorFilterMin(dataVector1, minScore);
 
 intersectDataVector(table, dataVector1, region, conn);
 
 return dataVector1;
 }
 
 static int mergedWigOutRegion(char *table, struct sqlConnection *conn,
 			      struct region *region, int maxOut,
 			      enum wigOutputType wigOutType)
 /* Perform the specified subtrack merge wiggle-operation on table and
  * all other selected subtracks, intersect if necessary, and print out. */
 {
 struct dataVector *dv = mergedWigDataVector(table, conn, region);
 int resultCount =
     wigPrintDataVectorOut(dv, wigOutType, maxOut, describeSubtrackMerge("#\t"));
 dataVectorFree(&dv);
 return resultCount;
 }
 
 
 static int bedGraphOutRegion(char *table, struct sqlConnection *conn,
 			     struct region *region, int maxOut,
 			     enum wigOutputType wigOutType)
 /* Read in bedGraph as dataVector (filtering is handled there),
  * intersect if necessary, and print it out. */
 {
 struct dataVector *dv =bedGraphDataVector(table, conn, region);
 int resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL);
 dataVectorFree(&dv);
 return resultCount;
 }
 
 static int wigOutRegion(char *table, struct sqlConnection *conn,
 	struct region *region, int maxOut, enum wigOutputType wigOutType,
 	struct wigAsciiData **data, int spanConstraint)
 /* Write out wig data in region.  Write up to maxOut elements.
  * Returns number of elements written. */
 {
 int linesOut = 0;
 char splitTableOrFileName[256];
 struct customTrack *ct = NULL;
 boolean isCustom = FALSE;
 boolean hasConstraint = FALSE;
 struct wiggleDataStream *wds = NULL;
 unsigned long long valuesMatched = 0;
 int operations = wigFetchAscii;
 char *dataConstraint;
 double ll = 0.0;
 double ul = 0.0;
 char *table2 = NULL;
 struct bed *intersectBedList = NULL;
 
 switch (wigOutType)
     {
     case wigOutBed:
 	operations = wigFetchBed;
 	break;
     default:
     case wigDataNoPrint:
     case wigOutData:
 	operations = wigFetchAscii;
 	break;
     };
 
 WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */
 
 if (hasConstraint)
     freeMem(dataConstraint);	/* been cloned into wds */
 
 wds->setMaxOutput(wds, maxOut);
 wds->setChromConstraint(wds, region->chrom);
 wds->setPositionConstraint(wds, region->start, region->end);
 
 if (table2)
     intersectBedList = bedTable2(conn, region, table2);
 
 if (isCustom)
     {
     if (ct->dbTrack)
 	{
 	if (spanConstraint)
 	    wds->setSpanConstraint(wds,spanConstraint);
 	else
 	    {
 	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
 	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
 	    unsigned span = minSpan(trashConn, splitTableOrFileName,
 		region->chrom, region->start, region->end, cart, tdb);
 	    wds->setSpanConstraint(wds, span);
 	    hFreeConn(&trashConn);
 	    }
 	valuesMatched = getWigglePossibleIntersection(wds, region,
 	    CUSTOM_TRASH, table2, &intersectBedList,
 		splitTableOrFileName, operations);
 	}
     else
 	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
 	    &intersectBedList, splitTableOrFileName, operations);
     }
 else
     {
     boolean hasBin = FALSE;
 
     if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
 	{
 	/* XXX TBD, watch for a span limit coming in as an SQL filter */
 	if (intersectBedList)
 	    {
 	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
 	    unsigned span;
 	    span = minSpan(conn, splitTableOrFileName, region->chrom,
 		region->start, region->end, cart, tdb);
 	    wds->setSpanConstraint(wds, span);
 	    }
 	else if (spanConstraint)
 	    wds->setSpanConstraint(wds,spanConstraint);
 
 	valuesMatched = getWigglePossibleIntersection(wds, region, database,
 	    table2, &intersectBedList, splitTableOrFileName, operations);
 	}
     }
 
 switch (wigOutType)
     {
     case wigDataNoPrint:
 	if (data)
 	    {
 	    if (*data != NULL)	/* no exercise of this function yet	*/
 		{	/*	data not null, add to existing list	*/
 		struct wigAsciiData *asciiData;
 		struct wigAsciiData *next;
 		for (asciiData = *data; asciiData; asciiData = next)
 		    {
 		    next = asciiData->next;
 		    slAddHead(&wds->ascii, asciiData);
 		    }
 		}
 	    wds->sortResults(wds);
 	    *data = wds->ascii;	/* moving the list to *data */
 	    wds->ascii = NULL;	/* gone as far as wds is concerned */
 	    }
 	    linesOut = valuesMatched;
 	break;
     case wigOutBed:
 	linesOut = wds->bedOut(wds, "stdout", TRUE);/* TRUE == sort output */
 	break;
     default:
     case wigOutData:
 	linesOut = wds->asciiOut(wds, database, "stdout", TRUE, FALSE);
 	break;		/* TRUE == sort output, FALSE == not raw data out */
     };
 
 wiggleDataStreamFree(&wds);
 
 return linesOut;
 }	/*	static int wigOutRegion()	*/
 
 static int wigMaxOutput()
 /*	return maxOut value (cart variable defined on curTable)	*/
 {
 char *maxOutputStr = NULL;
 char *name;
 int maxOut;
 char *maxOutput = NULL;
 
 if (isCustomTrack(curTable))
     name = filterFieldVarName("ct", curTable, "_", filterMaxOutputVar);
 else
     name = filterFieldVarName(database, curTable, "_", filterMaxOutputVar);
 
 maxOutputStr = cartOptionalString(cart, name);
 /*	Don't modify(stripChar) the values sitting in the cart hash	*/
 if (NULL == maxOutputStr)
     maxOutput = cloneString(maxOutMenu[0]);
 else
     maxOutput = cloneString(maxOutputStr);
 
 stripChar(maxOutput, ',');
 maxOut = sqlUnsigned(maxOutput);
 freeMem(maxOutput);
 
 return maxOut;
 }
 
 static void doOutWig(struct trackDb *track, char *table, struct sqlConnection *conn,
 	enum wigOutputType wigOutType)
 {
 struct region *regionList = getRegions(), *region;
 int maxOut = 0, outCount, curOut = 0;
 char *shortLabel = table, *longLabel = table;
 
 if (track == NULL)
     errAbort("Sorry, can't find necessary track information for %s.  "
 	     "If you reached this page by selecting \"All tables\" as the "
 	     "group, please go back and select the same table via a regular "
 	     "track group if possible.",
 	     table);
 
 maxOut = wigMaxOutput();
 
-
 textOpen();
 
-
 if (track != NULL)
     {
-    shortLabel = track->shortLabel;
-    longLabel = track->longLabel;
     if (!sameString(track->tableName, table) && track->subtracks != NULL)
 	{
 	struct trackDb *tdb = NULL;
 	for (tdb = track->subtracks;  tdb != NULL;  tdb = tdb->next)
 	    {
 	    if (sameString(tdb->tableName, table))
 		{
-		shortLabel = tdb->shortLabel;
-		longLabel = tdb->longLabel;
+		track = tdb;
 		break;
 		}
 	    }
 	}
+    shortLabel = track->shortLabel;
+    longLabel = track->longLabel;
     }
 wigDataHeader(shortLabel, longLabel, NULL, wigOutType);
 
 for (region = regionList; region != NULL; region = region->next)
     {
     int curMaxOut = maxOut - curOut;
     if (anySubtrackMerge(database, table))
 	outCount = mergedWigOutRegion(table, conn, region, curMaxOut,
 				      wigOutType);
     else if (startsWithWord("bedGraph", track->type))
 	outCount = bedGraphOutRegion(table, conn, region, curMaxOut,
 				     wigOutType);
     else if (startsWithWord("bigWig", track->type))
         outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
     else
 	outCount = wigOutRegion(table, conn, region, curMaxOut,
 				wigOutType, NULL, 0);
     curOut += outCount;
     if (curOut >= maxOut)
         break;
     }
 if (curOut >= maxOut)
     warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", curOut);
 }
 
 
 /***********   PUBLIC ROUTINES  *********************************/
 
 struct dataVector *bedGraphDataVector(char *table,
 	struct sqlConnection *conn, struct region *region)
 /* Read in bedGraph as dataVector and return it.  Filtering, subtrack merge
  * and intersection are handled. */
 {
 struct dataVector *dv = NULL;
 
 if (anySubtrackMerge(database, table))
     dv = mergedWigDataVector(table, conn, region);
 else
     {
     struct trackDb *tdb;
     if (isCustomTrack(table))
         {
         struct customTrack *ct = ctLookupName(table);
         tdb = ct->tdb;
         conn = hAllocConn(CUSTOM_TRASH);
         }
     else
         {
         tdb = hTrackDbForTrack(database, table);
         }
     struct trackTable *tt1 = trackTableNew(tdb, table, conn);
     dv = dataVectorFetchOneRegion(tt1, region, conn);
     intersectDataVector(table, dv, region, conn);
     if (isCustomTrack(table))
         hFreeConn(&conn);
     }
 return dv;
 }
 
 
 struct dataVector *wiggleDataVector(struct trackDb *tdb, char *table,
 	struct sqlConnection *conn, struct region *region)
 /* Read in wiggle as dataVector and return it.  Filtering, subtrack merge
  * and intersection are handled. */
 {
 struct dataVector *dv = NULL;
 
 if (anySubtrackMerge(database, table))
     dv = mergedWigDataVector(table, conn, region);
 else
     {
     struct trackTable *tt1 = trackTableNew(tdb, table, conn);
     dv = dataVectorFetchOneRegion(tt1, region, conn);
     }
 return dv;
 }
 
 struct bed *getBedGraphAsBed(struct sqlConnection *conn, char *table,
 			     struct region *region)
 /* Extract a bedList in region from the given bedGraph table -- filtering and
  * intersection are handled inside of this function. */
 {
 struct dataVector *dv = NULL;
 
 dv = bedGraphDataVector(table, conn, region);
 
 return dataVectorToBedList(dv);
 }
 
 void wiggleMinMax(struct trackDb *tdb, double *min, double *max)
 {
 /*	obtain wiggle data limits from trackDb or cart or settings */
 if ((tdb != NULL) && (tdb->type != NULL))
     {
     double tDbMin, tDbMax;
     char *typeLine = cloneString(tdb->type);
     char *words[8];
     int wordCount;
     wordCount = chopLine(typeLine, words);
     wigFetchMinMaxY(tdb, min, max, &tDbMin, &tDbMax, wordCount, words);
     if (tDbMin < *min)
 	*min = tDbMin;
     if (tDbMax > *max)
 	*max = tDbMax;
     freeMem(typeLine);
     }
 }
 
 boolean isWiggle(char *db, char *table)
 /* Return TRUE if db.table is a wiggle. */
 {
 boolean typeWiggle = FALSE;
 
 if (db != NULL && table != NULL)
     {
     if (isCustomTrack(table))
 	{
 	struct customTrack *ct = ctLookupName(table);
 	if (ct != NULL && ct->wiggle)
 	    typeWiggle = TRUE;
 	}
     else
 	{
 	struct hTableInfo *hti = maybeGetHtiOnDb(db, table);
 	typeWiggle = (hti != NULL && HTI_IS_WIGGLE);
 	}
     }
 return(typeWiggle);
 }
 
 boolean isBedGraph(char *table)
 /* Return TRUE if table is specified as a bedGraph in the current database's
  * trackDb. */
 {
 return trackIsType(database, table, NULL, "bedGraph", ctLookupName);
 }
 
 struct bed *getWiggleAsBed(
     char *db, char *table, 	/* Database and table. */
     struct region *region,	/* Region to get data for. */
     char *filter, 		/* Filter to add to SQL where clause if any. */
     struct hash *idHash, 	/* Restrict to id's in this hash if non-NULL. */
     struct lm *lm,		/* Where to allocate memory. */
     struct sqlConnection *conn)	/* SQL connection to work with */
 /* Return a bed list of all items in the given range in table.
  * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
 /* filter, idHash and lm are currently unused, perhaps future use	*/
 {
 struct bed *bedList=NULL;
 char splitTableOrFileName[256];
 struct customTrack *ct = NULL;
 boolean isCustom = FALSE;
 boolean hasConstraint = FALSE;
 struct wiggleDataStream *wds = NULL;
 unsigned long long valuesMatched = 0;
 int operations = wigFetchBed;
 char *dataConstraint;
 double ll = 0.0;
 double ul = 0.0;
 char *table2 = NULL;
 struct bed *intersectBedList = NULL;
 int maxOut;
 
 WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */
 
 if (hasConstraint)
     freeMem(dataConstraint);	/* been cloned into wds */
 
 maxOut = wigMaxOutput();
 
 wds->setMaxOutput(wds, maxOut);
 
 wds->setChromConstraint(wds, region->chrom);
 wds->setPositionConstraint(wds, region->start, region->end);
 
 if (table2)
     intersectBedList = bedTable2(conn, region, table2);
 
 if (isCustom)
     {
     if (ct->dbTrack)
 	{
 	unsigned span = 0;
 	struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
 	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
 	valuesMatched = getWigglePossibleIntersection(wds, region,
 	    CUSTOM_TRASH, table2, &intersectBedList,
 		splitTableOrFileName, operations);
 	span = minSpan(trashConn, splitTableOrFileName, region->chrom,
 	    region->start, region->end, cart, tdb);
 	wds->setSpanConstraint(wds, span);
 	hFreeConn(&trashConn);
 	}
     else
 	valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
 	    &intersectBedList, splitTableOrFileName, operations);
     }
 else
     {
     boolean hasBin;
 
     if (conn == NULL)
 	errAbort( "getWiggleAsBed: NULL conn given for database table");
 
     if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
 	{
 	struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
 	unsigned span = 0;
 
 	/* XXX TBD, watch for a span limit coming in as an SQL filter */
 	span = minSpan(conn, splitTableOrFileName, region->chrom,
 	    region->start, region->end, cart, tdb);
 	wds->setSpanConstraint(wds, span);
 
 	valuesMatched = getWigglePossibleIntersection(wds, region, database,
 	    table2, &intersectBedList, splitTableOrFileName, operations);
 	}
     }
 
 if (valuesMatched > 0)
     {
     struct bed *bed;
 
     wds->sortResults(wds);
     for (bed = wds->bed; bed != NULL; bed = bed->next)
 	{
 	struct bed *copy = lmCloneBed(bed, lm);
 	slAddHead(&bedList, copy);
 	}
     slReverse(&bedList);
     }
 
 wiggleDataStreamFree(&wds);
 
 return bedList;
 }	/*	struct bed *getWiggleAsBed()	*/
 
 struct wigAsciiData *getWiggleAsData(struct sqlConnection *conn, char *table,
 	struct region *region)
 /*	return the wigAsciiData list	*/
 {
 int maxOut = 0;
 struct wigAsciiData *data = NULL;
 int outCount;
 
 maxOut = wigMaxOutput();
 outCount = wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data, 0);
 
 return data;
 }
 
 struct wigAsciiData *getWiggleData(struct sqlConnection *conn, char *table,
 	struct region *region, int maxOut, int spanConstraint)
 /*	like getWiggleAsData above, but with specific spanConstraint and
  *	a different data limit count, return the wigAsciiData list	*/
 {
 struct wigAsciiData *data = NULL;
 int outCount;
 
 outCount = wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data,
 	spanConstraint);
 
 return data;
 }
 
 void doOutWigData(struct trackDb *track, char *table, struct sqlConnection *conn)
 /* Return wiggle data in variableStep format. */
 {
 doOutWig(track, table, conn, wigOutData);
 }
 
 void doOutWigBed(struct trackDb *track, char *table, struct sqlConnection *conn)
 /* Return wiggle data in bed format. */
 {
 doOutWig(track, table, conn, wigOutBed);
 }
 
 void doSummaryStatsWiggle(struct sqlConnection *conn)
 /* Put up page showing summary stats for wiggle track. */
 {
 struct trackDb *track = curTrack;
 char *table = curTable;
 struct region *region, *regionList = getRegions();
 char *regionName = getRegionName();
 long long regionSize = 0;
 long long gapTotal = 0;
 long startTime = 0, wigFetchTime = 0;
 char splitTableOrFileName[256];
 struct customTrack *ct = NULL;
 boolean isCustom = FALSE;
 struct wiggleDataStream *wds = NULL;
 unsigned long long valuesMatched = 0;
 int regionCount = 0;
 int regionsDone = 0;
 unsigned span = 0;
 char *dataConstraint;
 double ll = 0.0;
 double ul = 0.0;
 boolean hasConstraint = FALSE;
 char *table2 = NULL;
 boolean fullGenome = FALSE;
 boolean statsHeaderDone = FALSE;
 boolean gotSome = FALSE;
 char *shortLabel = table;
 long long statsItemCount = 0;	/*	global accumulators for overall */
 int statsSpan = 0;		/*	stats summary on a multiple region */
 double statsSumData = 0.0;	/*	output */
 double statsSumSquares = 0.0;		/*	"  "	*/
 double lowerLimit = INFINITY;		/*	"  "	*/
 double upperLimit = -1.0 * INFINITY;	/*	"  "	*/
 
 startTime = clock1000();
 if (track != NULL)
      shortLabel = track->shortLabel;
 
 /*	Count the regions, when only one, we can do more stats */
 for (region = regionList; region != NULL; region = region->next)
     ++regionCount;
 
 htmlOpen("%s (%s) Wiggle Summary Statistics", shortLabel, table);
 
 if (anySubtrackMerge(database, curTable))
     hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
 	    "page (not implemented yet).  Statistics shown here are only for "
 	    "the primary table %s (%s).</EM>", shortLabel, table);
 
 fullGenome = fullGenomeRegion();
 
 WIG_INIT;  /* ct, isCustom, hasConstraint, wds and table2 are set here */
 
 for (region = regionList; region != NULL; region = region->next)
     {
     struct bed *intersectBedList = NULL;
     boolean hasBin;
     int operations;
 
     ++regionsDone;
 
     if (table2)
 	intersectBedList = bedTable2(conn, region, table2);
 
     operations = wigFetchStats;
 #if defined(NOT)
     /*	can't do the histogram now, that operation times out	*/
     if (1 == regionCount)
 	operations |= wigFetchAscii;
 #endif
 
     wds->setChromConstraint(wds, region->chrom);
 
     if (fullGenome)
 	wds->setPositionConstraint(wds, 0, 0);
     else
 	wds->setPositionConstraint(wds, region->start, region->end);
 
     if (hasConstraint)
 	wds->setDataConstraint(wds, dataConstraint, ll, ul);
 
     /* depending on what is coming in on regionList, we may need to be
      * smart about how often we call getData for these custom tracks
      * since that is potentially a large file read each time.
      */
     if (isCustom)
 	{
 	if (ct->dbTrack)
 	    {
 	    struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
 	    struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
 	    span = minSpan(trashConn, splitTableOrFileName, region->chrom,
 		region->start, region->end, cart, tdb);
 	    wds->setSpanConstraint(wds, span);
 	    valuesMatched = getWigglePossibleIntersection(wds, region,
 		CUSTOM_TRASH, table2, &intersectBedList,
 		    splitTableOrFileName, operations);
 	    hFreeConn(&trashConn);
 	    }
 	else
 	    {
 	    valuesMatched = getWigglePossibleIntersection(wds, region, NULL,
 		table2, &intersectBedList, splitTableOrFileName, operations);
 
 	/*  XXX We need to properly get the smallest span for custom tracks */
 	    /*	This is not necessarily the correct answer here	*/
 	    if (wds->stats)
 		span = wds->stats->span;
 	    else
 		span = 1;
 	    }
 	}
     else
 	{
 	if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
 	    {
 	    span = minSpan(conn, splitTableOrFileName, region->chrom,
 		region->start, region->end, cart, track);
 	    wds->setSpanConstraint(wds, span);
 	    valuesMatched = getWigglePossibleIntersection(wds, region,
 		database, table2, &intersectBedList, splitTableOrFileName,
 		    operations);
 	    if (intersectBedList)
 		span = 1;
 	    }
 	}
     /*	when doing multiple regions, we need to print out each result as
      *	it happens to keep the connection open to the browser and
      *	prevent any timeout since this could take a while.
      *	(worst case test is quality track on panTro1)
      */
     if (wds->stats)
 	statsItemCount += wds->stats->count;
     if (wds->stats && (regionCount > 1) && (valuesMatched > 0))
 	{
 	double sumData = wds->stats->mean * wds->stats->count;
 	double sumSquares;
 
 	if (wds->stats->count > 1)
 	    sumSquares = (wds->stats->variance * (wds->stats->count - 1)) +
 		((sumData * sumData)/wds->stats->count);
 	else
 	    sumSquares = sumData * sumData;
 
 	/*	global accumulators for overall summary	*/
 	statsSpan = wds->stats->span;
 	statsSumData += sumData;
 	statsSumSquares += sumSquares;
 	if (wds->stats->lowerLimit < lowerLimit)
 	    lowerLimit = wds->stats->lowerLimit;
 	if ((wds->stats->lowerLimit + wds->stats->dataRange) > upperLimit)
 	    upperLimit = wds->stats->lowerLimit + wds->stats->dataRange;
 
 	if (statsHeaderDone)
 	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, FALSE, TRUE);
 	else
 	    {
 	    wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, TRUE);
 	    statsHeaderDone = TRUE;
 	    }
 	wds->freeStats(wds);
 	gotSome = TRUE;
 	}
     if ((regionCount > MAX_REGION_DISPLAY) &&
 		(regionsDone >= MAX_REGION_DISPLAY))
 	{
 	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> Can not display more "
 	    "than %d regions, <BR> would take too much time </TH></TR>\n",
 		MAX_REGION_DISPLAY);
 	break;	/*	exit this for loop	*/
 	}
     }	/*for (region = regionList; region != NULL; region = region->next) */
 
 if (hasConstraint)
     freeMem(dataConstraint);	/* been cloned into wds */
 
 if (1 == regionCount)
     {
     statsPreamble(wds, regionList->chrom, regionList->start, regionList->end,
 	span, valuesMatched, table2);
     /* 3 X TRUE = sort results, html table output, with header,
      *	the FALSE means close the table after printing, no more rows to
      *	come.  The case in the if() statement was already taken care of
      *	in the statsPreamble() printout.  No need to do that again.
      */
 
     if ( ! ((valuesMatched == 0) && table2) )
 	wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);
     regionSize = basesInRegion(regionList,0);
     gapTotal = gapsInRegion(conn, regionList,0);
     }
 else
     {	/* this is a bit of a kludge here since these printouts are done in the
 	 *	library source wigDataStream.c statsOut() function and
 	 *	this is a clean up of that.  That function should be
 	 *	pulled out of there and made independent and more
 	 *	versatile.
 	 */
     long long realSize;
     double variance;
     double stddev;
 
     /*	Too expensive to lookup the numbers for thousands of regions */
     regionSize = basesInRegion(regionList,MAX_REGION_DISPLAY);
     gapTotal = gapsInRegion(conn, regionList,MAX_REGION_DISPLAY);
     realSize = regionSize - gapTotal;
 
     /*	close the table which was left open in the loop above	*/
     if (!gotSome)
 	hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> No data found matching this request </TH></TR>\n");
 
     hPrintf("<TR><TH ALIGN=LEFT> SUMMARY: </TH>\n");
     hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromStart	*/
     hPrintf("\t<TD> &nbsp; </TD>\n");	/*	chromEnd	*/
     hPrintf("\t<TD ALIGN=RIGHT> ");
     printLongWithCommas(stdout, statsItemCount);
     hPrintf(" </TD>\n" );
     hPrintf("\t<TD ALIGN=RIGHT> %d </TD>\n", statsSpan);
     hPrintf("\t<TD ALIGN=RIGHT> ");
     printLongWithCommas(stdout, statsItemCount*statsSpan);
     hPrintf("&nbsp;(%.2f%%) </TD>\n",
 	100.0*(double)(statsItemCount*statsSpan)/(double)realSize);
     hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", lowerLimit);
     hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit);
     hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit - lowerLimit);
     if (statsItemCount > 0)
 	hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", statsSumData/statsItemCount);
     else
 	hPrintf("\t<TD ALIGN=RIGHT> 0.0 </TD>\n");
     stddev = 0.0;
     variance = 0.0;
     if (statsItemCount > 1)
 	{
 	variance = (statsSumSquares -
 	    ((statsSumData * statsSumData)/(double) statsItemCount)) /
 		(double) (statsItemCount - 1);
 	if (variance > 0.0)
 	    stddev = sqrt(variance);
 	}
     hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", variance);
     hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", stddev);
     hPrintf("</TR>\n");
     wigStatsTableHeading(stdout, TRUE);
     hPrintf("</TABLE></TD></TR></TABLE></P>\n");
     }
 
 
 #if defined(NOT)
 /*	can't do the histogram now, that operation times out	*/
 /*	Single region, we can do the histogram	*/
 if ((valuesMatched > 1) && (1 == regionCount))
     {
     float *valuesArray = NULL;
     size_t valueCount = 0;
     struct histoResult *histoGramResult;
 
     /*	convert the ascii data listings to one giant float array 	*/
     valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);
 
     /*	histoGram() may return NULL if it doesn't work	*/
 
     histoGramResult = histoGram(valuesArray, valueCount,
 	    NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
 		(float) (wds->stats->lowerLimit + wds->stats->dataRange),
 		(struct histoResult *)NULL);
 
     printHistoGram(histoGramResult, TRUE);	/* TRUE == html output */
 
     freeHistoGram(&histoGramResult);
     wds->freeAscii(wds);
     wds->freeArray(wds);
     }
 #endif
 
 wds->freeStats(wds);
 wiggleDataStreamFree(&wds);
 
 wigFetchTime = clock1000() - startTime;
 webNewSection("Region and Timing Statistics");
 hTableStart();
 stringStatRow("region", regionName);
 numberStatRow("bases in region", regionSize);
 numberStatRow("bases in gaps", gapTotal);
 floatStatRow("load and calc time", 0.001*wigFetchTime);
 wigFilterStatRow(conn);
 stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
 hTableEnd();
 htmlClose();
 }	/*	void doSummaryStatsWiggle(struct sqlConnection *conn)	*/
 
 void wigShowFilter(struct sqlConnection *conn)
 /* print out wiggle data value filter */
 {
 double ll, ul;
 char *constraint;
 
 if (checkWigDataFilter(database, curTable, &constraint, &ll, &ul))
     {
     if (constraint && sameWord(constraint, "in range"))
 	{
 	hPrintf("&nbsp;&nbsp;data value %s [%g : %g)\n", constraint, ll, ul);
 	}
     else
 	hPrintf("&nbsp;&nbsp;data value %s %g\n", constraint, ll);
     freeMem(constraint);
     }
 }