src/hg/hgTables/wiggle.c 1.74
1.74 2009/05/29 22:07:01 mikep
no, we do not, the type of a track is its type no need to consult parent
Index: src/hg/hgTables/wiggle.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/wiggle.c,v
retrieving revision 1.73
retrieving revision 1.74
diff -b -B -U 1000000 -r1.73 -r1.74
--- src/hg/hgTables/wiggle.c 20 May 2009 20:59:57 -0000 1.73
+++ src/hg/hgTables/wiggle.c 29 May 2009 22:07:01 -0000 1.74
@@ -1,1243 +1,1243 @@
/* wiggle - stuff to process wiggle tracks.
* Much of this is lifted from hgText/hgWigText.c */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "localmem.h"
#include "portable.h"
#include "obscure.h"
#include "jksql.h"
#include "cheapcgi.h"
#include "cart.h"
#include "web.h"
#include "bed.h"
#include "hdb.h"
#include "hui.h"
#include "hgColors.h"
#include "trackDb.h"
#include "customTrack.h"
#include "wiggle.h"
#include "hmmstats.h"
#include "correlate.h"
#include "hgTables.h"
static char const rcsid[] = "$Id$";
extern char *maxOutMenu[];
/* a common set of stuff is accumulating in the various
* routines that call getData. For the moment make it a macro,
* perhaps later it can turn into a routine of its own.
*/
#define WIG_INIT \
if (isCustomTrack(table)) \
{ \
ct = ctLookupName(table); \
isCustom = TRUE; \
if (! ct->wiggle) \
errAbort("called to work on a custom track '%s' that isn't wiggle data ?", table); \
\
if (ct->dbTrack) \
safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", \
ct->dbTableName); \
else \
safef(splitTableOrFileName,ArraySize(splitTableOrFileName), "%s", \
ct->wigFile); \
hasConstraint = checkWigDataFilter("ct", table, &dataConstraint, &ll, &ul); \
} \
else \
hasConstraint = checkWigDataFilter(database, table, &dataConstraint, \
&ll, &ul); \
\
wds = wiggleDataStreamNew(); \
\
if (anyIntersection()) \
{ \
char *t2 = cartString(cart, hgtaIntersectTable); \
if (table && t2 && differentWord(t2,table)) \
table2 = t2; \
} \
\
if (hasConstraint)\
wds->setDataConstraint(wds, dataConstraint, ll, ul);
boolean checkWigDataFilter(char *db, char *table,
char **constraint, double *ll, double *ul)
/* check if filter exists, return its values, call with db="ct" for
* custom tracks */
{
char varPrefix[128];
struct hashEl *varList, *var;
char *pat = NULL;
char *cmp = NULL;
if (constraint != NULL)
*constraint = NULL; // Make sure return variable gets set to something at least.
if (isCustomTrack(table))
db = "ct";
safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table);
varList = cartFindPrefix(cart, varPrefix);
if (varList == NULL)
return FALSE;
/* check varList, look for dataValue.pat and dataValue.cmp */
for (var = varList; var != NULL; var = var->next)
{
if (endsWith(var->name, ".pat"))
{
char *name;
name = cloneString(var->name);
tolowers(name);
/* make sure we are actually looking at datavalue */
if (stringIn("datavalue", name) || stringIn("score", name))
{
pat = cloneString(var->val);
}
freeMem(name);
}
if (endsWith(var->name, ".cmp"))
{
char *name;
name = cloneString(var->name);
tolowers(name);
/* make sure we are actually looking at datavalue */
if (stringIn("datavalue", name) || stringIn("score", name))
{
cmp = cloneString(var->val);
tolowers(cmp);
if (stringIn("ignored", cmp))
freez(&cmp);
}
freeMem(name);
}
}
/* Must get them both for this to work */
if (cmp && pat)
{
int wordCount = 0;
char *words[2];
char *dupe = cloneString(pat);
wordCount = chopString(dupe, ", \t\n", words, ArraySize(words));
switch (wordCount)
{
case 2: if (ul) *ul = sqlDouble(words[1]);
case 1: if (ll) *ll = sqlDouble(words[0]);
break;
default:
warn("can not understand numbers input for dataValue filter");
errAbort(
"dataValue filter must be one or two numbers (two for 'in range')");
}
if (sameWord(cmp,"in range") && (wordCount != 2))
errAbort("'in range' dataValue filter must have two numbers input\n");
if (constraint)
*constraint = cmp;
return TRUE;
}
else
return FALSE;
} /* static boolean checkWigDataFilter() */
static void wigDataHeader(char *name, char *description, char *visibility,
enum wigOutputType wigOutType)
/* Write out custom track header for this wiggle region. */
{
switch (wigOutType)
{
case wigOutBed:
hPrintf("track ");
break;
default:
case wigOutData:
hPrintf("track type=wiggle_0");
break;
};
if (name != NULL)
hPrintf(" name=\"%s\"", name);
if (description != NULL)
hPrintf(" description=\"%s\"", description);
if (visibility != NULL)
hPrintf(" visibility=%s", visibility);
hPrintf("\n");
}
static void addBedElement(struct bed **bedList, char *chrom,
unsigned start, unsigned end, unsigned count, struct lm *lm)
{
struct bed *bed;
char name[128];
lmAllocVar(lm, bed);
bed->chrom = lmCloneString(lm, chrom);
bed->chromStart = start;
bed->chromEnd = end;
safef(name,ArraySize(name), "%s.%u", bed->chrom, count);
bed->name = lmCloneString(lm, name);
slAddHead(bedList, bed);
}
static struct bed *invertBedList(struct bed *bedList, struct lm *lm,
char *chrom, unsigned chromStart, unsigned chromEnd, unsigned chromSize)
/* bed list result is everything NOT in the given bedList */
{
unsigned elCount = 1;
unsigned start = 0; /* start == end */
unsigned end = 0; /* means no element yet */
unsigned lastEnd = 0; /* in the bedList */
struct bed *inverseBedList = NULL; /* new list */
struct bed *el;
slSort(&bedList, bedLineCmp); /* make sure it is sorted */
for (el = bedList; el != NULL; el=el->next)
{
/* past end of chrom, bad bed list */
if (el->chromStart >= chromSize)
{
end = chromSize;
break;
}
if (el->chromStart > end)
end = el->chromStart + 1;
if (end > start) /* do we have an element */
{
addBedElement(&inverseBedList, chrom, start, end, elCount++, lm);
start = el->chromEnd - 1; /* reset to no element yet */
end = start;
}
else if (el->chromEnd > start)
{
start = el->chromEnd - 1; /* reset to no element yet */
end = start;
}
lastEnd = max(lastEnd,el->chromEnd);
}
/* A last element ? */
if (lastEnd < chromSize)
end = chromSize;
if (end > start) /* potential last element */
addBedElement(&inverseBedList, chrom, start, end, elCount++, lm);
slSort(&inverseBedList, bedLineCmp); /* make sure it is sorted */
return inverseBedList;
}
static struct bed *bedTable2(struct sqlConnection *conn,
struct region *region, char *table2)
/* get a bed list, possibly complement, for table2 */
{
/* This use of bedTable rather than a bitmap is not really working. The
* rest of the table browser does intersection at the exon level, while
* the wig code, which this is part of, does it at the gene level. I
* noticed it while working on the corresponding routines for bigWig,
* which I'm building to work with bitmaps at the exon level. I'm not
* sure it's worth fixing this code since nobody has complained, and we're
* probably going to be doing mostly bigWig rather than wig in the future.
* -JK */
boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE);
char *op = cartString(cart, hgtaIntersectOp);
struct bed *bedList = NULL;
struct lm *lm1 = lmInit(64*1024);
/* fetch table 2 as a bed list */
bedList = getFilteredBeds(conn, table2, region, lm1, NULL);
/* If table 2 bed list needs to be complemented (!table2), then do so */
if (invTable2 || sameString("none", op))
{
unsigned chromStart = 0; /* start == end == 0 */
unsigned chromEnd = 0; /* means do full chrom */
unsigned chromSize = hChromSize(database, region->chrom);
struct lm *lm2 = lmInit(64*1024);
struct bed *inverseBedList = NULL; /* new list */
if ((region->start != 0) || (region->end != 0))
{
chromStart = region->start;
chromEnd = region->end;
}
if ((struct bed *)NULL == bedList)
{
if (0 == region->end)
chromEnd = chromSize;
addBedElement(&inverseBedList, region->chrom, chromStart, chromEnd,
1, lm2);
}
else
inverseBedList=invertBedList(bedList, lm2, region->chrom, chromStart,
chromEnd, chromSize);
lmCleanup(&lm1); /* == bedFreeList(&bedList) */
return inverseBedList;
}
else
return bedList;
}
static unsigned long long getWigglePossibleIntersection(
struct wiggleDataStream *wds, struct region *region, char *db,
char *table2, struct bed **intersectBedList,
char splitTableOrFileName[256], int operations)
{
unsigned long long valuesMatched = 0;
/* Intersection is either type "any" or "none"
* The "none" case is already taken care of during the loading of
* table 2 since it was then inverted at that time so it is already
* "none" of itself.
*/
/* If table2 is NULL, it means that the WIG_INIT macro recognized that we
* are working on table2 now, so we should not use intersectBedList
* (in fact we may be trying to compute it here). */
if ((table2 != NULL) && anyIntersection())
{
if (*intersectBedList)
{
valuesMatched = wds->getDataViaBed(wds, db, splitTableOrFileName,
operations, intersectBedList);
}
}
else
{
valuesMatched = wds->getData(wds, db, splitTableOrFileName, operations);
}
return valuesMatched;
}
static void intersectDataVector(char *table, struct dataVector *dataVector1,
struct region *region, struct sqlConnection *conn)
/* Perform intersection (if specified) on dataVector. */
{
/* If table is type wig (not bedGraph), then intersection has already been
* performed on each input (other selected subtracks must be the same type
* as table).
* Otherwise, handle intersection here. */
if (anyIntersection() && !isWiggle(database, table) && !isBigWig(table))
{
char *track2 = cartString(cart, hgtaIntersectTrack);
char *table2 = cartString(cart, hgtaIntersectTable);
if (table2 && differentWord(table2, table))
{
struct trackDb *tdb2 = findTrack(track2, fullTrackList);
struct trackTable *tt2 = trackTableNew(tdb2, table2, conn);
struct dataVector *dataVector2 = dataVectorFetchOneRegion(tt2, region,
conn);
char *op = cartString(cart, hgtaIntersectOp);
boolean dv2IsWiggle = (isWiggle(database, table2) || isBigWig(table2) ||
isBedGraph(table2));
dataVectorIntersect(dataVector1, dataVector2,
dv2IsWiggle, sameString(op, "none"));
dataVectorFree(&dataVector2);
}
}
}
int wigPrintDataVectorOut(struct dataVector *dataVectorList,
enum wigOutputType wigOutType, int maxOut,
char *description)
/* Print out bed or data points from list of dataVectors. */
{
int count = 0;
switch (wigOutType)
{
case wigOutBed:
count = dataVectorWriteBed(dataVectorList, "stdout", maxOut,
description);
break;
case wigDataNoPrint:
break;
case wigOutData:
default:
count = dataVectorWriteWigAscii(dataVectorList, "stdout", maxOut,
description);
break;
};
return count;
}
struct dataVector *mergedWigDataVector(char *table,
struct sqlConnection *conn, struct region *region)
/* Perform the specified subtrack merge wiggle-operation on table and
* all other selected subtracks and intersect if necessary. */
{
struct trackDb *tdb1 = hTrackDbForTrack(database, table);
struct trackTable *tt1 = trackTableNew(tdb1, table, conn);
struct dataVector *dataVector1 = dataVectorFetchOneRegion(tt1, region, conn);
struct trackDb *cTdb = hCompositeTrackDbForSubtrack(database, tdb1);
struct trackDb *sTdb = NULL;
int numSubtracks = 1;
char *op = cartString(cart, hgtaSubtrackMergeWigOp);
boolean requireAll = cartBoolean(cart, hgtaSubtrackMergeRequireAll);
boolean useMinScore = cartBoolean(cart, hgtaSubtrackMergeUseMinScore);
float minScore = atof(cartString(cart, hgtaSubtrackMergeMinScore));
if (cTdb == NULL)
errAbort("mergedWigDataVector: could not find parent/composite trackDb "
"entry for subtrack %s", table);
if (dataVector1 == NULL)
{
return NULL;
}
for (sTdb = cTdb->subtracks; sTdb != NULL; sTdb = sTdb->next)
{
if (isSubtrackMerged(sTdb->tableName) &&
! sameString(tdb1->tableName, sTdb->tableName) &&
hSameTrackDbType(tdb1->type, sTdb->type))
{
struct trackTable *tt2 = trackTableNew(sTdb, sTdb->tableName, conn);
struct dataVector *dataVector2 = dataVectorFetchOneRegion(tt2, region,
conn);
numSubtracks++;
if (dataVector2 == NULL)
{
if (requireAll)
{
freeDataVector(&dataVector1);
return NULL;
}
continue;
}
if (sameString(op, "average") || sameString(op, "sum"))
dataVectorSum(dataVector1, dataVector2, requireAll);
else if (sameString(op, "product"))
dataVectorProduct(dataVector1, dataVector2, requireAll);
else if (sameString(op, "min"))
dataVectorMin(dataVector1, dataVector2, requireAll);
else if (sameString(op, "max"))
dataVectorMax(dataVector1, dataVector2, requireAll);
else
errAbort("mergedWigOutRegion: unknown WigOp %s", op);
dataVectorFree(&dataVector2);
}
}
if (sameString(op, "average"))
dataVectorNormalize(dataVector1, numSubtracks);
if (useMinScore)
dataVectorFilterMin(dataVector1, minScore);
intersectDataVector(table, dataVector1, region, conn);
return dataVector1;
}
static int mergedWigOutRegion(char *table, struct sqlConnection *conn,
struct region *region, int maxOut,
enum wigOutputType wigOutType)
/* Perform the specified subtrack merge wiggle-operation on table and
* all other selected subtracks, intersect if necessary, and print out. */
{
struct dataVector *dv = mergedWigDataVector(table, conn, region);
int resultCount =
wigPrintDataVectorOut(dv, wigOutType, maxOut, describeSubtrackMerge("#\t"));
dataVectorFree(&dv);
return resultCount;
}
static int bedGraphOutRegion(char *table, struct sqlConnection *conn,
struct region *region, int maxOut,
enum wigOutputType wigOutType)
/* Read in bedGraph as dataVector (filtering is handled there),
* intersect if necessary, and print it out. */
{
struct dataVector *dv =bedGraphDataVector(table, conn, region);
int resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL);
dataVectorFree(&dv);
return resultCount;
}
static int wigOutRegion(char *table, struct sqlConnection *conn,
struct region *region, int maxOut, enum wigOutputType wigOutType,
struct wigAsciiData **data, int spanConstraint)
/* Write out wig data in region. Write up to maxOut elements.
* Returns number of elements written. */
{
int linesOut = 0;
char splitTableOrFileName[256];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchAscii;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
switch (wigOutType)
{
case wigOutBed:
operations = wigFetchBed;
break;
default:
case wigDataNoPrint:
case wigOutData:
operations = wigFetchAscii;
break;
};
WIG_INIT; /* ct, isCustom, hasConstraint, wds and table2 are set here */
if (hasConstraint)
freeMem(dataConstraint); /* been cloned into wds */
wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);
if (table2)
intersectBedList = bedTable2(conn, region, table2);
if (isCustom)
{
if (ct->dbTrack)
{
if (spanConstraint)
wds->setSpanConstraint(wds,spanConstraint);
else
{
struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
unsigned span = minSpan(trashConn, splitTableOrFileName,
region->chrom, region->start, region->end, cart, tdb);
wds->setSpanConstraint(wds, span);
hFreeConn(&trashConn);
}
valuesMatched = getWigglePossibleIntersection(wds, region,
CUSTOM_TRASH, table2, &intersectBedList,
splitTableOrFileName, operations);
}
else
valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
&intersectBedList, splitTableOrFileName, operations);
}
else
{
boolean hasBin = FALSE;
if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
{
/* XXX TBD, watch for a span limit coming in as an SQL filter */
if (intersectBedList)
{
struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
unsigned span;
span = minSpan(conn, splitTableOrFileName, region->chrom,
region->start, region->end, cart, tdb);
wds->setSpanConstraint(wds, span);
}
else if (spanConstraint)
wds->setSpanConstraint(wds,spanConstraint);
valuesMatched = getWigglePossibleIntersection(wds, region, database,
table2, &intersectBedList, splitTableOrFileName, operations);
}
}
switch (wigOutType)
{
case wigDataNoPrint:
if (data)
{
if (*data != NULL) /* no exercise of this function yet */
{ /* data not null, add to existing list */
struct wigAsciiData *asciiData;
struct wigAsciiData *next;
for (asciiData = *data; asciiData; asciiData = next)
{
next = asciiData->next;
slAddHead(&wds->ascii, asciiData);
}
}
wds->sortResults(wds);
*data = wds->ascii; /* moving the list to *data */
wds->ascii = NULL; /* gone as far as wds is concerned */
}
linesOut = valuesMatched;
break;
case wigOutBed:
linesOut = wds->bedOut(wds, "stdout", TRUE);/* TRUE == sort output */
break;
default:
case wigOutData:
linesOut = wds->asciiOut(wds, database, "stdout", TRUE, FALSE);
break; /* TRUE == sort output, FALSE == not raw data out */
};
wiggleDataStreamFree(&wds);
return linesOut;
} /* static int wigOutRegion() */
static int wigMaxOutput()
/* return maxOut value (cart variable defined on curTable) */
{
char *maxOutputStr = NULL;
char *name;
int maxOut;
char *maxOutput = NULL;
if (isCustomTrack(curTable))
name = filterFieldVarName("ct", curTable, "_", filterMaxOutputVar);
else
name = filterFieldVarName(database, curTable, "_", filterMaxOutputVar);
maxOutputStr = cartOptionalString(cart, name);
/* Don't modify(stripChar) the values sitting in the cart hash */
if (NULL == maxOutputStr)
maxOutput = cloneString(maxOutMenu[0]);
else
maxOutput = cloneString(maxOutputStr);
stripChar(maxOutput, ',');
maxOut = sqlUnsigned(maxOutput);
freeMem(maxOutput);
return maxOut;
}
static void doOutWig(struct trackDb *track, char *table, struct sqlConnection *conn,
enum wigOutputType wigOutType)
{
struct region *regionList = getRegions(), *region;
int maxOut = 0, outCount, curOut = 0;
char *shortLabel = table, *longLabel = table;
if (track == NULL)
errAbort("Sorry, can't find necessary track information for %s. "
"If you reached this page by selecting \"All tables\" as the "
"group, please go back and select the same table via a regular "
"track group if possible.",
table);
maxOut = wigMaxOutput();
textOpen();
if (track != NULL)
{
shortLabel = track->shortLabel;
longLabel = track->longLabel;
if (!sameString(track->tableName, table) && track->subtracks != NULL)
{
struct trackDb *tdb = NULL;
for (tdb = track->subtracks; tdb != NULL; tdb = tdb->next)
{
if (sameString(tdb->tableName, table))
{
shortLabel = tdb->shortLabel;
longLabel = tdb->longLabel;
break;
}
}
}
}
wigDataHeader(shortLabel, longLabel, NULL, wigOutType);
for (region = regionList; region != NULL; region = region->next)
{
int curMaxOut = maxOut - curOut;
if (anySubtrackMerge(database, table))
outCount = mergedWigOutRegion(table, conn, region, curMaxOut,
wigOutType);
else if (startsWithWord("bedGraph", track->type))
outCount = bedGraphOutRegion(table, conn, region, curMaxOut,
wigOutType);
else if (startsWithWord("bigWig", track->type))
outCount = bigWigOutRegion(table, conn, region, curMaxOut, wigOutType);
else
outCount = wigOutRegion(table, conn, region, curMaxOut,
wigOutType, NULL, 0);
curOut += outCount;
if (curOut >= maxOut)
break;
}
if (curOut >= maxOut)
warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", curOut);
}
/*********** PUBLIC ROUTINES *********************************/
struct dataVector *bedGraphDataVector(char *table,
struct sqlConnection *conn, struct region *region)
/* Read in bedGraph as dataVector and return it. Filtering, subtrack merge
* and intersection are handled. */
{
struct dataVector *dv = NULL;
if (anySubtrackMerge(database, table))
dv = mergedWigDataVector(table, conn, region);
else
{
struct trackDb *tdb;
if (isCustomTrack(table))
{
struct customTrack *ct = ctLookupName(table);
tdb = ct->tdb;
conn = hAllocConn(CUSTOM_TRASH);
}
else
{
tdb = hTrackDbForTrack(database, table);
}
struct trackTable *tt1 = trackTableNew(tdb, table, conn);
dv = dataVectorFetchOneRegion(tt1, region, conn);
intersectDataVector(table, dv, region, conn);
if (isCustomTrack(table))
hFreeConn(&conn);
}
return dv;
}
struct dataVector *wiggleDataVector(struct trackDb *tdb, char *table,
struct sqlConnection *conn, struct region *region)
/* Read in wiggle as dataVector and return it. Filtering, subtrack merge
* and intersection are handled. */
{
struct dataVector *dv = NULL;
if (anySubtrackMerge(database, table))
dv = mergedWigDataVector(table, conn, region);
else
{
struct trackTable *tt1 = trackTableNew(tdb, table, conn);
dv = dataVectorFetchOneRegion(tt1, region, conn);
}
return dv;
}
struct bed *getBedGraphAsBed(struct sqlConnection *conn, char *table,
struct region *region)
/* Extract a bedList in region from the given bedGraph table -- filtering and
* intersection are handled inside of this function. */
{
struct dataVector *dv = NULL;
dv = bedGraphDataVector(table, conn, region);
return dataVectorToBedList(dv);
}
void wiggleMinMax(struct trackDb *tdb, double *min, double *max)
{
/* obtain wiggle data limits from trackDb or cart or settings */
if ((tdb != NULL) && (tdb->type != NULL))
{
double tDbMin, tDbMax;
char *typeLine = cloneString(tdb->type);
char *words[8];
int wordCount;
wordCount = chopLine(typeLine, words);
wigFetchMinMaxY(tdb, min, max, &tDbMin, &tDbMax, wordCount, words);
if (tDbMin < *min)
*min = tDbMin;
if (tDbMax > *max)
*max = tDbMax;
freeMem(typeLine);
}
}
boolean isWiggle(char *db, char *table)
/* Return TRUE if db.table is a wiggle. */
{
boolean typeWiggle = FALSE;
if (db != NULL && table != NULL)
{
if (isCustomTrack(table))
{
struct customTrack *ct = ctLookupName(table);
if (ct != NULL && ct->wiggle)
typeWiggle = TRUE;
}
else
{
struct hTableInfo *hti = maybeGetHtiOnDb(db, table);
typeWiggle = (hti != NULL && HTI_IS_WIGGLE);
}
}
return(typeWiggle);
}
boolean isBedGraph(char *table)
/* Return TRUE if table is specified as a bedGraph in the current database's
* trackDb. */
{
-return trackIsType(database, table, NULL, "bedGraph", ctLookupName);// Do we need to get parent track here?
+return trackIsType(database, table, NULL, "bedGraph", ctLookupName);
}
struct bed *getWiggleAsBed(
char *db, char *table, /* Database and table. */
struct region *region, /* Region to get data for. */
char *filter, /* Filter to add to SQL where clause if any. */
struct hash *idHash, /* Restrict to id's in this hash if non-NULL. */
struct lm *lm, /* Where to allocate memory. */
struct sqlConnection *conn) /* SQL connection to work with */
/* Return a bed list of all items in the given range in table.
* Cleanup result via lmCleanup(&lm) rather than bedFreeList. */
/* filter, idHash and lm are currently unused, perhaps future use */
{
struct bed *bedList=NULL;
char splitTableOrFileName[256];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
boolean hasConstraint = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int operations = wigFetchBed;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
char *table2 = NULL;
struct bed *intersectBedList = NULL;
int maxOut;
WIG_INIT; /* ct, isCustom, hasConstraint, wds and table2 are set here */
if (hasConstraint)
freeMem(dataConstraint); /* been cloned into wds */
maxOut = wigMaxOutput();
wds->setMaxOutput(wds, maxOut);
wds->setChromConstraint(wds, region->chrom);
wds->setPositionConstraint(wds, region->start, region->end);
if (table2)
intersectBedList = bedTable2(conn, region, table2);
if (isCustom)
{
if (ct->dbTrack)
{
unsigned span = 0;
struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
valuesMatched = getWigglePossibleIntersection(wds, region,
CUSTOM_TRASH, table2, &intersectBedList,
splitTableOrFileName, operations);
span = minSpan(trashConn, splitTableOrFileName, region->chrom,
region->start, region->end, cart, tdb);
wds->setSpanConstraint(wds, span);
hFreeConn(&trashConn);
}
else
valuesMatched = getWigglePossibleIntersection(wds, region, NULL, table2,
&intersectBedList, splitTableOrFileName, operations);
}
else
{
boolean hasBin;
if (conn == NULL)
errAbort( "getWiggleAsBed: NULL conn given for database table");
if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
{
struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
unsigned span = 0;
/* XXX TBD, watch for a span limit coming in as an SQL filter */
span = minSpan(conn, splitTableOrFileName, region->chrom,
region->start, region->end, cart, tdb);
wds->setSpanConstraint(wds, span);
valuesMatched = getWigglePossibleIntersection(wds, region, database,
table2, &intersectBedList, splitTableOrFileName, operations);
}
}
if (valuesMatched > 0)
{
struct bed *bed;
wds->sortResults(wds);
for (bed = wds->bed; bed != NULL; bed = bed->next)
{
struct bed *copy = lmCloneBed(bed, lm);
slAddHead(&bedList, copy);
}
slReverse(&bedList);
}
wiggleDataStreamFree(&wds);
return bedList;
} /* struct bed *getWiggleAsBed() */
struct wigAsciiData *getWiggleAsData(struct sqlConnection *conn, char *table,
struct region *region)
/* return the wigAsciiData list */
{
int maxOut = 0;
struct wigAsciiData *data = NULL;
int outCount;
maxOut = wigMaxOutput();
outCount = wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data, 0);
return data;
}
struct wigAsciiData *getWiggleData(struct sqlConnection *conn, char *table,
struct region *region, int maxOut, int spanConstraint)
/* like getWiggleAsData above, but with specific spanConstraint and
* a different data limit count, return the wigAsciiData list */
{
struct wigAsciiData *data = NULL;
int outCount;
outCount = wigOutRegion(table, conn, region, maxOut, wigDataNoPrint, &data,
spanConstraint);
return data;
}
void doOutWigData(struct trackDb *track, char *table, struct sqlConnection *conn)
/* Return wiggle data in variableStep format. */
{
doOutWig(track, table, conn, wigOutData);
}
void doOutWigBed(struct trackDb *track, char *table, struct sqlConnection *conn)
/* Return wiggle data in bed format. */
{
doOutWig(track, table, conn, wigOutBed);
}
void doSummaryStatsWiggle(struct sqlConnection *conn)
/* Put up page showing summary stats for wiggle track. */
{
struct trackDb *track = curTrack;
char *table = curTable;
struct region *region, *regionList = getRegions();
char *regionName = getRegionName();
long long regionSize = 0;
long long gapTotal = 0;
long startTime = 0, wigFetchTime = 0;
char splitTableOrFileName[256];
struct customTrack *ct = NULL;
boolean isCustom = FALSE;
struct wiggleDataStream *wds = NULL;
unsigned long long valuesMatched = 0;
int regionCount = 0;
int regionsDone = 0;
unsigned span = 0;
char *dataConstraint;
double ll = 0.0;
double ul = 0.0;
boolean hasConstraint = FALSE;
char *table2 = NULL;
boolean fullGenome = FALSE;
boolean statsHeaderDone = FALSE;
boolean gotSome = FALSE;
char *shortLabel = table;
long long statsItemCount = 0; /* global accumulators for overall */
int statsSpan = 0; /* stats summary on a multiple region */
double statsSumData = 0.0; /* output */
double statsSumSquares = 0.0; /* " " */
double lowerLimit = INFINITY; /* " " */
double upperLimit = -1.0 * INFINITY; /* " " */
startTime = clock1000();
if (track != NULL)
shortLabel = track->shortLabel;
/* Count the regions, when only one, we can do more stats */
for (region = regionList; region != NULL; region = region->next)
++regionCount;
htmlOpen("%s (%s) Wiggle Summary Statistics", shortLabel, table);
if (anySubtrackMerge(database, curTable))
hPrintf("<P><EM><B>Note:</B> subtrack merge is currently ignored on this "
"page (not implemented yet). Statistics shown here are only for "
"the primary table %s (%s).</EM>", shortLabel, table);
fullGenome = fullGenomeRegion();
WIG_INIT; /* ct, isCustom, hasConstraint, wds and table2 are set here */
for (region = regionList; region != NULL; region = region->next)
{
struct bed *intersectBedList = NULL;
boolean hasBin;
int operations;
++regionsDone;
if (table2)
intersectBedList = bedTable2(conn, region, table2);
operations = wigFetchStats;
#if defined(NOT)
/* can't do the histogram now, that operation times out */
if (1 == regionCount)
operations |= wigFetchAscii;
#endif
wds->setChromConstraint(wds, region->chrom);
if (fullGenome)
wds->setPositionConstraint(wds, 0, 0);
else
wds->setPositionConstraint(wds, region->start, region->end);
if (hasConstraint)
wds->setDataConstraint(wds, dataConstraint, ll, ul);
/* depending on what is coming in on regionList, we may need to be
* smart about how often we call getData for these custom tracks
* since that is potentially a large file read each time.
*/
if (isCustom)
{
if (ct->dbTrack)
{
struct sqlConnection *trashConn = hAllocConn(CUSTOM_TRASH);
struct trackDb *tdb = findTdbForTable(database, curTrack, table, ctLookupName);
span = minSpan(trashConn, splitTableOrFileName, region->chrom,
region->start, region->end, cart, tdb);
wds->setSpanConstraint(wds, span);
valuesMatched = getWigglePossibleIntersection(wds, region,
CUSTOM_TRASH, table2, &intersectBedList,
splitTableOrFileName, operations);
hFreeConn(&trashConn);
}
else
{
valuesMatched = getWigglePossibleIntersection(wds, region, NULL,
table2, &intersectBedList, splitTableOrFileName, operations);
/* XXX We need to properly get the smallest span for custom tracks */
/* This is not necessarily the correct answer here */
if (wds->stats)
span = wds->stats->span;
else
span = 1;
}
}
else
{
if (hFindSplitTable(database, region->chrom, table, splitTableOrFileName, &hasBin))
{
span = minSpan(conn, splitTableOrFileName, region->chrom,
region->start, region->end, cart, track);
wds->setSpanConstraint(wds, span);
valuesMatched = getWigglePossibleIntersection(wds, region,
database, table2, &intersectBedList, splitTableOrFileName,
operations);
if (intersectBedList)
span = 1;
}
}
/* when doing multiple regions, we need to print out each result as
* it happens to keep the connection open to the browser and
* prevent any timeout since this could take a while.
* (worst case test is quality track on panTro1)
*/
if (wds->stats)
statsItemCount += wds->stats->count;
if (wds->stats && (regionCount > 1) && (valuesMatched > 0))
{
double sumData = wds->stats->mean * wds->stats->count;
double sumSquares;
if (wds->stats->count > 1)
sumSquares = (wds->stats->variance * (wds->stats->count - 1)) +
((sumData * sumData)/wds->stats->count);
else
sumSquares = sumData * sumData;
/* global accumulators for overall summary */
statsSpan = wds->stats->span;
statsSumData += sumData;
statsSumSquares += sumSquares;
if (wds->stats->lowerLimit < lowerLimit)
lowerLimit = wds->stats->lowerLimit;
if ((wds->stats->lowerLimit + wds->stats->dataRange) > upperLimit)
upperLimit = wds->stats->lowerLimit + wds->stats->dataRange;
if (statsHeaderDone)
wds->statsOut(wds, database, "stdout", TRUE, TRUE, FALSE, TRUE);
else
{
wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, TRUE);
statsHeaderDone = TRUE;
}
wds->freeStats(wds);
gotSome = TRUE;
}
if ((regionCount > MAX_REGION_DISPLAY) &&
(regionsDone >= MAX_REGION_DISPLAY))
{
hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> Can not display more "
"than %d regions, <BR> would take too much time </TH></TR>\n",
MAX_REGION_DISPLAY);
break; /* exit this for loop */
}
} /*for (region = regionList; region != NULL; region = region->next) */
if (hasConstraint)
freeMem(dataConstraint); /* been cloned into wds */
if (1 == regionCount)
{
statsPreamble(wds, regionList->chrom, regionList->start, regionList->end,
span, valuesMatched, table2);
/* 3 X TRUE = sort results, html table output, with header,
* the FALSE means close the table after printing, no more rows to
* come. The case in the if() statement was already taken care of
* in the statsPreamble() printout. No need to do that again.
*/
if ( ! ((valuesMatched == 0) && table2) )
wds->statsOut(wds, database, "stdout", TRUE, TRUE, TRUE, FALSE);
regionSize = basesInRegion(regionList,0);
gapTotal = gapsInRegion(conn, regionList,0);
}
else
{ /* this is a bit of a kludge here since these printouts are done in the
* library source wigDataStream.c statsOut() function and
* this is a clean up of that. That function should be
* pulled out of there and made independent and more
* versatile.
*/
long long realSize;
double variance;
double stddev;
/* Too expensive to lookup the numbers for thousands of regions */
regionSize = basesInRegion(regionList,MAX_REGION_DISPLAY);
gapTotal = gapsInRegion(conn, regionList,MAX_REGION_DISPLAY);
realSize = regionSize - gapTotal;
/* close the table which was left open in the loop above */
if (!gotSome)
hPrintf("<TR><TH ALIGN=CENTER COLSPAN=12> No data found matching this request </TH></TR>\n");
hPrintf("<TR><TH ALIGN=LEFT> SUMMARY: </TH>\n");
hPrintf("\t<TD> </TD>\n"); /* chromStart */
hPrintf("\t<TD> </TD>\n"); /* chromEnd */
hPrintf("\t<TD ALIGN=RIGHT> ");
printLongWithCommas(stdout, statsItemCount);
hPrintf(" </TD>\n" );
hPrintf("\t<TD ALIGN=RIGHT> %d </TD>\n", statsSpan);
hPrintf("\t<TD ALIGN=RIGHT> ");
printLongWithCommas(stdout, statsItemCount*statsSpan);
hPrintf(" (%.2f%%) </TD>\n",
100.0*(double)(statsItemCount*statsSpan)/(double)realSize);
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", lowerLimit);
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit);
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", upperLimit - lowerLimit);
if (statsItemCount > 0)
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", statsSumData/statsItemCount);
else
hPrintf("\t<TD ALIGN=RIGHT> 0.0 </TD>\n");
stddev = 0.0;
variance = 0.0;
if (statsItemCount > 1)
{
variance = (statsSumSquares -
((statsSumData * statsSumData)/(double) statsItemCount)) /
(double) (statsItemCount - 1);
if (variance > 0.0)
stddev = sqrt(variance);
}
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", variance);
hPrintf("\t<TD ALIGN=RIGHT> %g </TD>\n", stddev);
hPrintf("</TR>\n");
wigStatsTableHeading(stdout, TRUE);
hPrintf("</TABLE></TD></TR></TABLE></P>\n");
}
#if defined(NOT)
/* can't do the histogram now, that operation times out */
/* Single region, we can do the histogram */
if ((valuesMatched > 1) && (1 == regionCount))
{
float *valuesArray = NULL;
size_t valueCount = 0;
struct histoResult *histoGramResult;
/* convert the ascii data listings to one giant float array */
valuesArray = wds->asciiToDataArray(wds, valuesMatched, &valueCount);
/* histoGram() may return NULL if it doesn't work */
histoGramResult = histoGram(valuesArray, valueCount,
NAN, (unsigned) 0, NAN, (float) wds->stats->lowerLimit,
(float) (wds->stats->lowerLimit + wds->stats->dataRange),
(struct histoResult *)NULL);
printHistoGram(histoGramResult, TRUE); /* TRUE == html output */
freeHistoGram(&histoGramResult);
wds->freeAscii(wds);
wds->freeArray(wds);
}
#endif
wds->freeStats(wds);
wiggleDataStreamFree(&wds);
wigFetchTime = clock1000() - startTime;
webNewSection("Region and Timing Statistics");
hTableStart();
stringStatRow("region", regionName);
numberStatRow("bases in region", regionSize);
numberStatRow("bases in gaps", gapTotal);
floatStatRow("load and calc time", 0.001*wigFetchTime);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
hTableEnd();
htmlClose();
} /* void doSummaryStatsWiggle(struct sqlConnection *conn) */
void wigShowFilter(struct sqlConnection *conn)
/* print out wiggle data value filter */
{
double ll, ul;
char *constraint;
if (checkWigDataFilter(database, curTable, &constraint, &ll, &ul))
{
if (constraint && sameWord(constraint, "in range"))
{
hPrintf(" data value %s [%g : %g)\n", constraint, ll, ul);
}
else
hPrintf(" data value %s %g\n", constraint, ll);
freeMem(constraint);
}
}