e70152e44cc66cc599ff6b699eb8adc07f3e656a kent Sat May 24 21:09:34 2014 -0700 Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment. diff --git src/hg/hgTables/bigWig.c src/hg/hgTables/bigWig.c index 3c71787..7fa8705 100644 --- src/hg/hgTables/bigWig.c +++ src/hg/hgTables/bigWig.c @@ -1,379 +1,382 @@ /* bigWig - stuff to handle bigWig in the Table Browser. */ +/* Copyright (C) 2012 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + #include "common.h" #include "hash.h" #include "linefile.h" #include "dystring.h" #include "localmem.h" #include "jksql.h" #include "cheapcgi.h" #include "cart.h" #include "web.h" #include "bed.h" #include "hdb.h" #include "trackDb.h" #include "customTrack.h" #include "wiggle.h" #include "hmmstats.h" #include "correlate.h" #include "bbiFile.h" #include "bigWig.h" #include "hubConnect.h" #include "hgTables.h" boolean isBigWigTable(char *table) /* Return TRUE if table corresponds to a bigWig file. */ { struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); if (tdb) return tdbIsBigWig(tdb); else return trackIsType(database, table, curTrack, "bigWig", ctLookupName); } char *bigFileNameFromCtOrHub(char *table, struct sqlConnection *conn) /* If table is a custom track or hub track, return the bigDataUrl setting; * otherwise return NULL. Do a freeMem on returned string when done. */ { char *fileName = NULL; if (isCustomTrack(table)) { struct customTrack *ct = ctLookupName(table); if (ct != NULL) fileName = cloneString(trackDbSetting(ct->tdb, "bigDataUrl")); } else if (isHubTrack(table)) { struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); assert(tdb != NULL); fileName = cloneString(trackDbSetting(tdb, "bigDataUrl")); assert(fileName != NULL); } return fileName; } char *bigWigFileName(char *table, struct sqlConnection *conn) /* Return file name associated with bigWig. This handles differences whether it's * a custom or built-in track. Do a freeMem on returned string when done. */ { struct trackDb *tdb = hashMustFindVal(fullTableToTdbHash, table); return tdbBigFileName(conn, tdb); } struct bbiInterval *intersectedFilteredBbiIntervalsOnRegion(struct sqlConnection *conn, struct bbiFile *bwf, struct region *region, enum wigCompare filterCmp, double filterLl, double filterUl, struct lm *lm) /* Get list of bbiIntervals (more-or-less bedGraph things from bigWig) out of bigWig file * and if necessary apply filter and intersection. Return list which is allocated in lm. */ { char *chrom = region->chrom; int chromSize = hChromSize(database, chrom); struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, chrom, region->start, region->end, lm); /* Run filter if necessary */ if (filterCmp != wigNoOp_e) { struct bbiInterval *next, *newList = NULL; for (iv = ivList; iv != NULL; iv = next) { next = iv->next; if (wigCompareValFilter(iv->val, filterCmp, filterLl, filterUl)) { slAddHead(&newList, iv); } } slReverse(&newList); ivList = newList; } /* Run intersection if necessary */ if (anyIntersection()) { boolean isBpWise = intersectionIsBpWise(); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); struct bbiInterval *next, *newList = NULL; double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0)*0.01; double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100)*0.01; char *op = cartString(cart, hgtaIntersectOp); for (iv = ivList; iv != NULL; iv = next) { next = iv->next; int start = iv->start; int size = iv->end - start; int overlap = bitCountRange(bits2, start, size); if (isBpWise) { if (overlap == size) { slAddHead(&newList, iv); } else if (overlap > 0) { /* Here we have to break things up. */ double val = iv->val; struct bbiInterval *partIv = iv; // Reuse memory for first interval int s = iv->start, end = iv->end; for (;;) { s = bitFindSet(bits2, s, end); if (s >= end) break; int bitsSet = bitFindClear(bits2, s, end) - s; if (partIv == NULL) lmAllocVar(lm, partIv); partIv->start = s; partIv->end = s + bitsSet; partIv->val = val; slAddHead(&newList, partIv); partIv = NULL; s += bitsSet; if (s >= end) break; } } } else { double coverage = (double)overlap/size; if (intersectOverlapFilter(op, moreThresh, lessThresh, coverage)) { slAddHead(&newList, iv); } } } slReverse(&newList); ivList = newList; bitFree(&bits2); } return ivList; } void getWigFilter(char *database, char *table, enum wigCompare *retCmp, double *retLl, double *retUl) /* Get wiggle filter variables from cart and convert them into numbers. */ { char *dataConstraint; enum wigCompare cmp = wigNoOp_e; checkWigDataFilter(database, curTable, &dataConstraint, retLl, retUl); if (dataConstraint != NULL) cmp = wigCompareFromString(dataConstraint); *retCmp = cmp; } int bigWigOutRegion(char *table, struct sqlConnection *conn, struct region *region, int maxOut, enum wigOutputType wigOutType) /* Write out bigWig for region, doing intersecting and filtering as need be. */ { boolean isMerged = anySubtrackMerge(table, database); int resultCount = 0; char *wigFileName = bigWigFileName(table, conn); if (wigFileName) { struct bbiFile *bwf = bigWigFileOpen(wigFileName); if (bwf) { /* Easy case, just dump out data. */ if (!anyFilter() && !anyIntersection() && !isMerged && wigOutType == wigOutData) resultCount = bigWigIntervalDump(bwf, region->chrom, region->start, region->end, maxOut, stdout); /* Pretty easy case, still do it ourselves. */ else if (!isMerged && wigOutType == wigOutData) { double ll, ul; enum wigCompare cmp; getWigFilter(database, curTable, &cmp, &ll, &ul); struct lm *lm = lmInit(0); struct bbiInterval *ivList, *iv; ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm); for (iv=ivList; iv != NULL && resultCount < maxOut; iv = iv->next, ++resultCount) { fprintf(stdout, "%s\t%d\t%d\t%g\n", region->chrom, iv->start, iv->end, iv->val); } lmCleanup(&lm); } /* Harder cases - resort to making a data vector and letting that machinery handle it. */ else { struct dataVector *dv = bigWigDataVector(table, conn, region); resultCount = wigPrintDataVectorOut(dv, wigOutType, maxOut, NULL); dataVectorFree(&dv); } } bbiFileClose(&bwf); } freeMem(wigFileName); return resultCount; } void doSummaryStatsBigWig(struct sqlConnection *conn) /* Put up page showing summary stats for bigWig track. */ { struct trackDb *track = curTrack; char *table = curTable; char *shortLabel = (track == NULL ? table : track->shortLabel); char *fileName = bigWigFileName(table, conn); long startTime = clock1000(); htmlOpen("%s (%s) Big Wig Summary Statistics", shortLabel, table); if (anySubtrackMerge(database, curTable)) hPrintf("
Note: subtrack merge is currently ignored on this "
"page (not implemented yet). Statistics shown here are only for "
"the primary table %s (%s).", shortLabel, table);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct region *region, *regionList = getRegions();
double sumData = 0, sumSquares = 0, minVal = 0, maxVal = 0;
bits64 validCount = 0;
if (!anyFilter() && !anyIntersection())
{
for (region = regionList; region != NULL; region = region->next)
{
struct bbiSummaryElement sum;
if (bbiSummaryArrayExtended(bwf, region->chrom, region->start, region->end,
bigWigIntervalQuery, 1, &sum))
{
if (validCount == 0)
{
minVal = sum.minVal;
maxVal = sum.maxVal;
}
else
{
if (sum.minVal < minVal)
minVal = sum.minVal;
if (sum.maxVal > maxVal)
maxVal = sum.maxVal;
}
sumData += sum.sumData;
sumSquares += sum.sumSquares;
validCount += sum.validCount;
}
}
}
else
{
double ll, ul;
enum wigCompare cmp;
getWigFilter(database, curTable, &cmp, &ll, &ul);
for (region = regionList; region != NULL; region = region->next)
{
struct lm *lm = lmInit(0);
struct bbiInterval *iv, *ivList;
ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
for (iv = ivList; iv != NULL; iv = iv->next)
{
double val = iv->val;
double size = iv->end - iv->start;
if (validCount == 0)
minVal = maxVal = val;
else
{
if (val < minVal)
minVal = val;
if (val > maxVal)
maxVal = val;
}
sumData += size*val;
sumSquares += size*val*val;
validCount += size;
}
lmCleanup(&lm);
}
}
hTableStart();
floatStatRow("mean", sumData/validCount);
floatStatRow("min", minVal);
floatStatRow("max", maxVal);
floatStatRow("standard deviation", calcStdFromSums(sumData, sumSquares, validCount));
numberStatRow("bases with data", validCount);
long long regionSize = basesInRegion(regionList,0);
long long gapTotal = gapsInRegion(conn, regionList,0);
numberStatRow("bases with sequence", regionSize - gapTotal);
numberStatRow("bases in region", regionSize);
wigFilterStatRow(conn);
stringStatRow("intersection", cartUsualString(cart, hgtaIntersectTable, "off"));
long wigFetchTime = clock1000() - startTime;
floatStatRow("load and calc time", 0.001*wigFetchTime);
hTableEnd();
bbiFileClose(&bwf);
htmlClose();
}
struct bed *bigWigIntervalsToBed(struct sqlConnection *conn, char *table, struct region *region,
struct lm *lm)
/* Return a list of unfiltered, unintersected intervals in region as bed (for
* secondary table in intersection). */
{
struct bed *bed, *bedList = NULL;
char *fileName = bigWigFileName(table, conn);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct bbiInterval *iv, *ivList = bigWigIntervalQuery(bwf, region->chrom, region->start,
region->end, lm);
for (iv = ivList; iv != NULL; iv = iv->next)
{
lmAllocVar(lm, bed);
bed->chrom = region->chrom;
bed->chromStart = iv->start;
bed->chromEnd = iv->end;
slAddHead(&bedList, bed);
}
slReverse(&bedList);
return bedList;
}
void bigWigFillDataVector(char *table, struct region *region,
struct sqlConnection *conn, struct dataVector *vector)
/* Fill in data vector with bigWig info on region. Handles filters and intersections. */
{
/* Figure out filter values if any. */
double ll, ul;
enum wigCompare cmp;
getWigFilter(database, curTable, &cmp, &ll, &ul);
/* Get intervals that pass filter and intersection. */
struct lm *lm = lmInit(0);
char *fileName = bigWigFileName(table, conn);
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct bbiInterval *iv, *ivList;
ivList = intersectedFilteredBbiIntervalsOnRegion(conn, bwf, region, cmp, ll, ul, lm);
int vIndex = 0;
for (iv = ivList; iv != NULL; iv = iv->next)
{
int start = max(iv->start, region->start);
int end = min(iv->end, region->end);
double val = iv->val;
int i;
for (i=start; i