4bae217fb5eb5ac36cc1ac7a13e25a838149ddd1 kent Fri Jun 17 11:12:23 2011 -0700 Adding bigBed support diff --git src/hg/checkCoverageGaps/checkCoverageGaps.c src/hg/checkCoverageGaps/checkCoverageGaps.c index dbb69d0..5e1e37e 100644 --- src/hg/checkCoverageGaps/checkCoverageGaps.c +++ src/hg/checkCoverageGaps/checkCoverageGaps.c @@ -1,216 +1,232 @@ /* checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.. */ #include "common.h" #include "localmem.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "jksql.h" #include "trackDb.h" #include "hdb.h" #include "obscure.h" #include "rangeTree.h" #include "bigWig.h" #include "bigBed.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; boolean allParts = FALSE; boolean female = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.\n" "usage:\n" " checkCoverageGaps database track1 ... trackN\n" "Note: for bigWig and bigBeds, the biggest gap is rounded to the nearest 10,000 or so\n" "options:\n" " -allParts If set then include _hap and _random and other wierd chroms\n" " -female If set then don't check chrY\n" ); } static struct optionSpec options[] = { {"allParts", OPTION_BOOLEAN}, {"female", OPTION_BOOLEAN}, {NULL, 0}, }; -void bigWigBiggestGap(struct trackDb *tdb, struct sqlConnection *conn, - char *chrom, int chromSize, struct rbTree *rt) + +void bigCoverageIntoTree(struct trackDb *tdb, struct sqlConnection *conn, + char *chrom, int chromSize, struct rbTree *rt, boolean isBigBed) /* Find biggest gap in given chromosome in bigWig */ { char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); -struct bbiFile *bbi = bigWigFileOpen(fileName); +struct bbiFile *bbi; +if (isBigBed) + bbi = bigBedFileOpen(fileName); +else + bbi = bigWigFileOpen(fileName); int sampleSize = 10000; int sampleCount = chromSize/sampleSize; if (sampleCount > 1) { int evenEnd = sampleCount * sampleSize; double *summaryVals; AllocArray(summaryVals, sampleCount); - if (bigWigSummaryArray(bbi, chrom, 0, evenEnd, bbiSumCoverage, sampleCount, summaryVals)) + boolean ok; + if (isBigBed) + ok = bigBedSummaryArray(bbi, chrom, 0, evenEnd, bbiSumCoverage, sampleCount, summaryVals); + else + ok = bigWigSummaryArray(bbi, chrom, 0, evenEnd, bbiSumCoverage, sampleCount, summaryVals); + if (ok) { int s=0, e, i; for (i=0; i 0.0) rangeTreeAdd(rt, s, e); s = e; } } } bigWigFileClose(&bbi); } +void bigWigBiggestGap(struct trackDb *tdb, struct sqlConnection *conn, + char *chrom, int chromSize, struct rbTree *rt) +/* Find biggest gap in given chromosome in bigWig */ +{ +bigCoverageIntoTree(tdb, conn, chrom, chromSize, rt, FALSE); +} + void bigBedBiggestGap(struct trackDb *tdb, struct sqlConnection *conn, char *chrom, int chromSize, struct rbTree *rt) /* Find biggest gap in given chromosome in bigBed */ { -uglyAbort("bigBed not implemented"); +bigCoverageIntoTree(tdb, conn, chrom, chromSize, rt, TRUE); } void tableBiggestGap(struct hTableInfo *hti, struct trackDb *tdb, struct sqlConnection *conn, char *chrom, int chromSize, struct rbTree *rt) /* Find biggest gap in given chromosome in bigBed */ { char fields[512]; safef(fields, sizeof(fields), "%s,%s", hti->startField, hti->endField); struct sqlResult *sr = hExtendedChromQuery(conn, hti->rootName, chrom, NULL, FALSE, fields, NULL); char **row; while ((row = sqlNextRow(sr)) != NULL) { rangeTreeAdd(rt, sqlUnsigned(row[0]), sqlUnsigned(row[1])); } sqlFreeResult(&sr); } void biggestGapFromRangeTree(struct rbTree *rt, int chromSize, int *retStart, int *retEnd, int *retSize) /* Given a range tree filled with data for given chromosome, figure out * location and size of biggest gap in data. */ { struct range *range, *rangeList = rangeTreeList(rt); if (rangeList == NULL) { *retStart = 0; *retEnd = *retSize = chromSize; } else { int lastEnd = 0; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; for (range = rangeList; range != NULL; range = range->next) { int size = range->start - lastEnd; if (size > biggestSize) { biggestSize = size; biggestStart = lastEnd; biggestEnd = range->start; } lastEnd = range->end; if (range->next == NULL) { size = chromSize - lastEnd; if (size > biggestSize) { biggestSize = size; biggestStart = lastEnd; biggestEnd = chromSize; } } } *retStart = biggestStart; *retEnd = biggestEnd; *retSize = biggestSize; } } void addGaps(struct sqlConnection *conn, char *chrom, struct rbTree *rt) /* Add gaps to range tree. */ { -char query[256]; -safef(query, sizeof(query), "select chromStart, chromEnd from gap where chrom='%s'", chrom); -struct sqlResult *sr = sqlGetResult(conn, query); +struct sqlResult *sr = hExtendedChromQuery(conn, "gap", chrom, NULL, FALSE, + "chromStart,chromEnd", NULL); char **row; while ((row = sqlNextRow(sr)) != NULL) { rangeTreeAdd(rt, sqlUnsigned(row[0]), sqlUnsigned(row[1])); } sqlFreeResult(&sr); } void printBiggestGap(char *database, struct sqlConnection *conn, struct slName *chromList, struct hash *chromHash, char *track) /* Look up track in database, figure out which type it is, call * appropriate biggest gap finder, and then print result. */ { struct trackDb *tdb = hTrackInfo(conn, track); struct hTableInfo *hti = hFindTableInfo(database, chromList->name, tdb->table); char *typeWord = cloneFirstWord(tdb->type); char *biggestChrom = NULL; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; struct slName *chrom; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (!allParts && strchr(chrom->name, '_')) // Generally skip weird chroms continue; if (female && sameString(chrom->name, "chrY")) continue; int chromSize = hashIntVal(chromHash, chrom->name); struct rbTree *rt = rangeTreeNew(); int start = 0, end = 0, size = 0; if (sameString("bigWig", typeWord)) bigWigBiggestGap(tdb, conn, chrom->name, chromSize, rt); else if (sameString("bigBed", typeWord)) bigBedBiggestGap(tdb, conn, chrom->name, chromSize, rt); else tableBiggestGap(hti, tdb, conn, chrom->name, chromSize, rt); if (rt->n > 0) // Want to keep completely uncovered chromosome uncovered addGaps(conn, chrom->name, rt); biggestGapFromRangeTree(rt, chromSize, &start, &end, &size); if (size > biggestSize) { biggestSize = size; biggestStart = start; biggestEnd = end; biggestChrom = chrom->name; } rangeTreeFree(&rt); } printf("%s\t%s:%d-%d\t", track, biggestChrom, biggestStart+1, biggestEnd); printLongWithCommas(stdout, biggestSize); putchar('\n'); freez(&typeWord); } void checkCoverageGaps(char *database, int trackCount, char *tracks[]) /* checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.. */ { struct slName *chromList = hChromList(database); struct hash *chromHash = hChromSizeHash(database); struct sqlConnection *conn = sqlConnect(database); int i; for (i=0; i