df1ffb68dbd5b8eeaf6fa3123c4f5ec814756c9a kent Fri Jun 17 13:02:33 2011 -0700 Commenting and making it only open the bbiFile once, rather than once per chrom. diff --git src/hg/checkCoverageGaps/checkCoverageGaps.c src/hg/checkCoverageGaps/checkCoverageGaps.c index 5e1e37e..e8bcd5a 100644 --- src/hg/checkCoverageGaps/checkCoverageGaps.c +++ src/hg/checkCoverageGaps/checkCoverageGaps.c @@ -1,232 +1,225 @@ /* checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.. */ #include "common.h" #include "localmem.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "jksql.h" #include "trackDb.h" #include "hdb.h" #include "obscure.h" #include "rangeTree.h" #include "bigWig.h" #include "bigBed.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; boolean allParts = FALSE; boolean female = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.\n" "usage:\n" " checkCoverageGaps database track1 ... trackN\n" "Note: for bigWig and bigBeds, the biggest gap is rounded to the nearest 10,000 or so\n" "options:\n" " -allParts If set then include _hap and _random and other wierd chroms\n" " -female If set then don't check chrY\n" ); } static struct optionSpec options[] = { {"allParts", OPTION_BOOLEAN}, {"female", OPTION_BOOLEAN}, {NULL, 0}, }; -void bigCoverageIntoTree(struct trackDb *tdb, struct sqlConnection *conn, +void bigCoverageIntoTree(struct trackDb *tdb, struct bbiFile *bbi, char *chrom, int chromSize, struct rbTree *rt, boolean isBigBed) -/* Find biggest gap in given chromosome in bigWig */ +/* Find biggest gap in given chromosome in bigWig or bigBed */ { -char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); -struct bbiFile *bbi; -if (isBigBed) - bbi = bigBedFileOpen(fileName); -else - bbi = bigWigFileOpen(fileName); int sampleSize = 10000; int sampleCount = chromSize/sampleSize; if (sampleCount > 1) { int evenEnd = sampleCount * sampleSize; double *summaryVals; AllocArray(summaryVals, sampleCount); boolean ok; if (isBigBed) ok = bigBedSummaryArray(bbi, chrom, 0, evenEnd, bbiSumCoverage, sampleCount, summaryVals); else ok = bigWigSummaryArray(bbi, chrom, 0, evenEnd, bbiSumCoverage, sampleCount, summaryVals); if (ok) { int s=0, e, i; for (i=0; i 0.0) rangeTreeAdd(rt, s, e); s = e; } } } -bigWigFileClose(&bbi); } -void bigWigBiggestGap(struct trackDb *tdb, struct sqlConnection *conn, - char *chrom, int chromSize, struct rbTree *rt) -/* Find biggest gap in given chromosome in bigWig */ -{ -bigCoverageIntoTree(tdb, conn, chrom, chromSize, rt, FALSE); -} - -void bigBedBiggestGap(struct trackDb *tdb, struct sqlConnection *conn, - char *chrom, int chromSize, struct rbTree *rt) -/* Find biggest gap in given chromosome in bigBed */ -{ -bigCoverageIntoTree(tdb, conn, chrom, chromSize, rt, TRUE); -} -void tableBiggestGap(struct hTableInfo *hti, struct trackDb *tdb, struct sqlConnection *conn, +void tableCoverageIntoTree(struct hTableInfo *hti, struct trackDb *tdb, struct sqlConnection *conn, char *chrom, int chromSize, struct rbTree *rt) -/* Find biggest gap in given chromosome in bigBed */ +/* Find biggest gap in given chromosome in database table with chromosome coordinates */ { char fields[512]; safef(fields, sizeof(fields), "%s,%s", hti->startField, hti->endField); struct sqlResult *sr = hExtendedChromQuery(conn, hti->rootName, chrom, NULL, FALSE, fields, NULL); char **row; while ((row = sqlNextRow(sr)) != NULL) { rangeTreeAdd(rt, sqlUnsigned(row[0]), sqlUnsigned(row[1])); } sqlFreeResult(&sr); } void biggestGapFromRangeTree(struct rbTree *rt, int chromSize, int *retStart, int *retEnd, int *retSize) /* Given a range tree filled with data for given chromosome, figure out * location and size of biggest gap in data. */ { struct range *range, *rangeList = rangeTreeList(rt); if (rangeList == NULL) { *retStart = 0; *retEnd = *retSize = chromSize; } else { int lastEnd = 0; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; for (range = rangeList; range != NULL; range = range->next) { int size = range->start - lastEnd; if (size > biggestSize) { biggestSize = size; biggestStart = lastEnd; biggestEnd = range->start; } lastEnd = range->end; if (range->next == NULL) { size = chromSize - lastEnd; if (size > biggestSize) { biggestSize = size; biggestStart = lastEnd; biggestEnd = chromSize; } } } *retStart = biggestStart; *retEnd = biggestEnd; *retSize = biggestSize; } } void addGaps(struct sqlConnection *conn, char *chrom, struct rbTree *rt) -/* Add gaps to range tree. */ +/* Add gaps from gap table to range tree. */ { struct sqlResult *sr = hExtendedChromQuery(conn, "gap", chrom, NULL, FALSE, "chromStart,chromEnd", NULL); char **row; while ((row = sqlNextRow(sr)) != NULL) { rangeTreeAdd(rt, sqlUnsigned(row[0]), sqlUnsigned(row[1])); } sqlFreeResult(&sr); } void printBiggestGap(char *database, struct sqlConnection *conn, struct slName *chromList, struct hash *chromHash, char *track) /* Look up track in database, figure out which type it is, call * appropriate biggest gap finder, and then print result. */ { struct trackDb *tdb = hTrackInfo(conn, track); struct hTableInfo *hti = hFindTableInfo(database, chromList->name, tdb->table); char *typeWord = cloneFirstWord(tdb->type); +boolean isBig = FALSE, isBigBed = FALSE; +struct bbiFile *bbi = NULL; +if (sameString(typeWord, "bigBed")) + { + isBig = TRUE; + isBigBed = TRUE; + bbi = bigBedFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); + } +else if (sameString(typeWord, "bigWig")) + { + isBig = TRUE; + bbi = bigWigFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); + } char *biggestChrom = NULL; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; + struct slName *chrom; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (!allParts && strchr(chrom->name, '_')) // Generally skip weird chroms continue; if (female && sameString(chrom->name, "chrY")) continue; int chromSize = hashIntVal(chromHash, chrom->name); struct rbTree *rt = rangeTreeNew(); int start = 0, end = 0, size = 0; - if (sameString("bigWig", typeWord)) - bigWigBiggestGap(tdb, conn, chrom->name, chromSize, rt); - else if (sameString("bigBed", typeWord)) - bigBedBiggestGap(tdb, conn, chrom->name, chromSize, rt); + if (isBig) + bigCoverageIntoTree(tdb, bbi, chrom->name, chromSize, rt, isBigBed); else - tableBiggestGap(hti, tdb, conn, chrom->name, chromSize, rt); + tableCoverageIntoTree(hti, tdb, conn, chrom->name, chromSize, rt); if (rt->n > 0) // Want to keep completely uncovered chromosome uncovered addGaps(conn, chrom->name, rt); biggestGapFromRangeTree(rt, chromSize, &start, &end, &size); if (size > biggestSize) { biggestSize = size; biggestStart = start; biggestEnd = end; biggestChrom = chrom->name; } rangeTreeFree(&rt); } printf("%s\t%s:%d-%d\t", track, biggestChrom, biggestStart+1, biggestEnd); printLongWithCommas(stdout, biggestSize); putchar('\n'); freez(&typeWord); +bbiFileClose(&bbi); } void checkCoverageGaps(char *database, int trackCount, char *tracks[]) /* checkCoverageGaps - Check for biggest gap in coverage for a list of tracks.. */ { struct slName *chromList = hChromList(database); struct hash *chromHash = hChromSizeHash(database); struct sqlConnection *conn = sqlConnect(database); int i; for (i=0; i