aa727212e0fd18b6e2d3dc39764072b78b25d4ae kent Mon Mar 14 20:35:28 2011 -0700 Adding another bed cleanup utility. diff --git src/utils/bedRestrictToPositions/bedRestrictToPositions.c src/utils/bedRestrictToPositions/bedRestrictToPositions.c new file mode 100644 index 0000000..abfc625 --- /dev/null +++ src/utils/bedRestrictToPositions/bedRestrictToPositions.c @@ -0,0 +1,92 @@ +/* bedRestrictToPositions - Filter bed file, restricting to only ones that match chrom/start/ends + * specified in restrict.bed file. */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" + +static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "bedRestrictToPositions - Filter bed file, restricting to only ones that match chrom/start/ends specified in restrict.bed file.\n" + "usage:\n" + " bedRestrictToPositions in.bed restrict.bed out.bed\n" + "options:\n" + " -xxx=XXX\n" + ); +} + +static struct optionSpec options[] = { + {NULL, 0}, +}; + +#define BED_STRING_SIZE 256 + +char *bedString(char *chrom, char *start, char *end, char result[BED_STRING_SIZE]) +/* Return space delimited concatenation: chrom start end */ +{ +safef(result, BED_STRING_SIZE, "%s\t%s\t%s", chrom, start, end); +return result; +} + +struct hash *bedIntoHash(char *fileName) +/* Read in a bed file, return hash keyed by bedStrings (with empty vals) + * and return this hash. */ +{ +/* Add each bed item to hash, and list, checking uniqueness */ +struct hash *hash = hashNew(21); +struct lineFile *lf = lineFileOpen(fileName, TRUE); +char *row[3]; +while (lineFileRow(lf, row)) + { + char key[BED_STRING_SIZE]; + bedString(row[0], row[1], row[2], key); + hashAdd(hash, key, NULL); + } + +/* Clean up and go home. */ +lineFileClose(&lf); +return hash; +} +void bedRestrictToPositions(char *inFile, char *restrictFile, char *outFile) +/* bedRestrictToPositions - Filter bed file, restricting to only ones that match chrom/start/ends + * specified in restrict.bed file. */ +{ +struct hash *restrictHash = bedIntoHash(restrictFile); +struct lineFile *lf = lineFileOpen(inFile, TRUE); +FILE *f = mustOpen(outFile, "w"); +char *line; +while (lineFileNextReal(lf, &line)) + { + char *chrom = nextWord(&line); + char *start = nextWord(&line); + char *end = nextWord(&line); + if (end == NULL) + errAbort("Expecting at least three words line %d of %s", lf->lineIx, lf->fileName); + char key[BED_STRING_SIZE]; + bedString(chrom, start, end, key); + if (hashLookup(restrictHash, key)) + { + fprintf(f, "%s\t%s\t%s", chrom, start, end); + line = skipLeadingSpaces(line); + if (isEmpty(line)) + fputc('\n', f); + else + fprintf(f, "\t%s\n", line); + } + } +carefulClose(&f); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 4) + usage(); +bedRestrictToPositions(argv[1], argv[2], argv[3]); +return 0; +}