a44421a79fb36cc2036fe116b97ea3bc9590cd0c braney Fri Dec 2 09:34:39 2011 -0800 removed rcsid (#295) diff --git src/utils/bedToClosestPoint/bedToClosestPoint.c src/utils/bedToClosestPoint/bedToClosestPoint.c index 502b39d..24661a1 100644 --- src/utils/bedToClosestPoint/bedToClosestPoint.c +++ src/utils/bedToClosestPoint/bedToClosestPoint.c @@ -1,153 +1,152 @@ /* bedToClosestPoint - Given a bed file, and a list of unique points (often promoters) in genome, * assign each bed to the closest point. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "genomeRangeTree.h" #include "basicBed.h" #include "sqlNum.h" -static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; void usage() /* Explain usage and exit. */ { errAbort( "bedToClosestPoint - Given a bed file, and a list of unique points (often promoters) in genome,\n" "assign each bed to the closest point.\n" "usage:\n" " bedToClosestPoint inRanges.bed inPoints.bed out.tab\n" "Where:\n" " inRanges.bed is a bed file without exons or strand\n" " inPoints.bed is a bed file (promoters), where the chromStart on the + strand and\n" " the chromEnd on the - strand will determine the point\n" " out.tab has the following fields\n" " - the name field from inPoints.bed\n" " - the name field from inRanges.bed, or '.' if the field is missing\n" " - distance between point and middle of in.bed record. May be - if upstream\n" " - the score field if any from inRanges.bed\n" "options:\n" " -xxx=XXX\n" ); } static struct optionSpec options[] = { {NULL, 0}, }; struct range *rangeTreeClosestPoint(struct rbTree *t, int start, int end) /* Find the closest range in tree to given range. Closeness is measured by * closeness of the midpoint. I'm not quite sure if this is general purpose * enough to go into the range tree library. */ { struct rbTreeNode *p, *nextP = NULL; struct rbTreeNode *closestNode = NULL; int closestDistance = 0; int mid = (start+end)/2; /* Repeatedly explore either the left or right branch, depending on the * value of the key, keeping track of the closest point we've seen. */ for (p = t->root; p != NULL; p = nextP) { struct range *r = p->item; int rMid = (r->start + r->end)/2; int distance = mid - rMid; int absDistance; if (distance < 0) { nextP = p->left; absDistance = -distance; } else if (distance > 0) { nextP = p->right; absDistance = distance; } else { absDistance = 0; } if (closestNode == NULL || absDistance < closestDistance) { closestDistance = absDistance; closestNode = p; if (closestDistance == 0) break; } } return (closestNode == NULL ? NULL : closestNode->item); } struct range *genomeRangeTreeClosest(struct genomeRangeTree *tree, char *chrom, int start, int end) /* Return closest range in genome tree to given segment. Returns NULL if chromosome is empty. */ { struct rbTree *t; return (t=genomeRangeTreeFindRangeTree(tree,chrom)) ? rangeTreeClosestPoint(t, start, end) : NULL; } void bedToClosestPoint(char *inRangesFile, char *inPointsFile, char *outFile) /* bedToClosestPoint - Given a bed file, and a list of unique points (often promoters) in genome, * assign each bed to the closest point. */ { /* Load up inPoints file and put it into a range tree, insuring that each point is unique. */ struct bed *inPointsList = bedLoadNAll(inPointsFile, 6); struct genomeRangeTree *pointTree = genomeRangeTreeNew(); struct bed *bed, *next; for (bed = inPointsList; bed != NULL; bed = next) { next = bed->next; bed->next = NULL; int start; if (bed->strand[0] == '-') start = bed->chromEnd-1; else start = bed->chromStart; struct range *range = genomeRangeTreeAddValList(pointTree, bed->chrom, start, start+1, bed); if (slCount(range->val) > 1) errAbort("Multiple points at %s %d in %s", bed->chrom, start, inPointsFile); } /* Go through ranges file. */ FILE *f = mustOpen(outFile, "w"); struct lineFile *lf = lineFileOpen(inRangesFile, TRUE); char *row[6]; int wordCount; while ((wordCount = lineFileChop(lf, row)) != 0) { /* Parse out bed fields. */ lineFileExpectAtLeast(lf, 4, wordCount); char *chrom = row[0]; unsigned chromStart = sqlUnsigned(row[1]); unsigned chromEnd = sqlUnsigned(row[2]); char *name = (wordCount > 3 ? row[3] : "."); int score = (wordCount > 4 ? sqlSigned(row[4]) : 0); struct range *range = genomeRangeTreeClosest(pointTree, chrom, chromStart, chromEnd); if (range != NULL) { struct bed *bed = range->val; fprintf(f, "%s\t", bed->name); fprintf(f, "%s\t", name); int midRange = (chromStart + chromEnd)/2; int distance = midRange - range->start; if (bed->strand[0] == '-') distance = -distance; fprintf(f, "%d\t", distance); fprintf(f, "%d\n", score); } } carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); bedToClosestPoint(argv[1], argv[2], argv[3]); return 0; }