56332b7995bf6075d07d88b04e5675c247725892 braney Tue Oct 17 16:21:18 2023 -0700 flesh out the indel tracks diff --git src/utils/bedCollect/bedCollect.c src/utils/bedCollect/bedCollect.c index 0aa3d99..aea53fe 100644 --- src/utils/bedCollect/bedCollect.c +++ src/utils/bedCollect/bedCollect.c @@ -1,83 +1,108 @@ /* bedCollect - collect overlapping beds into a single bed. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "basicBed.h" #include "dystring.h" void usage() /* Explain usage and exit. */ { errAbort( "bedCollect - collect overlapping beds into a single bed\n" "usage:\n" " bedCollect input.bed output.bed\n" "note: input beds need to be sorted with bedSort\n" "options:\n" - " -xxx=XXX\n" + " -exact overlapping blocks must be exactly the same range and score\n" ); } +boolean exact; // overlapping blocks must be exactly the same range and score + /* Command line validation table. */ static struct optionSpec options[] = { + {"exact", OPTION_BOOLEAN}, {NULL, 0}, }; static void outBed(FILE *f, struct bed *bed, struct hash *nameHash) { static int count = 0; struct slName *names = hashSlNameFromHash(nameHash); +int sizeQuery = bed->score; bed->score = slCount(names); struct dyString *dy = newDyString(100); for(; names; names = names->next) { dyStringAppend(dy, names->name); dyStringAppend(dy, ","); } bed->name = dy->string; -fprintf(f, "%s %d %d arr%d %d + %d %d 0 %s\n", bed->chrom, bed->chromStart, bed->chromEnd, count++, bed->score, bed->chromStart, bed->chromEnd, bed->name); +fprintf(f, "%s %d %d arr%d %d + %d %d 0 %s %d\n", bed->chrom, bed->chromStart, bed->chromEnd, count++, bed->score, bed->chromStart, bed->chromEnd, bed->name, sizeQuery); //bedOutputN(bed, 5, f, '\t', '\n'); } void bedCollect(char *inFile, char *outFile) /* bedCollect - collect overlapping beds into a single bed. */ { struct bed *allBeds = bedLoadAll(inFile); FILE *f = mustOpen(outFile, "w"); struct bed *bed, *prevBed = allBeds; prevBed->score = 1; struct hash *nameHash = newHash(0); +hashStore(nameHash, prevBed->name); -for(bed = allBeds; bed; bed = bed->next) +if (exact) + { + for(bed = prevBed->next; bed; bed = bed->next) + { + if (differentString(prevBed->chrom, bed->chrom) || (prevBed->chromStart != bed->chromStart) || (prevBed->chromEnd != bed->chromEnd) || (prevBed->score != bed->score)) { - bed->score = 1; + outBed(f, prevBed, nameHash); + freeHash(&nameHash); + nameHash = newHash(0); + prevBed = bed; + hashStore(nameHash, bed->name); + } + else + { + hashStore(nameHash, bed->name); + } + } + } +else + { + for(bed = prevBed->next; bed; bed = bed->next) + { if (differentString(prevBed->chrom, bed->chrom) || (prevBed->chromEnd <= bed->chromStart)) { outBed(f, prevBed, nameHash); + freeHash(&nameHash); nameHash = newHash(0); prevBed = bed; hashStore(nameHash, bed->name); } else { hashStore(nameHash, bed->name); prevBed->chromEnd = (bed->chromEnd > prevBed->chromEnd) ? bed->chromEnd : prevBed->chromEnd; - prevBed->score++; - //printf("merging %d %d %d %d %d %s\n", prevBed->chromStart, prevBed->chromEnd, bed->chromStart, bed->chromEnd, prevBed->score, bed->name); } } outBed(f, prevBed, nameHash); } +} int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); +exact = optionExists("exact"); bedCollect(argv[1], argv[2]); return 0; }