56332b7995bf6075d07d88b04e5675c247725892
braney
  Tue Oct 17 16:21:18 2023 -0700
flesh out the indel tracks

diff --git src/utils/bedCollect/bedCollect.c src/utils/bedCollect/bedCollect.c
index 0aa3d99..aea53fe 100644
--- src/utils/bedCollect/bedCollect.c
+++ src/utils/bedCollect/bedCollect.c
@@ -1,83 +1,108 @@
 /* bedCollect - collect overlapping beds into a single bed. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "basicBed.h"
 #include "dystring.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedCollect - collect overlapping beds into a single bed\n"
   "usage:\n"
   "   bedCollect input.bed output.bed\n"
   "note: input beds need to be sorted with bedSort\n"
   "options:\n"
-  "   -xxx=XXX\n"
+  "   -exact       overlapping blocks must be exactly the same range and score\n"
   );
 }
 
+boolean exact;  // set from the -exact option in main(); when set, bedCollect() groups only beds whose chrom, start, end and score all match
+
 /* Command line validation table. */
 static struct optionSpec options[] = {
+   {"exact", OPTION_BOOLEAN},  // boolean flag; main() reads it with optionExists("exact")
    {NULL, 0},
 };
 
 static void outBed(FILE *f, struct bed *bed, struct hash *nameHash)
 {
 static int count = 0;
 struct slName *names = hashSlNameFromHash(nameHash);
+int sizeQuery = bed->score;  // save the score the bed arrived with; the next line overwrites bed->score with the number of names
 bed->score = slCount(names);
 struct dyString *dy = newDyString(100);
 for(; names; names = names->next)
     {
     dyStringAppend(dy, names->name);
     dyStringAppend(dy, ",");
     }
 bed->name = dy->string;
-fprintf(f, "%s %d %d arr%d %d + %d %d 0 %s\n", bed->chrom, bed->chromStart, bed->chromEnd, count++, bed->score, bed->chromStart, bed->chromEnd, bed->name);
+fprintf(f, "%s %d %d arr%d %d + %d %d 0 %s %d\n", bed->chrom, bed->chromStart, bed->chromEnd, count++, bed->score, bed->chromStart, bed->chromEnd, bed->name, sizeQuery);  // the added last column is the saved incoming score
 //bedOutputN(bed, 5, f, '\t', '\n');
 }
 
 void bedCollect(char *inFile, char *outFile)
-/* bedCollect - collect overlapping beds into a single bed. */
+/* bedCollect - collect runs of sorted beds from inFile (overlapping ones, or identical ones when exact is set) into single beds written to outFile. */
 {
 struct bed *allBeds = bedLoadAll(inFile);
+if (allBeds == NULL)    // nothing was loaded: stop with a message instead of dereferencing a NULL list below
+    errAbort("no bed records found in %s", inFile);
 FILE *f = mustOpen(outFile, "w");
 struct bed *bed, *prevBed = allBeds;
-prevBed->score = 1;
 struct hash *nameHash = newHash(0);
+hashStore(nameHash, prevBed->name);
 
-for(bed = allBeds; bed;  bed = bed->next)
+if (exact)
+    {
+    /* exact: a bed stays in the current group only if its chrom, start, end and score equal those of the group's first bed */
+    for(bed = prevBed->next; bed;  bed = bed->next)
+        {
+        if (differentString(prevBed->chrom, bed->chrom) || (prevBed->chromStart != bed->chromStart) || (prevBed->chromEnd != bed->chromEnd) || (prevBed->score != bed->score))
             {
-    bed->score = 1;
+            outBed(f, prevBed, nameHash);
 
+            freeHash(&nameHash);
+            nameHash = newHash(0);
+            prevBed = bed;
+            }
+        hashStore(nameHash, bed->name);  // record the name in whichever group the bed ended up in
+        }
+    }
+else
+    {
+    /* default: a bed stays in the current group while it is on the same chrom and starts before the group's running chromEnd */
+    for(bed = prevBed->next; bed;  bed = bed->next)
+        {
         if (differentString(prevBed->chrom, bed->chrom) || (prevBed->chromEnd <= bed->chromStart))
             {
             outBed(f, prevBed, nameHash);
 
+            freeHash(&nameHash);
             nameHash = newHash(0);
             prevBed = bed;
             hashStore(nameHash, bed->name);
             }
         else
             {
             hashStore(nameHash, bed->name);
             prevBed->chromEnd = (bed->chromEnd > prevBed->chromEnd) ?  bed->chromEnd : prevBed->chromEnd;
-        prevBed->score++;
-        //printf("merging %d %d %d %d %d %s\n", prevBed->chromStart, prevBed->chromEnd, bed->chromStart, bed->chromEnd, prevBed->score, bed->name);
             }
         }
+    }
 outBed(f, prevBed, nameHash);
+carefulClose(&f);  // the outBed call above now sits after the if/else, so the last group is written in both modes
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
+exact = optionExists("exact");  // set the global before bedCollect() reads it
 bedCollect(argv[1], argv[2]);
 return 0;
 }