3e39804e374b48ecc82eec8ae77b9de1a0846173
braney
  Sat Aug 12 16:16:44 2023 -0700
second cut at HPRC rearrangement summary tracks

diff --git src/utils/bedCollect/bedCollect.c src/utils/bedCollect/bedCollect.c
new file mode 100644
index 0000000..0aa3d99
--- /dev/null
+++ src/utils/bedCollect/bedCollect.c
@@ -0,0 +1,83 @@
+/* bedCollect - collect overlapping beds into a single bed. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "basicBed.h"
+#include "dystring.h"
+
+void usage()
+/* Explain usage and exit.  The text lists no options because the command
+ * line validation table in this file defines none. */
+{
+errAbort(
+  "bedCollect - collect overlapping beds into a single bed\n"
+  "usage:\n"
+  "   bedCollect input.bed output.bed\n"
+  "note: input beds need to be sorted with bedSort\n"
+  );
+}
+
+/* Command line validation table, handed to optionInit() in main().
+ * bedCollect defines no options, so the table holds only the {NULL, 0}
+ * entry (presumably the end-of-list marker optionInit expects - confirm
+ * against options.h). */
+static struct optionSpec options[] = {
+   {NULL, 0},
+};
+
+static void outBed(FILE *f, struct bed *bed, struct hash *nameHash)
+/* Write one collected region to f as a single line of ten space-separated
+ * fields: chrom, start, end, the generated item name arrN, the number of
+ * names, a literal '+', start and end again, a literal 0, and the names.
+ *   f        - open output file
+ *   bed      - supplies chrom, chromStart and chromEnd of the region; its
+ *              score is set to the number of names taken from nameHash
+ *   nameHash - hash whose keys are the names collected for this region;
+ *              it is only read here and stays owned by the caller
+ * N in arrN is a counter kept across calls, starting at 0.  The names are
+ * written each followed by a comma, in the order hashSlNameFromHash returns
+ * them. */
+{
+static int count = 0;
+struct slName *names = hashSlNameFromHash(nameHash);
+struct slName *el;
+struct dyString *dy = newDyString(100);
+
+bed->score = slCount(names);
+
+/* Walk with a separate cursor so the list head survives for slFreeList. */
+for (el = names; el != NULL; el = el->next)
+    {
+    dyStringAppend(dy, el->name);
+    dyStringAppend(dy, ",");
+    }
+
+/* Print straight from the dyString instead of storing its buffer in
+ * bed->name: that would orphan the bed's own name and leave the bed
+ * pointing at memory that is released just below. */
+fprintf(f, "%s %d %d arr%d %d + %d %d 0 %s\n", bed->chrom, bed->chromStart, bed->chromEnd, count++, bed->score, bed->chromStart, bed->chromEnd, dy->string);
+
+freeDyString(&dy);
+slFreeList(&names);
+}
+
+void bedCollect(char *inFile, char *outFile)
+/* bedCollect - collect overlapping beds into a single bed.
+ *   inFile  - bed file to read; it must already be sorted (see usage) and
+ *             must carry a name column
+ *   outFile - file to create; receives one line per run of overlapping beds
+ * Consecutive beds on the same chrom are collected into one region for as
+ * long as each starts before the current end of the region.  Every region
+ * is written by outBed() together with the names of the beds in it.  An
+ * input without any beds produces an empty output file. */
+{
+struct bed *allBeds = bedLoadAll(inFile);
+FILE *f = mustOpen(outFile, "w");
+struct bed *bed;
+struct bed *cluster = NULL;     /* first bed of the open region; its chromEnd grows */
+struct hash *nameHash = NULL;   /* names of the beds in the open region */
+
+for (bed = allBeds; bed != NULL; bed = bed->next)
+    {
+    if (bed->name == NULL)
+        errAbort("bedCollect: %s has no name column, at least four fields are required", inFile);
+
+    if (cluster == NULL || differentString(cluster->chrom, bed->chrom) || (cluster->chromEnd <= bed->chromStart))
+        {
+        /* This bed cannot join the open region: flush that region, if
+         * there is one, and open a new region at this bed.  Testing
+         * cluster == NULL first keeps the very first bed from being
+         * compared against itself. */
+        if (cluster != NULL)
+            {
+            outBed(f, cluster, nameHash);
+            freeHash(&nameHash);
+            }
+        cluster = bed;
+        nameHash = newHash(0);
+        }
+    else if (bed->chromEnd > cluster->chromEnd)
+        {
+        /* Overlapping bed that reaches further right: extend the region. */
+        cluster->chromEnd = bed->chromEnd;
+        }
+
+    hashStore(nameHash, bed->name);
+    }
+
+/* Flush the region that was still open when the input ran out. */
+if (cluster != NULL)
+    {
+    outBed(f, cluster, nameHash);
+    freeHash(&nameHash);
+    }
+
+carefulClose(&f);
+bedFreeList(&allBeds);
+}
+
+int main(int argc, char *argv[])
+/* Entry point: check the command line, then collect the beds of the first
+ * argument into the file named by the second. */
+{
+char *inFile, *outFile;
+
+/* optionInit gets argc by address, so the count is tested only afterwards
+ * (presumably recognised options are removed from argv - confirm against
+ * options.h). */
+optionInit(&argc, argv, options);
+if (argc != 3)
+    {
+    usage();
+    }
+
+inFile = argv[1];
+outFile = argv[2];
+bedCollect(inFile, outFile);
+return 0;
+}