d0054a39067a773d5342af78a80d964835d47a60
markd
  Thu Oct 16 13:28:21 2025 -0700
add bedToBigBed -fixScores to correct scores that are out-of-range or invalid

diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index 7f9e5b97711..e0ac7b1e662 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -33,30 +33,31 @@
 
 /* Things set directly or indirectly by command lne in main() routine. */
 int blockSize = 256;
 int itemsPerSlot = 512;
 char *extraIndex = NULL;
 int bedN = 0;   /* number of standard bed fields */
 int bedP = 0;   /* number of bed plus fields */
 char *asFile = NULL;
 char *asText = NULL;
 char *udcDir = NULL;
 static boolean doCompress = FALSE;
 static boolean tabSep = FALSE;
 static boolean sizesIs2Bit = FALSE;
 static boolean sizesIsChromAliasBb = FALSE;
 static boolean allow1bpOverlap = FALSE;
+static boolean fixScores = FALSE;   /* -fixScores given: request BED_FIX_SCORE when loading beds */
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedToBigBed v. %s - Convert bed file to bigBed. (bbi version: %d)\n"
   "usage:\n"
   "   bedToBigBed in.bed chrom.sizes out.bb\n"
   "Where in.bed is in one of the ascii bed formats, but not including track lines\n"
   "and chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n"
   "and out.bb is the output indexed big bed file.\n"
   "\n"
   "If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n"
   "  http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n"
   "or you may use the script fetchChromSizes to download the chrom.sizes file.\n"
@@ -95,49 +96,51 @@
   "   -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n"
   "                   of each field in a row in AutoSql format here.\n"
   "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
   "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
   "   -unc - If set, do not use compression.\n"
   "   -tab - If set, expect fields to be tab separated, normally\n"
   "           expects white space separator.\n"
   "   -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n"
   "           extraIndex=name and extraIndex=name,id are commonly used.\n"
   "   -sizesIs2Bit  -- If set, the chrom.sizes file is assumed to be a 2bit file.\n"
   "   -sizesIsChromAliasBb -- If set, then chrom.sizes file is assumed to be a chromAlias\n"
   "    bigBed file or a URL to a such a file (see above).\n"
   "   -sizesIsBb  -- Obsolete name for -sizesIsChromAliasBb.\n"
   "   -udcDir=/path/to/udcCacheDir  -- sets the UDC cache dir for caching of remote files.\n"
   "   -allow1bpOverlap  -- allow exons to overlap by at most one base pair\n"
+  "   -fixScores  -- change non-integer scores to 0 and force scores into range 0..1000\n"
   "   -maxAlloc=N -- Set the maximum memory allocation size to N bytes\n"
   "   -sort -- sort the input file\n"
   , version, bbiCurrentVersion, blockSize, itemsPerSlot
   );
 }
 
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},
    {"itemsPerSlot", OPTION_INT},
    {"type", OPTION_STRING},
    {"as", OPTION_STRING},
    {"unc", OPTION_BOOLEAN},
    {"tab", OPTION_BOOLEAN},
    {"sizesIs2Bit", OPTION_BOOLEAN},
    {"sizesIsChromAliasBb", OPTION_BOOLEAN},
    {"sizesIsBb", OPTION_BOOLEAN},
    {"extraIndex", OPTION_STRING},
    {"udcDir", OPTION_STRING},
    {"allow1bpOverlap", OPTION_BOOLEAN},
+   {"fixScores", OPTION_BOOLEAN},
    {"maxAlloc", OPTION_LONG_LONG},
    {"sort", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 static struct lineFile *rewindFile(char *inName, struct lineFile *lf)
 /* set up lineFile to point at the beginning of the file.  It we're reading from a decompressing
  * pipe, we need to close and reopen the pipe. */
 {
 if (lf->pl)
     {
     lineFileClose(&lf);
     lf = lineFileOpen(inName, TRUE);
     }
 else
@@ -215,49 +218,55 @@
 bits64 blockStartOffset = 0;
 int startPos = 0, endPos = 0;
 bits32 chromId = 0;
 struct dyString *stream = dyStringNew(0);
 
 /* Will keep track of some things that help us determine how much to reduce. */
 bits32 resEnds[resTryCount];
 int resTry;
 for (resTry = 0; resTry < resTryCount; ++resTry)
     resEnds[resTry] = 0;
 boolean atEnd = FALSE, sameChrom = FALSE;
 bits32 start = 0, end = 0;
 char *chrom = NULL;
 struct bed *bed;
 AllocVar(bed);
+unsigned opts = 0;
+if (allow1bpOverlap)
+    opts |= BED_ALLOW_1BP_OVERLAP;
+if (fixScores)
+    opts |= BED_FIX_SCORE;
+
 
 /* Help keep track of which beds are in current chunk so as to write out
  * namedChunks to eim if need be. */
 long sectionStartIx = 0, sectionEndIx = 0;
 
 for (;;)
     {
     /* Get next line of input if any. */
     if (lineFileNextReal(lf, &line))
 	{
 	/* Chop up line and make sure the word count is right. */
 	int wordCount;
 	if (tabSep)
 	    wordCount = chopTabs(line, row);
 	else
 	    wordCount = chopLine(line, row);
 	lineFileExpectWordsMesg(lf, fieldCount, wordCount, "If the input is a tab-sep file, do not forget to use the -tab option");
 
-	loadAndValidateBedExt(row, bedN, fieldCount, lf, bed, as, FALSE, allow1bpOverlap);
+	loadAndValidateBedOpts(row, bedN, fieldCount, lf, bed, as, opts);
 
 	chrom = bed->chrom;
 	start = bed->chromStart;
 	end = bed->chromEnd;
 
 	sameChrom = sameString(chrom, usage->name);
 	}
     else  /* No next line */
 	{
 	atEnd = TRUE;
 	}
 
 
     /* Check conditions that would end block and save block info and advance to next if need be. */
     if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
@@ -342,32 +351,34 @@
 	{
 	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
 	sectionEndIx += 1;
 	}
 
     /* Write out data. */
     dyStringWriteOne(stream, chromId);
     dyStringWriteOne(stream, start);
     dyStringWriteOne(stream, end);
     if (fieldCount > 3)
         {
 	int i;
 	/* Write 3rd through next to last field and a tab separator. */
 	for (i=3; i<lastField; ++i)
 	    {
-	    char *s = row[i];
-	    dyStringAppend(stream, s);
+            if ((opts & BED_FIX_SCORE) && (i == 4))
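+                dyStringPrintf(stream, "%d", bed->score);  // keep fixed score; NOTE(review): if score is the last field (bed5) it is written from row[] below, not here - confirm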
+            else
+                dyStringAppend(stream, row[i]);
 	    dyStringAppendC(stream, '\t');
 	    }
 	/* Write last field and terminal zero */
 	char *s = row[lastField];
 	dyStringAppend(stream, s);
 	}
     dyStringAppendC(stream, 0);
 
     itemIx += 1;
 
     /* Do zoom counting. */
     for (resTry = 0; resTry < resTryCount; ++resTry)
         {
 	bits32 resEnd = resEnds[resTry];
 	if (start >= resEnd && resEnd < usage->size)
@@ -870,30 +881,31 @@
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 blockSize = optionInt("blockSize", blockSize);
 itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
 asFile = optionVal("as", asFile);
 doCompress = !optionExists("unc");
 sizesIs2Bit = optionExists("sizesIs2Bit");
 sizesIsChromAliasBb = optionExists("sizesIsChromAliasBb") || optionExists("sizesIsBb");
 if (sizesIs2Bit && sizesIsChromAliasBb)
     errAbort("can't specify both -sizesIs2Bit and -sizesIsChromAliasBb");
 extraIndex = optionVal("extraIndex", NULL);
 tabSep = optionExists("tab");
 allow1bpOverlap = optionExists("allow1bpOverlap");
+fixScores = optionExists("fixScores");
 udcDir = optionVal("udcDir", udcDefaultDir());
 size_t maxAlloc = optionLongLong("maxAlloc", 0);
 if (argc != 4)
     usage();
 
 char *bedFileName = argv[1];
 
 mustBeReadableAndRegularFile(bedFileName);
 
 udcSetDefaultDir(udcDir);
 if (maxAlloc > 0)
     setMaxAlloc(maxAlloc);
 
 if (optionExists("type"))
     {