8fdef9e866ecc228bfe4ea54102d0079818f0366
braney
  Fri Apr 8 14:49:38 2022 -0700
let bedGraphToBigWig use chromAlias.bb as chromSizes file.  Add tests
to both bedGraphToBigWig and bedToBigBed

diff --git src/utils/bedGraphToBigWig/bedGraphToBigWig.c src/utils/bedGraphToBigWig/bedGraphToBigWig.c
index deb33ac..6e89e40 100644
--- src/utils/bedGraphToBigWig/bedGraphToBigWig.c
+++ src/utils/bedGraphToBigWig/bedGraphToBigWig.c
@@ -8,71 +8,75 @@
 #include "linefile.h"
 #include "localmem.h"
 #include "hash.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "dystring.h"
 #include "cirTree.h"
 #include "sig.h"
 #include "zlibFace.h"
 #include "bPlusTree.h"
 #include "bbiFile.h"
 #include "bwgInternal.h"
 #include "bigWig.h"
 
 
-char *version = "2.8";   // when changing, change in bedToBigBed, bedGraphToBigWig, and wigToBigWig
+char *version = "2.9";   // when changing, change in bedToBigBed, bedGraphToBigWig, and wigToBigWig
 /* Version history from 2.8 on at least -
+ * 2.9 - ability to specify chromAlias bigBed as chromSizes file
  * 2.8  sync up version numbers with bedToBigBed 
  */
 
 static int blockSize = 256;
 static int itemsPerSlot = 1024;
 static boolean doCompress = FALSE;
 static int maxGigs = 100;   // Maximum number of gigs to allocate in one block.  
 			    // Undocumented on purpose.
+static boolean sizesIsBb = FALSE;  // Set by -sizesIsBb: chrom.sizes argument is a bigBed, not a two-column text file.
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedGraphToBigWig v %s - Convert a bedGraph file to bigWig format (bbi version: %d).\n"
   "usage:\n"
   "   bedGraphToBigWig in.bedGraph chrom.sizes out.bw\n"
   "where in.bedGraph is a four column file in the format:\n"
   "      <chrom> <start> <end> <value>\n"
   "and chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n"
   "and out.bw is the output indexed big wig file.\n"
   "If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n"
   "  http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n"
   "or you may use the script fetchChromSizes to download the chrom.sizes file.\n"
   "If not hosted by UCSC, a chrom.sizes file can be generated by running\n"
   "twoBitInfo on the assembly .2bit file.\n"
   "The input bedGraph file must be sorted, use the unix sort command:\n"
   "  sort -k1,1 -k2,2n unsorted.bedGraph > sorted.bedGraph\n"
   "options:\n"
   "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
   "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
+  "   -sizesIsBb - If set, the chrom.sizes file is assumed to be a chromAlias bigBed file.\n"
   "   -unc - If set, do not use compression."
   , version, bbiCurrentVersion, blockSize, itemsPerSlot
   );
 }
 
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},
    {"itemsPerSlot", OPTION_INT},
+   {"sizesIsBb", OPTION_BOOLEAN},   /* flag: chrom.sizes argument is a bigBed file */
    {"unc", OPTION_BOOLEAN},
    {"maxGigs", OPTION_INT},
    {NULL, 0},
 };
 
 struct sectionItem
 /* An item in a section of a bedGraph. */
     {
     bits32 start, end;			/* Position in chromosome, half open. */
     float val;				/* Single precision value. */
     };
 
 void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf, 
 	int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
 	int resTryCount, int resScales[], int resSizes[], 
@@ -360,38 +364,45 @@
 assert(boundsPt == boundsEnd);
 cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount,
     blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, 
     indexOffset, f);
 
 freez(&boundsArray);
 slReverse(&twiceReducedList);
 return twiceReducedList;
 }
 
 void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
 /* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
 {
 verboseTimeInit();
 struct lineFile *lf = lineFileOpen(inName, TRUE);
-struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
-verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
 int minDiff = 0, i;
 double aveSize = 0;
 bits64 bedCount = 0;
 bits32 uncompressBufSize = 0;
-struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, 
+struct bbiChromUsage *usageList;
+
+if (sizesIsBb)
+    usageList = bbiChromUsageFromBedFileAlias(lf, chromSizes, NULL, &minDiff, &aveSize, &bedCount, FALSE);
+else
+    {
+    struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
+    verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
+    usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, 
         &minDiff, &aveSize, &bedCount, FALSE);
+    }
 verboseTime(2, "pass1");
 verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n", 
     slCount(usageList), inName, minDiff, aveSize, bedCount);
 
 /* Write out dummy header, zoom offsets. */
 FILE *f = mustOpen(outName, "wb");
 bbiWriteDummyHeader(f);
 bbiWriteDummyZooms(f);
 
 /* Write out dummy total summary. */
 struct bbiSummaryElement totalSum;
 ZeroVar(&totalSum);
 bits64 totalSummaryOffset = ftell(f);
 bbiSummaryElementWrite(f, &totalSum);
 
@@ -493,23 +504,24 @@
 fseek(f, 0L, SEEK_END);
 writeOne(f, sig);
 
 lineFileClose(&lf);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 maxGigs = optionInt("maxGigs", maxGigs);
 setMaxAlloc(maxGigs*1000000000L);  
 blockSize = optionInt("blockSize", blockSize);
 itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
+sizesIsBb = optionExists("sizesIsBb");  /* selects the bigBed (chromAlias) path for the chrom.sizes argument */
 doCompress = !optionExists("unc");
 if (argc != 4)
     usage();
 bedGraphToBigWig(argv[1], argv[2], argv[3]);
 if (verboseLevel() > 1)
     printVmPeak();
 return 0;
 }