8fdef9e866ecc228bfe4ea54102d0079818f0366
braney
  Fri Apr 8 14:49:38 2022 -0700
let bedGraphToBigWig use chromAlias.bb as chromSizes file.  Add tests
to both bedGraphToBigWig and bedToBigBed

diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c
index 37bfdd9..fc31ab1 100644
--- src/lib/bbiWrite.c
+++ src/lib/bbiWrite.c
@@ -1,32 +1,33 @@
 /* bbiWrite.c - Routines to help write bigWig and bigBed files. See also bbiFile.h */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "limits.h"
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "sqlNum.h"
 #include "zlibFace.h"
 #include "cirTree.h"
 #include "bPlusTree.h"
 #include "bbiFile.h"
-//#include "bbiAlias.h"
+#include "bbiAlias.h"
 #include "net.h"
 #include "obscure.h"
+#include "bigBed.h"
 
 void bbiWriteDummyHeader(FILE *f)
 /* Write out all-zero header, just to reserve space for it. */
 {
 repeatCharOut(f, 0, 64);
 }
 
 void bbiWriteDummyZooms(FILE *f)
 /* Write out zeroes to reserve space for ten zoom levels. */
 {
 repeatCharOut(f, 0, bbiMaxZoomLevels * 24);
 }
 
 void bbiSummaryElementWrite(FILE *f, struct bbiSummaryElement *sum)
 /* Write out summary element to file. */
@@ -158,31 +159,31 @@
 }
 
 void bbExIndexMakerUpdateMaxFieldSize(struct bbExIndexMaker *eim, char **row)
 /* Fold in information about row into bbExIndexMaker into eim->maxFieldSize */
 {
 int i;
 for (i=0; i<eim->indexCount; ++i)
     {
     int rowIx = eim->indexFields[i];
     int size = strlen(row[rowIx]);
     if (size > eim->maxFieldSize[i])
         eim->maxFieldSize[i] = size;
     }
 }
 
-struct bbiChromUsage *bbiChromUsageFromBedFileAlias(struct lineFile *lf, 
+struct bbiChromUsage *bbiChromUsageFromBedFileInternal(struct lineFile *lf, 
         bbiChromSizeFunc chromSizeFunc,  void *chromSizeClosure,
 	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep)
 /* Go through bed file and collect chromosomes and statistics.  If eim parameter is non-NULL
  * collect max field sizes there too. */
 {
 int maxRowSize = (eim == NULL ? 3 : bbExIndexMakerMaxIndexField(eim) + 1);
 char *row[maxRowSize];
 struct bbiChromUsage *usage = NULL, *usageList = NULL;
 int lastStart = -1;
 bits32 id = 0;
 bits64 totalBases = 0, bedCount = 0;
 int minDiff = BIGNUM;
 
 lineFileRemoveInitialCustomTrackLines(lf);
 
@@ -205,31 +206,31 @@
     if (start > end)
         {
 	    errAbort("end (%d) before start (%d) line %d of %s",
 	    	end, start, lf->lineIx, lf->fileName);
 	}
     ++bedCount;
     totalBases += (end - start);
     if (usage == NULL || differentString(usage->name, chrom))
         {
 	/* make sure chrom names are sorted in ASCII order */
 	if ((usage != NULL) && strcmp(usage->name, chrom) > 0)
 	    {
 	    errAbort("%s is not case-sensitive sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" with LC_COLLATE=C,  or bedSort and try again.",
 	    	lf->fileName, lf->lineIx);
 	    }
-	int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom);
+	int chromSize = (*chromSizeFunc)(chromSizeClosure, chrom, lf->lineIx);
         if (chromSize == 0)
             errAbort("%s is not found in chromosome sizes file", chrom);
        
 	AllocVar(usage);
 	usage->name = cloneString(chrom);
 	usage->id = id++;
 	usage->size = chromSize;
 	slAddHead(&usageList, usage);
 	lastStart = -1;
 	}
     if (end > usage->size)
         errAbort("End coordinate %d bigger than %s size of %d line %d of %s", end, usage->name, usage->size, lf->lineIx, lf->fileName);
     usage->itemCount += 1;
     if (lastStart >= 0)
         {
@@ -242,45 +243,87 @@
 	    minDiff = diff;
 	    }
 	}
     lastStart = start;
     }
 slReverse(&usageList);
 double aveSize = 0;
 if (bedCount > 0)
     aveSize = (double)totalBases/bedCount;
 *retMinDiff = minDiff;
 *retAveSize = aveSize;
 *retBedCount = bedCount;
 return usageList;
 }
 
-static int chromHashSizeFunc(void *closure, char *chrom)
+struct chromSizeClosure  // a structure that contains the data we need to get a chromosome size from a bigBed
+{ 
+    struct bbiFile *bbi;
+    struct bptIndex *bptIndex;
+    struct lm *lm;
+    struct hash *usedAlias;
+};
+
+static int bbChromSizeFunc(void *closure, char *chrom, int lineIx)
+/* A function to return the size of a given sequence. */
+{
+struct chromSizeClosure *bbChromSizeClosure = (struct chromSizeClosure *)closure;
+
+return bbiAliasChromSizeExt(bbChromSizeClosure->bbi, bbChromSizeClosure->bptIndex, bbChromSizeClosure->lm, chrom, bbChromSizeClosure->usedAlias, lineIx);
+}
+
+struct bbiChromUsage *bbiChromUsageFromBedFileAlias(struct lineFile *lf, char *chromAliasBb,
+	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep)
+/* A wrapper for bbiChromUsageFromBedFile that uses a bigBed to find chromosome sizes. */
+{
+struct chromSizeClosure *bbChromSizeClosure = NULL;
+
+AllocVar(bbChromSizeClosure);
+bbChromSizeClosure->bbi = bigBedFileOpen(chromAliasBb);
+bbChromSizeClosure->bptIndex = bbiAliasOpenExtra(bbChromSizeClosure->bbi);
+bbChromSizeClosure->lm = lmInit(0);
+bbChromSizeClosure->usedAlias = hashNew(0);
+
+struct bbiChromUsage *usageList = bbiChromUsageFromBedFileInternal(lf, bbChromSizeFunc, bbChromSizeClosure, eim, retMinDiff, retAveSize, retBedCount, tabSep);
+
+bbiFileClose(&bbChromSizeClosure->bbi);
+struct bptIndex *next, *bptIndex = bbChromSizeClosure->bptIndex;
+for(; bptIndex; bptIndex = next)
+    {
+    next = bptIndex->next;
+    freez(&bptIndex);
+    }
+lmCleanup(&bbChromSizeClosure->lm);
+
+return usageList;
+}
+
+static int chromHashSizeFunc(void *closure, char *chrom, int lineIx)
 /* Function to find the size of sequence using a hash passed in as a closure. */
 {
 struct hash *chromSizesHash = (struct hash *)closure;
 struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom);
 if (chromHashEl == NULL)
     errAbort("%s is not found in chromosome sizes file", chrom);
 return ptToInt(chromHashEl->val);
 }
 
 struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, 
 	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep)
 /* A wrapper for bbiChromUsageFromBedFile that uses a hash to find chromosome sizes. */
 {
-return bbiChromUsageFromBedFileAlias(lf, chromHashSizeFunc, chromSizesHash,
+return bbiChromUsageFromBedFileInternal(lf, chromHashSizeFunc, chromSizesHash,
 	eim, retMinDiff, retAveSize, retBedCount, tabSep);
 }
 
 int bbiCalcResScalesAndSizes(int aveSize, 
     int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels])
 /* Fill in resScales with amount to zoom at each level, and zero out resSizes based
  * on average span. Returns the number of zoom levels we actually will use. */
 {
 int resTryCount = bbiMaxZoomLevels, resTry;
 int resIncrement = bbiResIncrement;
 int minZoom = 10;
 int res = aveSize;
 if (res < minZoom)
     res = minZoom;
 for (resTry = 0; resTry < resTryCount; ++resTry)