src/hg/lib/genomeRangeTreeFile.c e70152e44cc66cc599ff6b699eb8adc07f3e656a

e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/lib/genomeRangeTreeFile.c src/hg/lib/genomeRangeTreeFile.c
index 410d462..cc895d5 100644
--- src/hg/lib/genomeRangeTreeFile.c
+++ src/hg/lib/genomeRangeTreeFile.c
@@ -1,610 +1,613 @@
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "sig.h"
 #include "localmem.h"
 #include "rbTree.h"
 #include "hash.h"
 #include "rangeTreeFile.h"
 #include "genomeRangeTreeFile.h"
 #include <limits.h>
 
 
 struct genomeRangeTree *genomeRangeTreeRead(char *fileName)
 /* Read in the genomeRangeTree data for each chromosome and
  * return the genomeRangeTree.
  * Squawk and die if there is a problem. */
 {
 struct genomeRangeTreeFile *f = genomeRangeTreeFileReadHeader(fileName);
 struct genomeRangeTree *tree = genomeRangeTreeFileReadData(f);
 genomeRangeTreeFileFree(&f);
 return tree;
 }
 
 void genomeRangeTreeWrite(struct genomeRangeTree *t, char *fileName)
 /* Write out genomeRangeTree including: 
  * header portion
  * index of chromosomes
  * data for each range tree */
 {
 struct genomeRangeTreeFile *f = genomeRangeTreeFileNew(t, fileName);
 genomeRangeTreeFileWriteHeader(f);
 genomeRangeTreeFileWriteData(f);
 genomeRangeTreeFileFree(&f);
 }
 
 struct genomeRangeTreeFile *genomeRangeTreeFileNew(struct genomeRangeTree *tree, char *fileName)
 /* Create a genomeRangeTreeFile to save a genomeRangeTree in 'fileName'. 
  * Opens the file, does not write the header.
  * To write the header data only use: genomeRangeTreeFileWriteHeader().
  * To write the data portion only use: genomeRangeTreeFileWriteData(). 
  * If fileName is null, does not open any file. */
 {
 struct genomeRangeTreeFile *tf;
 AllocVar(tf);
 if (fileName)
     {
     tf->name = cloneString(fileName);
     tf->file =  mustOpen(fileName, "wb");
     }
 tf->tree = tree;
 tf->sig = genomeRangeTreeSig;
 tf->version = 0;
 tf->numChroms = tree->hash->elCount;
 tf->valDataSize = 0; /* zero for no data, (later: -ve for variable size, +ve specifies fixed size */
 tf->valDataType = 0; /* unused (later: signature for type ) */
 tf->reserved1 = 0;
 tf->reserved2 = 0;
 /* Figure out location of first byte past header (location of the index). */
 tf->headerLen = sizeof(tf->sig) + sizeof(tf->version) + sizeof(tf->headerLen) + sizeof(tf->numChroms)
     + sizeof(tf->valDataSize) + sizeof(tf->valDataType) + sizeof(tf->reserved1) + sizeof(tf->reserved2);
 
 bits32 offset = 0;
 bits32 nodes = 0;
 long long counter = 0; /* check for 32 bit overflow */
 /* get a sorted list of all chroms */
 tf->chromList = hashElListHash(tree->hash);
 slSort(&(tf->chromList), hashElCmp); /* strcmp() sort */
 
 /* Figure out location of first byte past index (location of the data).
  * Each index entry contains 4+4 bytes of offset information
  * and the size and name of the sequence, which is variable length. */
 struct hashEl *chrom;
 offset = tf->headerLen;
 for ( chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int nameLen = strlen(chrom->name);
     if (nameLen > 255)
         errAbort("name %s too long", chrom->name);
     /* index is list of triples: (name nodes offset) */
     offset += nameLen + 1 + sizeof(nodes) + sizeof(offset);
     }
 
 /* Set up index. */ 
 tf->nodes = newHash(0);
 tf->offset = newHash(0);
 for ( chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int size = rangeTreeSizeInFile(genomeRangeTreeFindRangeTree(tree, chrom->name));
     hashAddInt(tf->nodes, chrom->name, genomeRangeTreeFindRangeTree(tree, chrom->name)->n);
     hashAddInt(tf->offset, chrom->name, offset);
     offset += size;
     counter += (long long)size;
     if (counter > UINT_MAX )
         errAbort("Error in %s, index overflow at %s. The genomeRangeTree format "
                 "does not support trees larger than %dGb, \n"
                 "please split up into smaller files.\n", __FILE__, 
                 chrom->name, UINT_MAX/1000000000);
     }
 
 return tf;
 }
 
 struct genomeRangeTreeFile *genomeRangeTreeFileReadHeader(char *fileName)
 /* Creates a genomeRangeTreeFile to read a genomeRangeTree from 'fileName'.
  * Opens the file, reads in header and index. 
  * Leaves file handle open at begining of data portion.
  * Returns a genomeRangeTreeFile containing file handle and index into contents.
  * To read genomeRangeTree data use: genomeRangeTreeFileReadData().
  * To return genomeRangeTree and close file and index use: genomeRangeTreeFileFree()
  * Squawk and die if there is a problem. */
 {
 struct genomeRangeTreeFile *f;
 int i;
 
 AllocVar(f);
 f->name = cloneString(fileName);
 f->file = mustOpen(fileName, "rb");
 
 /* Allocate header verify signature, and read in
  * the constant-length bits. */
 f->tree = genomeRangeTreeNew();
 mustReadOne(f->file, f->sig);
 if (f->sig == genomeRangeTreeSwapSig)
     f->isSwapped = TRUE;
 else if (f->sig != genomeRangeTreeSig)
     errAbort("%s doesn't have a valid genomeRangeTreeSig", fileName);
 f->version = readBits32(f->file, f->isSwapped);
 if (f->version != 0)
     {
     errAbort("Can only handle version 0 of this file. This is version %d", (int)f->version);
     }
 f->headerLen = readBits32(f->file, f->isSwapped);
 f->numChroms = readBits32(f->file, f->isSwapped);
 f->valDataSize = readBits32(f->file, f->isSwapped);
 f->valDataType = readBits32(f->file, f->isSwapped);
 f->reserved1 = readBits32(f->file, f->isSwapped);
 f->reserved2 = readBits32(f->file, f->isSwapped);
 if (f->valDataSize != 0)
     errAbort("Can only handle valDataSize of 0. This has %d.\n", (int)f->valDataSize);
 if (f->valDataType != 0)
     errAbort("Can only handle valDataType of 0. This has %d.\n", (int)f->valDataType);
 
 /* Read in index. Set up chromList, nodes, and offset fields. */
 f->nodes = newHash(0);
 f->offset = newHash(0);
 for (i=0; i < f->numChroms ; ++i)
     {
     char name[256];
     if (!fastReadString(f->file, name))
         errAbort("%s is truncated", fileName);
     genomeRangeTreeFindOrAddRangeTree(f->tree, name); /* add empty tree for chrom */
     hashAddInt(f->nodes,  name, readBits32(f->file, f->isSwapped)); /* node count */
     hashAddInt(f->offset, name, readBits32(f->file, f->isSwapped)); /* data offset */
     }
 f->chromList = hashElListHash(f->tree->hash);
 slSort(&(f->chromList), hashElCmp); /* strcmp() sort */
 return f;
 }
 
 struct genomeRangeTree *genomeRangeTreeFileReadData(struct genomeRangeTreeFile *f)
 /* Read in the genomeRangeTree data for each chromosome and
  * return the genomeRangeTree.
  * File handle is left open pointing at the end of the file.
  * To close and free the genomeRangeTreeFile use: genomeRangeTreeFileFree().
  * Squawk and die if there is a problem. */
 {
 struct hashEl *chrom;
 
 /* Read in nodes one tree at a time */
 for (chrom = f->chromList ; chrom ; chrom = chrom->next)
     {
     rangeTreeReadNodes(f->file, genomeRangeTreeFindOrAddRangeTree(f->tree, chrom->name), hashIntVal(f->nodes, chrom->name), f->isSwapped);
     }
 return f->tree;
 }
 
 
 static void genomeRangeTreeFileWriteHeaderDetailed(struct genomeRangeTreeFile *f, boolean zeroSig)
 /* Write out genomeRangeTree header including: 
  *  header portion
  *  index of chromosomes.
  * If zeroSig is TRUE then write a zero in the sig field. 
  *   This stops any other programs reading the header until the file is ready.
  *   This is to support operations such as genomeRangeTreeFileOr() 
  *   which need to go back and update the header with node and offset information 
  *   after the data section is written. On the second header write the sig field
  *   can be written correctly so that other programs can then read the completed file.
  * To close the file use: genomeRangeTreeFileFree(). */
 {
 struct hashEl *chrom;
 bits32 zero = 0;
 /* Write out fixed parts of header. */
 if (zeroSig)
     writeOne(f->file, zero);
 else
     writeOne(f->file, f->sig);
 writeOne(f->file, f->version);
 writeOne(f->file, f->headerLen);
 writeOne(f->file, f->numChroms);
 writeOne(f->file, f->valDataSize);
 writeOne(f->file, f->valDataType);
 writeOne(f->file, f->reserved1);
 writeOne(f->file, f->reserved2);
 /* Write out index. */
 for ( chrom = f->chromList; chrom ; chrom = chrom->next )
     {
     bits32 nodes = hashIntVal(f->nodes, chrom->name);
     bits32 offset = hashIntVal(f->offset, chrom->name);
     writeString(f->file, chrom->name);
     writeOne(f->file, nodes);
     writeOne(f->file, offset);
     }
 }
 
 void genomeRangeTreeFileWriteHeader(struct genomeRangeTreeFile *f)
 /* Write out genomeRangeTree header including: 
  *  header portion
  *  index of chromosomes.
  * To close the file use: genomeRangeTreeFileFree(). */
 {
 genomeRangeTreeFileWriteHeaderDetailed(f, FALSE);
 }
 
 void genomeRangeTreeFileWriteData(struct genomeRangeTreeFile *f)
 /* Write out all genomeRangeTree data. */
 {
 struct hashEl *chrom;
 for ( chrom = f->chromList ; chrom ; chrom = chrom->next)
     {
     rangeTreeWriteNodes(genomeRangeTreeFindRangeTree(f->tree, chrom->name), f->file);
     }
 }
 
 struct genomeRangeTree *genomeRangeTreeFileFree(struct genomeRangeTreeFile **pFile)
 /* Free up the resources associated with a genomeRangeTreeFile.
  * Close the file.
  * Return the genomeRangeTree. */
 {
 struct genomeRangeTreeFile *f = *pFile;
 struct genomeRangeTree *tree = f->tree;
 freez(&(f->name));
 if (f->file)
     carefulClose(&(f->file));
 hashElFreeList(&(f->chromList));
 hashFree(&(f->nodes));
 hashFree(&(f->offset));
 freez(pFile);
 return tree;
 }
 
 void genomeRangeTreeFileUnionDetailed(struct genomeRangeTreeFile *tf1, struct genomeRangeTreeFile *tf2, char *outFile, int *numChroms, int *nodes, unsigned *size, boolean saveMem, boolean orDirectToFile)
 /* Create union of two saved genomeRangeTrees through a linear file scan.
  * Writes resulting genomeRangeTree to outFile. 
  * The resulting file cannot be safely read until the operation is complete. The header
  * information at the beginning of the file has to be updated after all the data is written
  * since the number of nodes in the final merged rangeTree is not known until the ranges are merged.
  * To enforce this, the header is written with a zero initial 'sig' field so that it cannot
  * be read as a genomeRangeTree file. The header information and 'sig' is re-written with 
  * correct data at the end of the process via an 'fseek' operation to the beginning of the file. 
  * If outFile is null, does not output the file. 
  * The number of nodes in the resulting tree is returned in n.
  * If size is not NULL, this will return the total size of the resulting ranges (adds 'n' 
  * calculations to run time of program). */
 {
 struct genomeRangeTree *tree = genomeRangeTreeNew();
 struct hashEl *c1, *c2;
 struct genomeRangeTreeFile *tf;
 int res, nodes1, nodes2, i=0;
 if (size)
     *size = 0;
 /* Add all chroms to the tree and write the header */
 for (c1 = tf1->chromList ; c1 ; c1=c1->next)
     {
     genomeRangeTreeFindOrAddRangeTree(tree, c1->name);
     }
 for (c2 = tf2->chromList ; c2 ; c2=c2->next)
     {
     genomeRangeTreeFindOrAddRangeTree(tree, c2->name);
     }
 
 /* write the header with a zero sig so that noone 
  * else can read this file until its ready (the header
  * will need to be re-written at the end to update the
  * chrom node and offset information. */
 tf = genomeRangeTreeFileNew(tree, outFile);
 if (tf->file)
     genomeRangeTreeFileWriteHeaderDetailed(tf, TRUE); 
 
 /* loop through the chromosomes merging the rangeTrees */
 c1 = tf1->chromList;
 c2 = tf2->chromList;
 while (c1 && c2) /* at least one chrom in each tree */
     {
     res = strcmp(c1->name, c2->name);
     nodes1 = hashIntVal(tf1->nodes, c1->name);
     nodes2 = hashIntVal(tf2->nodes, c2->name);
     if (res < 0) /* chrom1 < chrom2 so write out chrom1 directly */
 	{
 	struct rangeStartSize *r1;
 	hashAddInt(tf->nodes, c1->name, nodes1);
 	AllocArray(r1, nodes1);
 	rangeReadArray(tf1->file, r1, nodes1, tf1->isSwapped);
 	if (tf->file)
 		rangeWriteArray(r1, nodes1, tf->file);
 	if (size)
 		*size += rangeArraySize(r1, nodes1);
 	freez(&r1);
 	i += nodes1;
 	c1 = c1->next; 
 	}
     else if (res > 0) /* chrom2 < chrom1 so write out chrom2 directly */
 	{
 	struct rangeStartSize *r2;
 	hashAddInt(tf->nodes, c2->name, nodes2);
 	AllocArray(r2, nodes2);
 	rangeReadArray(tf2->file, r2, nodes2, tf2->isSwapped);
 	if (tf->file)
 		rangeWriteArray(r2, nodes2, tf->file);
 	if (size)
 		*size += rangeArraySize(r2, nodes2);
 	freez(&r2);
 	i += nodes2;
 	c2 = c2->next;
 	}
     else /* res == 0 so both chroms the same and need to merge */
 	{
 	struct rangeStartSize *r1, *r2;
 	AllocArray(r1, nodes1);
 	AllocArray(r2, nodes2);
 	rangeReadArray(tf1->file, r1, nodes1, tf1->isSwapped);
 	rangeReadArray(tf2->file, r2, nodes2, tf2->isSwapped);
 	/* merge and store the total nodes after merging */
 	int n0=0;
 	unsigned s0=0;
 	if (orDirectToFile)
 	    s0 = rangeArrayUnionToFile(r1, r2, nodes1, nodes2, tf->file, &n0);
 	else
 	    {
 	    struct rangeStartSize *r=NULL;
 	    s0 = rangeArrayUnion(r1, r2, nodes1, nodes2, &r, saveMem, NULL, NULL, NULL, &n0);
 	    if (tf->file && n0 > 0)
 		{
 		rangeWriteArray(r, n0, tf->file);
 		}
 	    freez(&r);
 	    }
 	if (size)
 	    (*size) += s0;
 	i += n0;
 	hashAddInt(tf->nodes, c1->name, n0);
 	freez(&r1);
 	freez(&r2);
 	c1 = c1->next;
 	c2 = c2->next;
 	}
     }
 
 /* directly write out any remaining chromosomes, either c1 or c2 */
 while (c1)
     {
     struct rangeStartSize *r1;
     nodes1 = hashIntVal(tf1->nodes, c1->name);
     hashAddInt(tf->nodes, c1->name, nodes1);
     AllocArray(r1, nodes1);
     rangeReadArray(tf1->file, r1, nodes1, tf1->isSwapped);
     if (tf->file)
 	rangeWriteArray(r1, nodes1, tf->file);
     if (size)
 	*size += rangeArraySize(r1, nodes1);
     i += nodes1;
     freez(&r1);
     c1 = c1->next;
     }
 while (c2)
     {
     struct rangeStartSize *r2;
     nodes2 = hashIntVal(tf2->nodes, c2->name);
     hashAddInt(tf->nodes, c2->name, nodes2);
     AllocArray(r2, nodes2);
     rangeReadArray(tf2->file, r2, nodes2, tf2->isSwapped);
     if (tf->file)
 	rangeWriteArray(r2, nodes2, tf->file);
     if (size)
 	*size += rangeArraySize(r2, nodes2);
     i += nodes2;
     freez(&r2);
     c2 = c2->next;
     }
 /* Re-calculate the offset for each chrom.
  * We could skip the first loop (code duplication in genomeRangeTreeFileNew) 
  * if we stored the indexLen in the header as well.
  * We would then only have to recalculate the offsets for each chrom. */
 bits32 offset = tf->headerLen;
 struct hashEl *chrom;
 for (chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int nameLen = strlen(chrom->name);
     offset += nameLen + 1 + sizeof(offset) + sizeof(offset); /* name + nodes + offset */
     }
 for (chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int size = 2*sizeof(bits32)*hashIntVal(tf->nodes, chrom->name);
     hashAddInt(tf->offset, chrom->name, offset);
     offset += size;
     }
 /* Now go back and rewrite the header, and close the file */
 if (tf->file && fseek(tf->file, 0, SEEK_SET))
     errAbort("couldnt fseek(f,0,SEEK_SET) to rewrite header: errno(%d) = %s\n", errno, strerror(errno));
 if (tf->file)
     genomeRangeTreeFileWriteHeader(tf);
 *nodes = i;
 *numChroms = tf->numChroms;
 genomeRangeTreeFileFree(&tf);
 }
 
 void genomeRangeTreeFileUnion(struct genomeRangeTreeFile *tf1, struct genomeRangeTreeFile *tf2, char *outFile)
 /* Combine two saved genomeRangeTrees in a logical 'or' through a linear file scan.
  * Writes resulting genomeRangeTree to outFile. 
  * The resulting file cannot be safely read until the operation is complete. The header
  * information at the beginning of the file has to be updated after all the data is written
  * since the final merged rangeTree sizes are not known until the ranges are merged.
  * To enforce this, the header is written with a zero initial 'sig' field so that it cannot
  * be read as a genomeRangeTree file. The header information and 'sig' is re-written with 
  * correct data at the end of the process via an 'fseek' operation to the beginning of the file. 
  * If outFile is null, does not output the file. */
 {
 int nodes, numChroms;
 genomeRangeTreeFileUnionDetailed(tf1, tf2, outFile, &numChroms, &nodes, NULL, FALSE, FALSE);
 }
 
 void genomeRangeTreeFileIntersectionDetailed(struct genomeRangeTreeFile *tf1, struct genomeRangeTreeFile *tf2, char *outFile, int *numChroms, int *nodes, unsigned *size, boolean saveMem)
 /* Create intersection genomeRangeTree from two saved genomeRangeTrees in a logical 'and' through a linear file scan.
  * Writes resulting genomeRangeTree to outFile if outFile is non-null.
  * The resulting file cannot be safely read until the operation is complete. The header
  * information at the beginning of the file has to be updated after all the data is written
  * since the final merged rangeTree sizes are not known until the ranges are merged.
  * To enforce this, the header is written with a zero initial 'sig' field so that it cannot
  * be read as a genomeRangeTree file. The header information and 'sig' is re-written with 
  * correct data at the end of the process via an 'fseek' operation to the beginning of the file. */
 {
 struct genomeRangeTree *tree = genomeRangeTreeNew();
 struct hashEl *c1, *c2;
 struct hash *ranges = hashNew(0), *nodeCounts = hashNew(0);
 struct genomeRangeTreeFile *tf;
 int res, nodes1, nodes2, i = 0;
 
 if (size)
     *size = 0;
 /* loop through the chromosomes merging the rangeTrees */
 c1 = tf1->chromList;
 c2 = tf2->chromList;
 while (c1 && c2) /* at least one chrom in each tree */
     {
     res = strcmp(c1->name, c2->name);
     nodes1 = hashIntVal(tf1->nodes, c1->name);
     nodes2 = hashIntVal(tf2->nodes, c2->name);
     if (res < 0) /* chrom1 < chrom2 so skip chrom1 */
 	{
 	c1 = c1->next; 
 	if (c1 && fseek(tf1->file, hashIntVal(tf1->offset, c1->name), SEEK_SET) != 0)
 	    errnoAbort("could not seek to pos %d in chrom %s in file %s\n", 
 		hashIntVal(tf1->offset, c1->name), c1->name, tf1->name);
 	//struct rangeStartSize *r1;
 	//AllocArray(r1, nodes1);
 	//rangeReadArray(tf1->file, r1, nodes1, tf1->isSwapped);
 	//freez(&r1);
 	}
     else if (res > 0) /* chrom2 < chrom1 so skip chrom2 */
 	{
 	c2 = c2->next;
 	if (c2 && fseek(tf2->file, hashIntVal(tf2->offset, c2->name), SEEK_SET) != 0 )
 	    errAbort("could not seek to pos %d in chrom %s in file %s : error(%d) %s\n", 
 		hashIntVal(tf2->offset, c2->name), c2->name, tf2->name, errno, strerror(errno));
 	//struct rangeStartSize *r2; /* Need to read this chrom to move the file pointer */
 	//AllocArray(r2, nodes2);
 	//rangeReadArray(tf2->file, r2, nodes2, tf2->isSwapped);
 	//freez(&r2);
 	}
     else /* res == 0 so both chroms the same and need to merge */
 	{
 	struct rangeStartSize *r1, *r2;
 	AllocArray(r1, nodes1);
 	AllocArray(r2, nodes2);
 	rangeReadArray(tf1->file, r1, nodes1, tf1->isSwapped);
 	rangeReadArray(tf2->file, r2, nodes2, tf2->isSwapped);
 	/* store the intersected nodes */
 	unsigned s0;
 	int n0=0;
 	struct rangeStartSize *r = NULL;
 	s0 = rangeArrayIntersection(r1, r2, nodes1, nodes2, &r, saveMem, NULL, NULL, NULL, &n0);
 	/* if non-null intersection then remember chrom, array of ranges and number of nodes */
 	if (n0>0)
 	    {
 	    genomeRangeTreeFindOrAddRangeTree(tree, c1->name);
 	    hashAddUnique(ranges, c1->name, r);
 	    hashAddInt(nodeCounts, c1->name, n0);
 	    if (size)
 		*size += s0;
 	    i += n0;
 	    }
 	freez(&r1);
 	freez(&r2);
 	c1 = c1->next;
 	c2 = c2->next;
 	}
     }
 /* write header with chroms */
 tf = genomeRangeTreeFileNew(tree, outFile);
 if (tf->file)
     genomeRangeTreeFileWriteHeaderDetailed(tf, TRUE); 
 freeHash(&(tf->nodes));
 tf->nodes = nodeCounts;
 
 /* Re-calculate the offset for each chrom.
  * We could skip the first loop (code duplication in genomeRangeTreeFileNew) 
  * if we stored the indexLen in the header as well.
  * We would then only have to recalculate the offsets for each chrom. */
 bits32 offset = tf->headerLen;
 struct hashEl *chrom;
 for (chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int nameLen = strlen(chrom->name);
     offset += nameLen + 1 + sizeof(offset) + sizeof(offset); /* name + nodes + offset */
     }
 for (chrom = tf->chromList ; chrom ; chrom = chrom->next )
     {
     int size = 2*sizeof(bits32)*hashIntVal(tf->nodes, chrom->name);
     hashAddInt(tf->offset, chrom->name, offset);
     offset += size;
     }
 /* Now go back and rewrite the header, data, and close the file */
 if (tf->file)
     {
     if (fseek(tf->file, 0, SEEK_SET))
 	errAbort("couldnt fseek(f,0,SEEK_SET) to rewrite header: errno(%d) = %s\n", errno, strerror(errno));
     genomeRangeTreeFileWriteHeader(tf);
     for (chrom = tf->chromList ; chrom ; chrom = chrom->next )
 	{
 	struct rangeStartSize *r = hashMustFindVal(ranges, chrom->name);
 	rangeWriteArray(r, hashIntVal(tf->nodes, chrom->name), tf->file);
 	freez(&r);
 	}
     }
 *nodes = i;
 *numChroms = tf->numChroms;
 genomeRangeTreeFileFree(&tf);
 }
 
 void genomeRangeTreeFileStats(char *fileName, int *numChroms, int *nodes, int *size)
 /* Calculates the number of chroms, ranges, and total size of ranges in the genomeRangeTree file.
  * Performs a linear scan of the file. */
 {
 struct genomeRangeTreeFile *tf = genomeRangeTreeFileReadHeader(fileName);
 struct hashEl *c;
 int n;
 *size = 0;
 *nodes = 0;
 *numChroms = tf->numChroms;
 for (c = tf->chromList ; c ; c = c->next)
     {
     struct rangeStartSize *r;
     n = hashIntVal(tf->nodes, c->name);
     *nodes += n;
     AllocArray(r, n);
     rangeReadArray(tf->file, r, n, tf->isSwapped);
     *size += rangeArraySize(r, n);
     freez(&r);
     }
 genomeRangeTreeFileFree(&tf);
 }
 
 int genomeRangeTreeFileChromSeek(struct genomeRangeTreeFile *tf, char *chrom)
 /* Seek the tree file to the start of the chromosome data.
  * Returns the number of nodes in the chromosome. 
  * If this chromosome is not in this tree, does not do a seek
  * and returns 0 for number of nodes.
  */
 {
 if (!tf->file)
     errAbort("no file handle for genomeRangeTree chrom seek\n");
 if (!chrom)
     errAbort("no chrom for genomeRangeTree chrom seek\n");
 if (!hashLookup(tf->tree->hash, chrom))
     return 0; /* chrom not in the tree */
 if (fseek(tf->file, hashIntVal(tf->offset, chrom), SEEK_SET) != 0)
     errnoAbort("could not seek to pos %d in chrom %s in file %s\n", 
 	hashIntVal(tf->offset, chrom), chrom, tf->name);
 return hashIntVal(tf->nodes, chrom);
 }
 
 void genomeRangeTreeFileWriteToBed(char *inBama, char *bedFile, boolean withId, boolean mergeAdjacent)
 /* Write a genomeRangeTreeFile directly to a bed file.
  * If withId then unique identifier is added to the name field.
  * If mergeAdjacent then adjacent ranges, which would otherwise appear on individual lines,
  * are merged into a single bed line. */
 {
 struct genomeRangeTreeFile *tf = genomeRangeTreeFileReadHeader(inBama);
 FILE *f = mustOpen(bedFile,"w");
 struct hashEl *chrom;
 for (chrom = tf->chromList ; chrom ; chrom = chrom->next)
     {
     struct rangeStartSize *r;
     int n = hashIntVal(tf->nodes, chrom->name);
     AllocArray(r, n);
     rangeReadArray(tf->file, r, n, tf->isSwapped);
     rangeWriteArrayToBed(chrom->name, r, n, withId, mergeAdjacent, f);
     freez(&r);
     }
 carefulClose(&f);
 genomeRangeTreeFileFree(&tf);
 }