4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/oneShot/chromToPos/chromToPos.c src/hg/oneShot/chromToPos/chromToPos.c
index 33ed9c2..2bab74a 100644
--- src/hg/oneShot/chromToPos/chromToPos.c
+++ src/hg/oneShot/chromToPos/chromToPos.c
@@ -1,300 +1,300 @@
 /* chromToPos - Given list of chromosomes and sizes, create files that index them by position 
  * in genome via a b+ tree. 
  *    This module is as much about experimenting with b+ tree implementation as anything, 
  * though once that is stable, it'll probably be refactored into a b+ tree library module
  * with this being just a client. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "sig.h"
 #include "net.h"
 
 
 /* Number of slots in each b+ tree block.  This is the default; main() replaces it
  * with the value of the -blockSize option when one is given. */
 int blockSize = 100;
 
 void usage()
 /* Print a usage summary, including the current default block size, via errAbort. */
 {
 errAbort("chromToPos - Given list of chromosomes and sizes, "
          "create files that index them by position\n"
          "via a b+ tree.\n"
          "usage:\n"
          "   chromToPos chromSizes.txt chromPos.bin chromPos.ix\n"
          "options:\n"
          "   -blockSize=N (default %d) Size of block for index purposes\n",
          blockSize);
 }
 
 /* Command line options accepted by this program; handed to optionInit() in main(). */
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},	/* Integer valued -blockSize=N */
    {NULL, 0},			/* Presumably the end-of-table marker - confirm against options.h */
 };
 
 struct chromInfo
 /* A chromosome name and a chromosome size, plus where the chromosome lands in
  * genome coordinates and in the binary output file. */
     {
     struct chromInfo *next;	/* Next in list. */
     char *name;			/* Chromosome name (cloned from the input line). */
     bits32 size;		/* Size of chromosome. */
     bits32 genomeOffset;	/* Offset of chromosome in genome coordinates: the sum of the
                                  * sizes of all chromosomes read before this one. */
     bits32 binFileOffset;	/* Offset of record in binary file; set by writeBinSaveOffsets. */
     };
 
 struct chromInfo *readChromSizes(char *fileName)
 /* Create list of chromInfos based on a two column file <chrom><size>.
  * The list is returned in file order.  Each element's genomeOffset is the sum of the
  * sizes of the chromosomes that precede it; binFileOffset is not set here.
  * Aborts on a duplicate name, on a negative size, or when the running total of bases
  * no longer fits in 32 bits. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[2];
 struct chromInfo *list = NULL, *el;
 bits64 maxTotal = (1LL << 32) - 1;	/* Largest total that still fits a bits32 offset. */
 bits64 total = 0;
 int chromCount = 0;
 struct hash *uniqHash = hashNew(16);	/* Names seen so far, for duplicate detection. */
 
 while (lineFileChop(lf, row))
     {
     char *name = row[0];
     if (hashLookup(uniqHash, name))
        errAbort("Duplicate chromosome or contig name %s line %d of %s",
        	name, lf->lineIx, lf->fileName);
     hashAdd(uniqHash, name, NULL);
 
     /* A negative size would silently wrap to a huge value when stored in the
      * unsigned size field, so reject it up front. */
     int size = lineFileNeedNum(lf, row, 1);
     if (size < 0)
         errAbort("Negative chromosome size %d line %d of %s",
                 size, lf->lineIx, lf->fileName);
 
     AllocVar(el);
     el->name = cloneString(name);
     el->size = size;
     el->genomeOffset = total;
     total += el->size;
     if (total > maxTotal)
         errAbort("Too many bases line %d of %s.  Max is %llu,  total so far is %llu",
                 lf->lineIx, lf->fileName, maxTotal, total);
     slAddHead(&list, el);
     ++chromCount;
     }
 hashFree(&uniqHash);
 lineFileClose(&lf);
 
 /* Elements were pushed on the head, so flip the list back into file order. */
 slReverse(&list);
 verbose(1, "Read %d chroms totalling %llu bases in %s\n", chromCount, total, fileName);
 return list;
 }
 
 int countLevels(int maxBlockSize, int itemCount)
 /* Count up number of levels needed in tree of given maximum block size.
  * Items that all fit in a single block need one level; each further level cuts the
  * number of items to index by a factor of maxBlockSize, rounding up.
  * maxBlockSize must be at least 2, otherwise the item count would never shrink. */
 {
 assert(maxBlockSize >= 2);
 int levels = 1;
 while (itemCount > maxBlockSize)
     {
     /* Divide by the parameter rather than the global blockSize, so the answer
      * depends only on the arguments passed in. */
     itemCount = (itemCount + maxBlockSize - 1) / maxBlockSize;
     levels += 1;
     }
 return levels;
 }
 
 void writeBinSaveOffsets(struct chromInfo *chromList, int chromCount, char *fileName)
 /* Save chromosome info as a binary file, and save offsets of each chromosome within file.
  * The file starts with a header of four values: magic number, chromosome count, and two
  * reserved zeros.  After that comes one record per chromosome: its genome offset
  * followed by its zero-terminated name.  As a side effect each chromInfo's
  * binFileOffset is set to the file position where its record begins. */
 {
 bits32 magic = chromSizeBinSig;
 bits32 count = chromCount;
 bits32 reserved = 0;
 FILE *f = mustOpen(fileName, "wb");
 
 /* Header.  The count is written from the bits32 copy so that every header field has
  * the same declared type (presumably writeOne emits the variable's own bytes). */
 writeOne(f, magic);
 writeOne(f, count);
 writeOne(f, reserved);
 writeOne(f, reserved);
 
 struct chromInfo *chrom;
 for (chrom = chromList; chrom != NULL; chrom = chrom->next)
     {
     /* Remember where this record starts before writing it. */
     chrom->binFileOffset = ftell(f);
     writeOne(f, chrom->genomeOffset);
     mustWrite(f, chrom->name, strlen(chrom->name)+1);	/* +1 keeps the terminating zero. */
     }
 
 carefulClose(&f);
 }
 
 int xToY(int x, unsigned y)
 /* Raise x to the power y by repeated multiplication; y is expected to be small.
  * Returns 1 when y is 0. */
 {
 int result = 1;
 unsigned remaining = y;
 while (remaining > 0)
     {
     result *= x;
     --remaining;
     }
 return result;
 }
 
 /* Say you have a block size of 4 and 27 items.  You'd put them in the tree like so:
  *  01 17
  *  01 05 09 13   17 21 25
  *  01 02 03 04   05 06 07 08  09 10 11 12   13 14 15 16   17 18 19 20   21 22 23 24  25 26 27
  */
 
 
 bits32 writeIndexLevel(struct chromInfo **chromArray, int chromCount, 
 	bits32 indexOffset, int level, FILE *f)
 /* Write out a non-leaf level.
  *   chromArray, chromCount - all chromosomes, in order
  *   indexOffset - file offset at which this level starts
  *   level - height of this level; each slot covers blockSize^level chromosomes
  *   f - index file being written
  * Every block is a header (isLeaf byte, reserved byte, 16 bit count of used slots)
  * followed by blockSize slots, each holding a genome offset and the file offset of a
  * child block.  Unused slots are written as zeros.  Child offsets assume the next
  * level starts right where this one ends and holds one same-sized block per slot
  * used here.  Returns the file offset at which this level ends. */
 {
 /* Calculate number of nodes to write at this level. */
 int slotSizePer = xToY(blockSize, level);   // Number of chroms per slot in node
 int nodeSizePer = slotSizePer * blockSize;  // Number of chroms per node
 int nodeCount = (chromCount + nodeSizePer - 1)/nodeSizePer;	
 
 /* Calculate sizes and offsets. */
 int bytesInBlock = (2*sizeof(UBYTE) + sizeof(bits16) + blockSize * (2*sizeof(bits32)));
 bits32 levelSize = nodeCount * bytesInBlock;
 bits32 endLevel = indexOffset + levelSize;
 bits32 nextChild = endLevel;	/* First child block sits just past this level. */
 
 UBYTE isLeaf = FALSE;
 UBYTE reserved = 0;
 
 int i,j;
 for (i=0; i<chromCount; i += nodeSizePer)
     {
     /* Calculate size of this block.  Count and clamp in int, and only then narrow
      * to 16 bits: narrowing first could truncate a large remaining-slot count to a
      * small value that slips past the clamp. */
     int slotsLeft = (chromCount - i + slotSizePer - 1)/slotSizePer;
     if (slotsLeft > blockSize)
         slotsLeft = blockSize;
     bits16 countOne = slotsLeft;
 
     /* Write block header. */
     writeOne(f, isLeaf);
     writeOne(f, reserved);
     writeOne(f, countOne);
 
     /* Write one slot per child: genome offset of the child's first chrom, then the
      * child's file offset. */
     int slotsUsed = 0;
     int endIx = i + nodeSizePer;
     if (endIx > chromCount)
         endIx = chromCount;
     for (j=i; j<endIx; j += slotSizePer)
         {
         struct chromInfo *chrom = chromArray[j];
         writeOne(f, chrom->genomeOffset);
         writeOne(f, nextChild);
         nextChild += bytesInBlock;
         ++slotsUsed;
         }
     assert(slotsUsed == countOne);
 
     /* Pad out any unused slots with zeroes so all blocks are the same size. */
     for (j=countOne; j<blockSize; ++j)
         {
         bits32 genomeOffsetPad=0;
         bits32 binFileOffsetPad=0;
         writeOne(f, genomeOffsetPad);
         writeOne(f, binFileOffsetPad);
         }
     }
 return endLevel;
 }
 
 void writeLeafLevel(struct chromInfo **chromArray, int chromCount, FILE *f)
 /* Write the leaf blocks of the index.  Each block is a header (isLeaf byte, reserved
  * byte, 16 bit count of used slots) followed by blockSize slots, each holding a
  * chromosome's genome offset and its record offset in the binary file.  Slots past
  * the end of the chromosome list are written as zeros. */
 {
 UBYTE isLeaf = TRUE;
 UBYTE reserved = 0;
 bits32 zeroGenomeOffset = 0;
 bits32 zeroBinFileOffset = 0;
 int start = 0;		/* Index of first chromosome in the block being written. */
 
 while (start < chromCount)
     {
     /* A block holds blockSize chromosomes, or whatever is left for the last one. */
     int remaining = chromCount - start;
     bits16 countOne = (remaining > blockSize) ? blockSize : remaining;
     int slot;
 
     /* Block header. */
     writeOne(f, isLeaf);
     writeOne(f, reserved);
     writeOne(f, countOne);
 
     /* Used slots: position in genome and position in file for each chromosome. */
     for (slot = 0; slot < countOne; ++slot)
         {
         assert(start + slot < chromCount);
         struct chromInfo *chrom = chromArray[start + slot];
         writeOne(f, chrom->genomeOffset);
         writeOne(f, chrom->binFileOffset);
         }
 
     /* Unused slots: zero fill so every block has the same size on disk. */
     for (slot = countOne; slot < blockSize; ++slot)
         {
         writeOne(f, zeroGenomeOffset);
         writeOne(f, zeroBinFileOffset);
         }
 
     start += countOne;
     }
 }
 
 void writeIndex(struct chromInfo *chromList, int chromCount, char *fileName)
 /* Write index file - a b+ tree.
  * The file starts with a header (magic number, chromosome count, block size, and two
  * reserved zero fields), followed by the non-leaf levels from the top of the tree
  * down, and finally the leaf level.  Aborts if a non-leaf level does not end at the
  * file offset that writeIndexLevel predicted for it. */
 {
 /* Open file and write out header.  The contents are binary, so open in binary mode
  * just as the companion .bin file is. */
 FILE *f = mustOpen(fileName, "wb");
 bits32 magic = chromSizeIndexSig;
 bits32 count = chromCount;
 bits16 bSize = blockSize;
 bits16 reserved16 = 0;
 bits32 reserved32 = 0;
 writeOne(f, magic);
 writeOne(f, count);
 writeOne(f, bSize);
 writeOne(f, reserved16);
 writeOne(f, reserved32);
 bits32 indexOffset = ftell(f);
 
 /* Make array for all chromosomes, so levels can jump straight to every Nth one. */
 struct chromInfo *chrom, **chromArray;
 AllocArray(chromArray, chromCount);
 int i;
 for (i=0, chrom=chromList; i<chromCount; ++i, chrom=chrom->next)
     chromArray[i] = chrom;
 
 /* Figure out how many levels in B tree, and number of chroms between items at highest level. */
 int levels = countLevels(blockSize, chromCount);
 verbose(1, "%d levels with blockSize %d covers %d items\n", levels, blockSize, chromCount);
 
 /* Write non-leaf nodes, highest level first.  After each level make sure the file
  * really is where the child offsets written into that level assume it to be. */
 for (i=levels-1; i > 0; --i)
     {
     bits32 endLevelOffset = writeIndexLevel(chromArray, chromCount, indexOffset, i, f);
     indexOffset = ftell(f);
     if (endLevelOffset != indexOffset)
         errAbort("internal err: mismatch endLevelOffset=%u vs indexOffset=%u", endLevelOffset, indexOffset);
     }
 
 /* Write leaf nodes */
 writeLeafLevel(chromArray, chromCount, f);
 
 /* Clean up and go home. */
 freez(&chromArray);
 carefulClose(&f);
 }
 
 void chromToPos(char *sizesText, char *posBin, char *posIndex)
 /* chromToPos - Given list of chromosomes and sizes, create files that index them by position via 
  * a b+ tree.
  *   sizesText - input file of <chrom><size> lines
  *   posBin - output binary file of chromosome records
  *   posIndex - output b+ tree index
  * The binary file must be written first, because that step records each chromosome's
  * file offset, which the index then stores in its leaves. */
 {
 struct chromInfo *chromList = readChromSizes(sizesText);
 int chromCount = slCount(chromList);
 writeBinSaveOffsets(chromList, chromCount, posBin);
 writeIndex(chromList, chromCount, posIndex);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 blockSize = optionInt("blockSize", blockSize);
 int minBlockSize = 2, maxBlockSize = (1L << 16) - 1;
 if (blockSize < minBlockSize || blockSize > maxBlockSize)
     errAbort("Block size (%d) not in range, must be between %d and %d",
     	blockSize, minBlockSize, maxBlockSize);
 chromToPos(argv[1], argv[2], argv[3]);
 return 0;
 }