9b033f6d0e8b14a79908339c61c7153e7004f19b
markd
  Thu Sep 8 12:27:11 2022 -0700
change bedToBigBed to actually describe the use of chromAlias

diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index 13a9ac1..f601d30 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -28,89 +28,107 @@
  *   2.6 - Made it not crash on empty input.  
  *   */
 
 /* Things set directly or indirectly by command lne in main() routine. */
 int blockSize = 256;
 int itemsPerSlot = 512;
 char *extraIndex = NULL;
 int bedN = 0;   /* number of standard bed fields */
 int bedP = 0;   /* number of bed plus fields */
 char *asFile = NULL;
 char *asText = NULL;
 char *udcDir = NULL;
 static boolean doCompress = FALSE;
 static boolean tabSep = FALSE;
 static boolean sizesIs2Bit = FALSE;
-static boolean sizesIsBb = FALSE;
+static boolean sizesIsChromAliasBb = FALSE;
 static boolean allow1bpOverlap = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedToBigBed v. %s - Convert bed file to bigBed. (bbi version: %d)\n"
   "usage:\n"
   "   bedToBigBed in.bed chrom.sizes out.bb\n"
   "Where in.bed is in one of the ascii bed formats, but not including track lines\n"
   "and chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n"
   "and out.bb is the output indexed big bed file.\n"
+  "\n"
   "If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n"
   "  http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n"
   "or you may use the script fetchChromSizes to download the chrom.sizes file.\n"
   "If you have bed annotations on patch sequences from NCBI, a more inclusive\n"
   "chrom.sizes file can be found using a URL like\n"
   "  http://hgdownload.soe.ucsc.edu/goldenPath/<db>/database/chromInfo.txt.gz\n"
   "If not hosted by UCSC, a chrom.sizes file can be generated by running\n"
-  "twoBitInfo on the assembly .2bit file.\n"
+  "twoBitInfo on the assembly .2bit file, or the .2bit file can be used directly\n"
+  "if the -sizesIs2Bit option is specified.\n"
+  "\n"
+  "The chrom.sizes file may also be a chromAlias bigBed file, or a URL to\n"
+  "such a file, by specifying the -sizesIsChromAliasBb option.  With a\n"
+  "chromAlias, the input BED file may have chromosome names matching any\n"
+  "of the sequence name aliases in the chromAlias file.\n"
+  "\n"
+  "For UCSC provided genomes, the chromAlias files can be found under:\n"
+  "    https://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chromAlias.bb\n"
+  "For UCSC GenArk assembly hubs, the chrom aliases are named in the form:\n"
+  "    https://hgdownload.soe.ucsc.edu/hubs/GCF/006/542/625/GCF_006542625.1/GCF_006542625.1.chromAlias.bb\n"
+  "For a description of generating chromAlias files for your own assembly hub, see:\n"
+  "      http://genomewiki.ucsc.edu/index.php/Chrom_Alias\n"
+  "\n"
   "The in.bed file must be sorted by chromosome,start,\n"
   "  to sort a bed file, use the unix sort command:\n"
   "     sort -k1,1 -k2,2n unsorted.bed > sorted.bed\n"
   "Sorting must be set to skip Unicode mapping (LC_COLLATE=C).\n"
   "\n"
   "options:\n"
   "   -type=bedN[+[P]] : \n"
   "                      N is between 3 and 15, \n"
   "                      optional (+) if extra \"bedPlus\" fields, \n"
   "                      optional P specifies the number of extra fields. Not required, but preferred.\n"
   "                      Examples: -type=bed6 or -type=bed6+ or -type=bed6+3 \n"
   "                      (see http://genome.ucsc.edu/FAQ/FAQformat.html#format1)\n"
   "   -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n"
   "                   of each field in a row in AutoSql format here.\n"
   "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
   "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
   "   -unc - If set, do not use compression.\n"
   "   -tab - If set, expect fields to be tab separated, normally\n"
   "           expects white space separator.\n"
   "   -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n"
   "           extraIndex=name and extraIndex=name,id are commonly used.\n"
   "   -sizesIs2Bit  -- If set, the chrom.sizes file is assumed to be a 2bit file.\n"
-  "   -sizesIsBb  -- If set, the chrom.sizes file is assumed to be a bigBed file.\n"
+  "   -sizesIsChromAliasBb -- If set, the chrom.sizes file is assumed to be a chromAlias\n"
+  "    bigBed file or a URL to such a file (see above).\n"
+  "   -sizesIsBb  -- Obsolete name for -sizesIsChromAliasBb.\n"
   "   -udcDir=/path/to/udcCacheDir  -- sets the UDC cache dir for caching of remote files.\n"
   "   -allow1bpOverlap  -- allow exons to overlap by at most one base pair\n"
   "   -maxAlloc=N -- Set the maximum memory allocation size to N bytes\n"
   , version, bbiCurrentVersion, blockSize, itemsPerSlot
   );
 }
 
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},
    {"itemsPerSlot", OPTION_INT},
    {"type", OPTION_STRING},
    {"as", OPTION_STRING},
    {"unc", OPTION_BOOLEAN},
    {"tab", OPTION_BOOLEAN},
    {"sizesIs2Bit", OPTION_BOOLEAN},
+   {"sizesIsChromAliasBb", OPTION_BOOLEAN},
    {"sizesIsBb", OPTION_BOOLEAN},
    {"extraIndex", OPTION_STRING},
    {"udcDir", OPTION_STRING},
    {"allow1bpOverlap", OPTION_BOOLEAN},
    {"maxAlloc", OPTION_LONG_LONG},
    {NULL, 0},
 };
 
 int bbNamedFileChunkCmpByName(const void *va, const void *vb)
 /* Compare two named offset object to facilitate qsorting by name. */
 {
 const struct bbNamedFileChunk *a = va, *b = vb;
 return strcmp(a->name, b->name);
 }
 
@@ -587,31 +605,31 @@
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 
 bits16 fieldCount = slCount(as->columnList);
 bits16 extraIndexCount = slCount(extraIndexList);
 
 struct bbExIndexMaker *eim = NULL;
 if (extraIndexList != NULL)
     eim = bbExIndexMakerNew(extraIndexList, as);
 
 /* Do first pass, mostly just scanning file and counting hits per chromosome. */
 int minDiff = 0;
 double aveSize = 0;
 bits64 bedCount = 0;
 bits32 uncompressBufSize = 0;
 struct bbiChromUsage *usageList = NULL;
-if (sizesIsBb)
+if (sizesIsChromAliasBb)
     usageList = bbiChromUsageFromBedFileAlias(lf, chromSizes, eim, &minDiff, &aveSize, &bedCount, tabSep);
 else
     {
     struct hash *chromSizesHash = NULL;
     if (sizesIs2Bit)
         chromSizesHash = twoBitChromHash(chromSizes);
     else
         chromSizesHash = bbiChromSizesFromFile(chromSizes);
     verbose(2, "Read %d chromosomes and sizes from %s\n",  chromSizesHash->elCount, chromSizes);
     usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, eim, &minDiff, &aveSize, &bedCount, tabSep);
     freeHash(&chromSizesHash);
     }
 verboseTime(1, "pass1 - making usageList (%d chroms)", slCount(usageList));
 verbose(2, "%d chroms in %s. Average span of beds %f\n", slCount(usageList), inName, aveSize);
 
@@ -824,31 +842,33 @@
     errAbort("AutoSql file (%s) not in legal format.", asFile);
 asCompareObjAgainstStandardBed(as, bedN, TRUE); // abort if bedN columns are not standard
 bbFileCreate(inName, chromSizes, blockSize, itemsPerSlot, asText, as, 
 	doCompress, extraIndexList, outName);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 blockSize = optionInt("blockSize", blockSize);
 itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
 asFile = optionVal("as", asFile);
 doCompress = !optionExists("unc");
 sizesIs2Bit = optionExists("sizesIs2Bit");
-sizesIsBb = optionExists("sizesIsBb");
+sizesIsChromAliasBb = optionExists("sizesIsChromAliasBb") || optionExists("sizesIsBb");
+if (sizesIs2Bit && sizesIsChromAliasBb)
+    errAbort("can't specify both -sizesIs2Bit and -sizesIsChromAliasBb");
 extraIndex = optionVal("extraIndex", NULL);
 tabSep = optionExists("tab");
 allow1bpOverlap = optionExists("allow1bpOverlap");
 udcDir = optionVal("udcDir", udcDefaultDir());
 size_t maxAlloc = optionLongLong("maxAlloc", 0);
 if (argc != 4)
     usage();
 udcSetDefaultDir(udcDir);
 if (maxAlloc > 0)
     setMaxAlloc(maxAlloc);
 
 if (optionExists("type"))
     {
     // parse type
     char *btype = cloneString(optionVal("type", ""));