f952b14fca3833e9d8fef0ae8a086ad15485b552
galt
  Wed Jan 1 02:11:38 2025 -0800
fix various errors for utils compiled on Rocky 9 hgwdev-new.

diff --git src/hg/ratStuff/mafSplit/mafSplit.c src/hg/ratStuff/mafSplit/mafSplit.c
index 8e11e3e..102d5cd 100644
--- src/hg/ratStuff/mafSplit/mafSplit.c
+++ src/hg/ratStuff/mafSplit/mafSplit.c
@@ -1,384 +1,384 @@
 /* Split MAF file based on positions in a bed file */
 
 /* Copyright (C) 2011 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "maf.h"
 #include "bed.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "mafSplit - Split multiple alignment files\n"
   "usage:\n"
   "   mafSplit splits.bed outRoot file(s).maf\n"
   "options:\n"
   "   -byTarget       Make one file per target sequence.  (splits.bed input\n"
   "                   is ignored).\n"
   "   -outDirDepth=N  For use only with -byTarget.\n"
   "                   Create N levels of output directory under current dir.\n"
   "                   This helps prevent NFS problems with a large number of\n"
   "                   file in a directory.  Using -outDirDepth=3 would\n"
   "                   produce ./1/2/3/outRoot123.maf.\n"
   "   -useSequenceName  For use only with -byTarget.\n"
   "                     Instead of auto-incrementing an integer to determine\n"
   "                     output filename, expect each target sequence name to\n"
   "                     end with a unique number and use that number as the\n"
   "                     integer to tack onto outRoot.\n"
   "   -useFullSequenceName  For use only with -byTarget.\n"
   "                     Instead of auto-incrementing an integer to determine\n"
   "                     output filename, use the target sequence name\n"
   "                     to tack onto outRoot.\n"
   "   -useHashedName=N  For use only with -byTarget.\n"
   "                     Instead of auto-incrementing an integer or requiring\n"
   "                     a unique number in the sequence name, use a hash\n"
   "                     function on the sequence name to compute an N-bit\n"
   "                     number.  This limits the max #filenames to 2^N and\n"
   "                     ensures that even if different subsets of sequences\n"
   "                     appear in different pairwise mafs, the split file\n"
   "                     names will be consistent (due to hash function).\n"
   "                     This option is useful when a \"scaffold-based\"\n"
   "                     assembly has more than one sequence name pattern,\n"
   "                     e.g. both chroms and scaffolds.\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
    {"byTarget", OPTION_BOOLEAN},
    {"outDirDepth", OPTION_INT},
    {"useSequenceName", OPTION_BOOLEAN},
    {"useFullSequenceName", OPTION_BOOLEAN},
    {"useHashedName", OPTION_INT},
    {NULL, 0},
 };
 
 /* Option variables */
 static boolean byTarget = FALSE;
 static int outDirDepth = 0;
 static boolean useSequenceName = FALSE;
 static boolean useFullSequenceName = FALSE;
 static int hashedNameBits = 0;
 
 
 static int numberFromName(char *name)
 /* Given a name that ends in a number, pick off the number. */
 {
 char *s = name + strlen(name) - 1;
 if (!isdigit(*s))
     errAbort("numberFromName: name \"%s\" does not end in a number.", name);
 while (s > name && isdigit(*(s-1)))
     s--;
 return atoi(s);
 }
 
 static int numberFromHashedName(char *name, int hashedNameBits)
 /* Run name through a hash function and mask to hashedNameBits. */
 {
 int hashedName = hashCrc(name);
 unsigned mask = (1 << hashedNameBits) - 1;
 return hashedName & mask;
 }
 
 static char *mkOutPath(char *outRootDir, char *outRootFile, int seqNum, char *target)
 /* formulate pathname, using seqNum if outDirDepth > 0 */
 {
 struct dyString *dy = dyStringNew(1024);
 if (outDirDepth > 0)
     {
     int i = 0, pwrOfTen = 1;
     dyStringPrintf(dy, "%s/", outRootDir);
     for (i = 0;  i < outDirDepth-1;  i++)
 	pwrOfTen *= 10;
     for (i = 0;  i < outDirDepth;  i++, pwrOfTen /= 10)
 	{
 	int seqNumDownshifted = seqNum / pwrOfTen;
 	int seqNumMasked = seqNumDownshifted % 10;
 	if (seqNumMasked < 0)
 	    errAbort("mkOutPath got negative number: %d --> %d --> %d\n",
 		     seqNum, seqNumDownshifted, seqNumMasked);
 	dyStringPrintf(dy, "%d/", seqNumMasked);
 	makeDir(dy->string);
 	}
     if (target)
 	dyStringPrintf(dy, "%s%s.maf", outRootFile, target);
     else
 	dyStringPrintf(dy, "%s%05d.maf", outRootFile, seqNum);
     }
 else
     {
     if (target)
 	dyStringPrintf(dy, "%s/%s%s.maf", outRootDir, outRootFile, target);
     else
 	dyStringPrintf(dy, "%s/%s%03d.maf", outRootDir, outRootFile, seqNum);
     }
 return dyStringCannibalize(&dy);
 }
 
 void splitMafByTarget(char *mafInName, char *outRootDir, char *outRootFile,
 		      struct hash *pathHash, int *pSeqNum)
 /* Split maf into one file per target sequence. */
 {
 static FILE *prevFile = NULL;
 static char prevTarget[512];
 struct mafFile *mf = mafOpen(mafInName);
 struct mafAli *maf = NULL;
 
 prevTarget[0] = '\0';
 while ((maf = mafNext(mf)) != NULL)
     {
     struct mafComp *mc = maf->components;
     char *targetName = mc->src;
     FILE *f = NULL;
 
     if (isNotEmpty(prevTarget) && sameString(targetName, prevTarget))
 	{
 	verbose(3, "Reusing open filehandle for target %s\n", prevTarget);
 	f = prevFile;
 	}
     else 
 	{
 	char *path;
 	if (hashedNameBits > 0)
 	    {
 	    /* Hash paths by themselves instead of by target, because 
 	     * we may end up reusing a path for several targets. */
 	    int tHashed = numberFromHashedName(targetName, hashedNameBits);
 	    path = mkOutPath(outRootDir, outRootFile, tHashed, NULL);
 	    path = (char *)hashFindVal(pathHash, path);
 	    }
 	else
 	    path = (char *)hashFindVal(pathHash, targetName);
 	carefulClose(&prevFile);
 	if (path != NULL)
 	    {
 	    verbose(3, "Appending path from hash lookup on %s: %s\n",
 		    targetName, path);
 	    f = mustOpen(path, "a");
 	    }
 	else
 	    {
 	    int seqNum = (*pSeqNum)++;
 	    if (useSequenceName)
 		seqNum = numberFromName(targetName);
 	    else if (hashedNameBits > 0)
 		seqNum = numberFromHashedName(targetName, hashedNameBits);
 	    if (useFullSequenceName)
 		{
 		/* skip over db. prefix if any */
 		char *target = strchr(targetName,'.');
 		if (target)
 		    ++target;
 		else
 		    target = targetName;
 		path = mkOutPath(outRootDir, outRootFile, seqNum, target);
 		}
 	    else
 		path = mkOutPath(outRootDir, outRootFile, seqNum, NULL);
 	    verbose(3, "Opening path %s for writing and adding it to hash "
 		    "for %s\n", path, targetName);
 	    f = mustOpen(path, "w");
 	    fprintf(f, "##maf version=1 scoring=blastz\n");
 	    if (hashedNameBits > 0)
 		hashAdd(pathHash, path, path);
 	    else
 		hashAdd(pathHash, targetName, path);
 	    }
 	prevFile = f;
 	}
     mafWrite(f, maf);
     safef(prevTarget, sizeof(prevTarget), "%s", targetName);
     mafAliFree(&maf);
     }
 mafFileFree(&mf);
 }
 
 struct hash *loadSplits(char *file)
 /* load splits into a hash of lists by chrom */
 {
 struct bed *bed = NULL, *bedList = NULL, *nextBed = NULL;
 struct hash *splitHash = newHash(6);
 struct bed *splits = bedLoadNAll(file, 3);
 
 verbose(2, "found %d splits\n", slCount(splits));
 for (bed = splits; bed != NULL; bed = nextBed)
     {
     verbose(3, "split %s:%d\n", bed->chrom, bed->chromStart);
     nextBed = bed->next;
     if ((bedList = hashFindVal(splitHash, bed->chrom)) == NULL)
         {
         verbose(3, "adding %s to hash\n", bed->chrom);
         hashAdd(splitHash, bed->chrom, cloneBed(bed));
         }
     else
         slAddTail(&bedList, cloneBed(bed));
     freeMem(bed);
     }
 return splitHash;
 }
 
 char *chromFromSrc(char *src)
 /* get chrom name from <db>.<chrom> */
 {
 char *p;
 if ((p = strchr(src, '.')) == NULL)
     errAbort("Can't find chrom in MAF component src: %s\n", src);
 return ++p;
 }
 
 void splitMafFile(char *file, char *outDir, char *outPrefix, 
                         struct hash *splitHash)
 /* split file based on positions in hash */
 {
 char *chrom = NULL;
 char outFile[PATH_LEN];
 int ix = 0;
 FILE *f;
 boolean nextFile = TRUE;
 struct bed *bed, *bedList = NULL;
 int splitPos = 0;
 struct mafFile *mf = mafOpen(file);
 struct mafAli *maf = NULL;
 struct mafComp *mc;
 
 verbose(1, "splitting %s\n", file);
 maf = mafNext(mf);
 while (1)
     {
     if (!maf)
         {
         maf = mafNext(mf);
         if (!maf)
             break;
         }
     mc = maf->components;
     if (!chrom || differentString(chrom, chromFromSrc(mc->src)))
         /* new chrom */
         {
         chrom = cloneString(chromFromSrc(mc->src));
         bedList = (struct bed *)hashFindVal(splitHash, chrom);
         if (bedList)
             bed = (struct bed *)slPopHead(&bedList);
         else
             /* no entry for this chrom -- use the whole thing */
             bed = NULL;
         if (bed)
             splitPos = bed->chromStart;
         else
             /* last piece of this chrom */
             splitPos = mc->srcSize;
         ix = 0;
         nextFile = TRUE;
         }
     if (nextFile)
         {
         verbose(2, "file %d, pos: %d\n", ix, splitPos);
         safef(outFile, sizeof(outFile), "%s/%s%s.%02d.maf", 
                     outDir, outPrefix, chrom, ix);
         f = mustOpen(outFile, "w");
         mafWriteStart(f, mf->scoring);
         nextFile = FALSE;
         }
     if (mc->start + mc->size < splitPos)
         {
         mafWrite(f, maf);
         mafAliFree(&maf);
         continue;
         }
     if (mc->start < splitPos)
         {
         struct mafAli *full = maf;
         struct mafAli *subset = mafSubset(full, mc->src, mc->start, splitPos);
         mafWrite(f, subset);
         mafAliFree(&subset);
         maf = mafSubset(full, mc->src, splitPos , mc->start + mc->size);
         mafAliFree(&full);
         }
     mafWriteEnd(f);
     carefulClose(&f);
     ix++;
     nextFile = TRUE;
     bed = (struct bed *)slPopHead(&bedList);
     if (bed)
         splitPos = bed->chromStart;
     else
         splitPos = mc->srcSize;
     }
 mafFileFree(&mf);
 }
 
 void mafSplit(char *splitFile, char *outRoot, int mafCount, char *mafFiles[])
 /* Split MAF files at breaks in split file */
 {
-char dir[256], file[128];
+char dir[PATH_LEN], file[FILENAME_LEN];
 int i = 0;
 
 if (byTarget)
     verbose(1, "Splitting %d files by target sequence -- ignoring first "
 	    "argument %s\n",
 	    mafCount, splitFile);
 else
     verbose(1, "Splitting %d files from %s to %s\n",
 	    mafCount, splitFile, outRoot);
 splitPath(outRoot, dir, file, NULL);
 if (isNotEmpty(dir))
     makeDir(dir);
 else
     safef(dir, sizeof(dir), "%s", ".");
 if (byTarget)
     {
     struct hash *pathHash = hashNew(17);
     int seqNum = 0;
     for (i = 0; i < mafCount; i++)
 	splitMafByTarget(mafFiles[i], dir, file, pathHash, &seqNum);
     }
 else
     {
     struct hash *bedHash = loadSplits(splitFile);
     for (i = 0; i < mafCount; i++)
 	splitMafFile(mafFiles[i], dir, file, bedHash);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 byTarget = optionExists("byTarget");
 outDirDepth = optionInt("outDirDepth", outDirDepth);
 useSequenceName = optionExists("useSequenceName");
 useFullSequenceName = optionExists("useFullSequenceName");
 hashedNameBits = optionInt("useHashedName", hashedNameBits);
 if (outDirDepth > 0 && !byTarget)
     errAbort("-outDirDepth=N can be specified only when -byTarget is "
 	     "specified.");
 if (useSequenceName && !byTarget)
     errAbort("-useSequenceName can be specified only when -byTarget is "
 	     "specified.");
 if (useFullSequenceName && !byTarget)
     errAbort("-useFullSequenceName can be specified only when -byTarget is "
 	     "specified.");
 if (hashedNameBits > 0 && !byTarget)
     errAbort("-useHashedName can be specified only when -byTarget is "
 	     "specified.");
 if (hashedNameBits > 0 && useSequenceName)
     errAbort("-useHashedName and -useSequenceName are mutually exclusive.  "
 	     "Please pick one.");
 if (hashedNameBits > 0 &&
     (hashedNameBits < 2 || hashedNameBits > 17))
     errAbort("-useHashedName=N: N should be between 2 and 17.  If you "
 	     "strongly disagree, modify mafSplit.c.  Watch fileserver if "
 	     "exceeding.");
 if (argc < 4)
     usage();
 mafSplit(argv[1], argv[2], argc-3, &argv[3]);
 return 0;
 }