f952b14fca3833e9d8fef0ae8a086ad15485b552
galt
  Wed Jan 1 02:11:38 2025 -0800
fix various errors for utils compiled on Rocky 9 hgwdev-new.

diff --git src/hg/makeDb/hgLoadOutJoined/hgLoadOutJoined.c src/hg/makeDb/hgLoadOutJoined/hgLoadOutJoined.c
index dfd187b..b77fcf8 100644
--- src/hg/makeDb/hgLoadOutJoined/hgLoadOutJoined.c
+++ src/hg/makeDb/hgLoadOutJoined/hgLoadOutJoined.c
@@ -1,317 +1,317 @@
 /* hgLoadOutJoined - fixed loading RepeatMasker .out files. */
 /* correctly load the id field, and new meaning for repStart, repEnd, repLeft */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "options.h"
 #include "cheapcgi.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "jksql.h"
 #include "rmskOut2.h"
 
 
 /* printf-style SQL template for the table created by loadOneTable.
  * The single %s is the table name.  The statement is deliberately left
  * open after the last column: loadOneTable appends the INDEX clause and
  * the closing parenthesis before executing it. */
 char *createRmskOut = "CREATE TABLE %s (\n"
 "   bin smallint unsigned not null,     # bin index field for range queries\n"
 "   swScore int unsigned not null,	# Smith Waterman alignment score\n"
 "   milliDiv int unsigned not null,	# Base mismatches in parts per thousand\n"
 "   milliDel int unsigned not null,	# Bases deleted in parts per thousand\n"
 "   milliIns int unsigned not null,	# Bases inserted in parts per thousand\n"
 "   genoName varchar(255) not null,	# Genomic sequence name\n"
 "   genoStart int unsigned not null,	# Start in genomic sequence\n"
 "   genoEnd int unsigned not null,	# End in genomic sequence\n"
 "   genoLeft int not null,		# -#bases after match in genomic sequence\n"
 "   strand char(1) not null,		# Relative orientation + or -\n"
 "   repName varchar(255) not null,	# Name of repeat\n"
 "   repClass varchar(255) not null,	# Class of repeat\n"
 "   repFamily varchar(255) not null,	# Family of repeat\n"
 "   repStart int not null,		# Start in repeat sequence - regardless of orient\n"
 "   repEnd int not null,		# End in repeat sequence - regardless of orient\n"
 "   repLeft int not null,		# -#bases after match in repeat sequence - regardless of orient\n"
 "   id int not null,		# The ID of the hit. Used to link related fragments\n"
 "             #Indices\n";
 
 /* When FALSE, readOneOut writes a leading bin column (hFindBin) on each row.
  * Never set to TRUE in the code visible here. */
 boolean noBin = FALSE;
 /* Value of -tabFile / -tabfile.  When non-NULL the rows are written to this
  * file and the database load is skipped. */
 char *tabFileName = NULL;
 /* Value of -table (default "rmskOutBaseline"); assigned in main. */
 char *suffix = NULL;
 /* Number of records rejected by checkRepeat. */
 int badRepCnt = 0;
 
 /* command line option specifications.
  * Both "tabFile" and "tabfile" are accepted; main checks them in that order. */
 static struct optionSpec optionSpecs[] = {
     {"tabFile", OPTION_STRING},
     {"tabfile", OPTION_STRING},
     {"table", OPTION_STRING},
     {NULL, 0}
 };
 
 void usage(void)
 /* Explain usage and exit.  The message is delivered through errAbort,
  * so this function does not return to its caller. */
 {
 errAbort(
   "hgLoadOutJoined - load new style (2014) RepeatMasker .out files into database\n"
   "usage:\n"
   "   hgLoadOutJoined database file(s).out\n"
   "For multiple files chrN.out this will create the single table 'rmskOutBaseline'\n"
   "in the database.\n"
   "options:\n"
   "   -tabFile=text.tab - don't actually load database, just create tab file\n"
   "   -table=name - use a different suffix other than the default (rmskOutBaseline)");
 }
 
 void badFormat(struct lineFile *lf, int id)
 /* Abort with a generic "badly formatted line" message naming the current
  * line number and file of lf.  The id argument is not used. */
 {
 int badLine = lf->lineIx;
 char *inFile = lf->fileName;
 
 errAbort("Badly formatted line %d in %s\n", badLine, inFile);
 }
 
 int makeMilli(char *s, struct lineFile *lf)
 /* Convert an ascii floating point percentage to parts per thousand.
  *   s  - the text of the field
  *   lf - input file, used only to report line number and file name
  * A field starting with '-' is treated as "0.0"; a warning is issued
  * unless it is exactly "-0.0".  Any other field that does not start with
  * a digit is reported through badFormat, which aborts.
  * Returns 10 times the parsed value, rounded. */
 {
 /* Cope with -0.0  and -0.2 etc.*/
 if (s[0] == '-')
     {
     if (!sameString(s, "-0.0"))
         warn("Strange perc. field %s line %d of %s", s, lf->lineIx, lf->fileName);
     s = "0.0";
     }
 /* isdigit is only defined for unsigned char values and EOF, so a plain
  * (possibly negative) char must be converted before the call. */
 if (!isdigit((unsigned char)s[0]))
     badFormat(lf,1);
 return round(10.0*atof(s));
 }
 
 static int parenInt(char *s, struct lineFile *lf)
 /* Parse an integer that may be written with a leading '(' as in "(123)".
  * The opening parenthesis, if present, is skipped and the rest is handed
  * to atoi.  lf is accepted for symmetry with the other parsers but is
  * not used. */
 {
 char *digits = (s[0] == '(') ? s + 1 : s;
 
 return atoi(digits);
 }
 
 boolean checkRepeat(struct rmskOut2 *r, struct lineFile *lf)
 /* Return TRUE when the repeat range of r is usable.  A record whose
  * repStart exceeds its repEnd is counted in badRepCnt, reported at
  * verbose level 2, and rejected with FALSE. */
 {
 /* start beyond end is bogus on both strands */
 if (r->repStart <= r->repEnd)
     return TRUE;
 
 badRepCnt++;
 if (verboseLevel() > 1)
     {
     verbose(2, "bad rep range [%d, %d] line %d of %s %s:%d-%d\n",
             r->repStart, r->repEnd,
             lf->lineIx, lf->fileName,
             r->genoName, r->genoStart, r->genoEnd);
     }
 return FALSE;
 }
 
 /* The single output .tab file; opened lazily by getFileForChrom and
  * closed by closeFiles. */
 FILE *theFile = NULL;
 /* Not referenced by any code visible in this file. */
 struct hash *chromFpHash = NULL;
 /* Name of the .tab file used when no -tabFile option was given. */
 char *defaultTempName = "rmskOutBaseline.tab";
 
 FILE *getFileForChrom(char *chrom)
 /* Return the output file pointer.  All chroms share one file, so the
  * chrom argument is ignored.  The file is opened for writing on first
  * use, under tabFileName when that is set and defaultTempName otherwise. */
 {
 if (theFile == NULL)
     {
     char *outName = (tabFileName != NULL) ? tabFileName : defaultTempName;
     theFile = mustOpen(outName, "w");
     }
 return theFile;
 }
 
 void helCarefulClose(struct hashEl *hel)
 /* Treat the value stored in hash element hel as a FILE pointer and
  * close it with carefulClose.  Only a local copy of the pointer is
  * passed, so hel->val itself is left unchanged. */
 {
 FILE *fp;
 
 fp = (FILE *)(hel->val);
 carefulClose(&fp);
 }
 
 void closeFiles(void)
 /* Close the single output file pointer (theFile) that we've been using. */
 {
 carefulClose(&theFile);
 }
 
 void readOneOut(char *rmskFile)
 /* Read RepeatMasker .out file rmskFile, parse each record, and write the
  * records accepted by checkRepeat to the shared .tab file.
  * Aborts through errAbort when the file is empty, when the first line is
  * not a recognized .out header, or when a record has too few fields.
  * Records rejected by checkRepeat are skipped. */
 {
 struct lineFile *lf;
 char *line, *words[24];
 int lineSize, wordCount;
 
 /* Open .out file and process header. */
 lf = lineFileOpen(rmskFile, TRUE);
 if (!lineFileNext(lf, &line, &lineSize))
     errAbort("Empty %s", lf->fileName);
 if (!startsWith("   SW  perc perc", line))
     {
     if (!startsWith("   SW   perc perc", line))
         errAbort("%s doesn't seem to be a RepeatMasker .out file, first "
             "line seen:\n%s", lf->fileName, line);
     }
 /* Skip the next two lines, which are treated as the rest of the header. */
 lineFileNext(lf, &line, &lineSize);
 lineFileNext(lf, &line, &lineSize);
 
 /* Process line oriented records of .out file. */
 while (lineFileNext(lf, &line, &lineSize))
     {
     static struct rmskOut2 r;
     char *s;
     int wordOffset = 0;
 
     wordCount = chopLine(line, words);
     if (wordCount < 14)
         errAbort("Expecting 14 or 15 words line %d of %s", 
             lf->lineIx, lf->fileName);
     r.swScore = atoi(words[0]);
     r.milliDiv = makeMilli(words[1], lf);
     r.milliDel = makeMilli(words[2], lf);
     r.milliIns = makeMilli(words[3], lf);
     r.genoName = words[4];
     r.genoStart = atoi(words[5])-1;
     r.genoEnd = atoi(words[6]);
     r.genoLeft = parenInt(words[7], lf);
     r.strand[0]  = (words[8][0] == '+' ? '+' : '-');
     r.repName = words[9];
     r.repClass = words[10];
 
     /* Find the first character of the class field that is not a
      * parenthesis.  If there is none, or it is a digit, the field is
      * really a repeat coordinate: the class/family column is missing
      * (a custom library with no class/family indication) and all later
      * columns sit one position to the left.  Scanning in place avoids
      * allocating a scratch copy of the field for every record. */
     for (s = r.repClass; *s == '(' || *s == ')'; ++s)
         ;
     if (*s == 0 || isdigit((unsigned char)*s))
         {
         wordOffset = 1;
         /* String constants: nothing to allocate or free per record. */
         r.repClass = "Unspecified";
         r.repFamily = "Unspecified";
         }
     else
         {
         /* Split "class/family" in place; with no '/', family == class. */
         s = strchr(r.repClass, '/');
         if (s == NULL)
             r.repFamily = r.repClass;
         else
            {
            *s++ = 0;
            r.repFamily = s;
            }
         }
 
     /* The id column is words[14-wordOffset].  The 14-word check above
      * does not cover it when the class column is present, and entries of
      * words[] beyond wordCount are not valid for this line. */
     if (wordCount < 15 - wordOffset)
         errAbort("Expecting at least %d words line %d of %s, got %d",
             15 - wordOffset, lf->lineIx, lf->fileName, wordCount);
 
     r.repStart = parenInt(words[11-wordOffset], lf);
     r.repEnd = atoi(words[12-wordOffset]);
     r.repLeft = parenInt(words[13-wordOffset], lf);
     r.id = atoi(words[14-wordOffset]);
     /* On the complement strand the start and left columns are swapped. */
     if (words[8][0] == 'C')
         {
         r.repLeft = parenInt(words[11-wordOffset], lf);
         r.repStart = parenInt(words[13-wordOffset], lf);
         }
     if (checkRepeat(&r, lf))
         {
         FILE *f = getFileForChrom(r.genoName);
         if (!noBin)
             fprintf(f, "%u\t", hFindBin(r.genoStart, r.genoEnd));
         rmskOut2TabOut(&r, f);
         }
     }
 /* NOTE(review): lf is never closed in this function; confirm whether a
  * lineFileClose(&lf) belongs here when many input files are processed. */
 }
 
 void loadOneTable(char *database, struct sqlConnection *conn, char *tempName, char *tableName)
 /* (Re)create tableName through conn, fill it from tab file tempName,
  * and then remove tempName.  An existing table of the same name is
  * dropped first.  database is used to pick the index prefix length. */
 {
 struct dyString *sql = dyStringNew(1024);
 int minIndexLen;
 
 verbose(1, "Loading up table %s\n", tableName);
 
 /* Get rid of any previous version of the table. */
 if (sqlTableExists(conn, tableName))
     {
     sqlDyStringPrintf(sql, "DROP table %s", tableName);
     sqlUpdate(conn, sql->string);
     }
 dyStringClear(sql);
 
 /* CREATE statement: the column definitions, then the index clause
  * that also closes the statement. */
 sqlDyStringPrintf(sql, createRmskOut, tableName);
 minIndexLen = hGetMinIndexLength(database);
 sqlDyStringPrintf(sql, "   INDEX(genoName(%d),bin))\n", minIndexLen);
 sqlUpdate(conn, sql->string);
 dyStringClear(sql);
 
 /* Bulk load the rows from the tab file, then delete the tab file. */
 sqlDyStringPrintf(sql, "LOAD data local infile '%s' into table %s",
                   tempName, tableName);
 sqlUpdate(conn, sql->string);
 remove(tempName);
 }
 
 void processOneOut(char *database, struct sqlConnection *conn, char *rmskFile, char *suffix)
 /* Read one RepeatMasker .out file and load it into database.
  * The rows are written by readOneOut.  Unless -tabFile was given, the
  * output file is then closed and loaded into a table named
  * <base>_<suffix>, where base is the file name of rmskFile after
  * splitPath and chopSuffix.
  * Not called from any code visible in this file. */
 {
 verbose(1, "Processing %s\n", rmskFile);
 
 readOneOut(rmskFile);
 
 /* Create database table (if not -tabFile). */
 if (tabFileName == NULL)
     {
-    char dir[256], base[128], extension[64];
+    char dir[PATH_LEN], base[FILENAME_LEN], extension[FILEEXT_LEN];
     char tableName[256];
     /* Only base is used below; dir and extension are just outputs of splitPath. */
     splitPath(rmskFile, dir, base, extension);
     chopSuffix(base);
     safef(tableName, sizeof(tableName), "%s_%s", base, suffix);
     /* Close the tab file before loading it into the table. */
     closeFiles();
     loadOneTable(database, conn, defaultTempName, tableName);
     }
 }
 
 /* Not referenced by any code visible in this file; hgLoadOutJoined uses
  * its own local connection. */
 struct sqlConnection *theConn = NULL;
 
 void hgLoadOutJoined(char *database, int rmskCount, char *rmskFileNames[], char *suffix)
 /* Convert the rmskCount RepeatMasker .out files named in rmskFileNames
  * into one tab file.  Unless -tabFile was given, that file is then
  * loaded into table suffix of database.  Finishes with a note about how
  * many records were dropped, if any. */
 {
 struct sqlConnection *conn = NULL;
 int wantDbLoad = (tabFileName == NULL);
 int fileIx = 0;
 
 /* A connection is only needed when the table is actually loaded. */
 if (wantDbLoad)
     {
     conn = hAllocConn(database);
     verbose(2,"#\thgLoadOutJoined: connected to database: %s\n", database);
     }
 
 /* Every input file contributes rows to the same output file. */
 while (fileIx < rmskCount)
     {
     readOneOut(rmskFileNames[fileIx]);
     ++fileIx;
     }
 closeFiles();
 
 if (wantDbLoad)
     loadOneTable(database, conn, defaultTempName, suffix);
 hFreeConn(&conn);
 
 if (badRepCnt > 0)
     {
     warn("note: %d records dropped due to repEnd < 0 or repStart > repEnd\n", badRepCnt);
     if (verboseLevel() < 2)
         warn("      run with -verbose=2 for details\n");
     }
 }
 
 int main(int argc, char *argv[])
 /* Parse options, require a database and at least one .out file, and
  * hand the work to hgLoadOutJoined. */
 {
 char *database;
 char **rmskFiles;
 int rmskCount;
 
 optionInit(&argc, argv, optionSpecs);
 if (argc < 3)
     usage();
 database = argv[1];
 rmskFiles = argv + 2;
 rmskCount = argc - 2;
 
 suffix = optionVal("table", "rmskOutBaseline");
 
 /* Accept -tabFile first, then fall back to the -tabfile spelling. */
 tabFileName = optionVal("tabFile", tabFileName);
 if (tabFileName == NULL)
     tabFileName = optionVal("tabfile", NULL);
 
 hgLoadOutJoined(database, rmskCount, rmskFiles, suffix);
 return 0;
 }