f57fc11d951807e19b20b5960e735fa50eaea279 braney Fri Jun 12 13:10:00 2026 -0700
Fix more warnings exposed by -O3 build across hg utils and errAbort

Continuation of the -O3 cleanup: a full clean rebuild surfaced warnings in
many programs whose objects had not been recompiled before. Most are the same
mechanical patterns as the first batch (strncpy -> safecpy/memcpy,
sprintf -> safef), plus a few that needed more thought:

- errAbort.h: mark errAbort/vaErrAbort/errnoAbort/noWarnAbort as noreturn.
  They provably never return (longjmp or exit; the existing "to make compiler
  happy" exit(-1) in noWarnAbort confirms the intent), and this lets GCC prune
  the impossible null paths after an errAbort guard, fixing false-positive
  null-deref / overread warnings in mafAddIRows, mafAddIRowsStream and
  phyloPlace with no source change to those files.
- altSplice.c (hgGene): real one-element stack buffer overflow.
  makeGrayShades writes shadesOfGray[maxShade+1], but the caller declared
  shadesOfGray[9] with maxShade=8. Grow the array to [10] (maxShade stays 8);
  behavior unchanged.
- hgc.c bedPrintPos: ~60 callers pass a track-specific struct cast to
  (struct bed *) and read only its bed-compatible leading fields. At -O3
  -Warray-bounds flags the casts because the real object is smaller than
  struct bed; the reads are safe by the bed-layout convention, so suppress
  -Warray-bounds around just that function.
- mafsInRegion.c: chromFromSrc returns strchr(src,'.')+1, which GCC mis-sizes
  as a 0-byte region when handed to strcmp via sameString/differentString;
  suppress the false-positive -Wstringop-overread around extractMafs.
- sanger22gtf.c, bottleneck.c: put the printf/fprintf in the else of the NULL
  guard so -Wformat-overflow (which runs before the noreturn-based pruning)
  can see the argument is non-null.

safecpy/memcpy/safef conversions: basicBed already done earlier; here haplotypes (memcpy of the original pointer pun), gbToFaRa, motifSig, hgClonePos, featureBits, libScan, hgGoldGapGl, hgSoftPromoter, mafClick, mafAddQRows, hgc.c, stanToBedAndExpRecs, bedUp, faSplit, trfBig, splitFaIntoContigs, aladdin, ameme. A full clean tree now builds with no warnings at -O3. refs #37761 Co-Authored-By: Claude Opus 4.8 (1M context) diff --git src/hg/trfBig/trfBig.c src/hg/trfBig/trfBig.c index 80fa7addaec..57ddd6ab7f0 100644 --- src/hg/trfBig/trfBig.c +++ src/hg/trfBig/trfBig.c @@ -1,304 +1,304 @@ /* trfBig - Mask tandem repeats on a big sequence file.. */ /* Copyright (C) 2011 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "fa.h" #include "nib.h" #include "portable.h" #include "options.h" #include "verbose.h" /* Variables that can be set from command line. */ char *trfExe = "trf"; /* trf executable name. */ boolean doBed = FALSE; /* Output .bed file. */ char *tempDir = "."; /* By default use current dir. */ int maxPeriod = 2000; /* Maximum size of repeat. */ bool keep = FALSE; /* Don't delete tmp files */ int trf409_l = 0; /* trf 4.09 new option -l, from trf usage message: -l maximum TR length expected (in millions) (eg, -l 3 or -l=3 for 3 million) Human genome HG38 would need -l 6 */ /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"bed", OPTION_BOOLEAN}, {"bedAt", OPTION_STRING}, {"tempDir", OPTION_STRING}, {"trf", OPTION_STRING}, {"maxPeriod", OPTION_INT}, {"keep", OPTION_BOOLEAN}, {"l", OPTION_INT}, {NULL, 0} }; void usage() /* Explain usage and exit. */ { errAbort( "trfBig - Mask tandem repeats on a big sequence file.\n" "usage:\n" " trfBig inFile outFile\n" "This will repeatedly run trf to mask tandem repeats in infile\n" "and put masked results in outFile. 
inFile and outFile can be .fa\n" "or .nib format. Outfile can be .bed as well. Sequence output is hard\n" "masked, lowercase.\n" "\n" " -bed creates a bed file in current dir\n" " -bedAt=path.bed - create a bed file at explicit location\n" " -tempDir=dir Where to put temp files.\n" " -trf=trfExe explicitly specifies trf executable name\n" " -maxPeriod=N Maximum period size of repeat (default %d)\n" " -keep don't delete tmp files\n" " -l= when used here, for new trf v4.09 option:\n" " maximum TR length expected (in millions)\n" " (eg, -l=3 for 3 million), Human genome hg38 would need -l=6", maxPeriod); } void writeSomeDatToBed(char *inName, FILE *out, char *chromName, int chromOffset, int start, int end) /* Read dat file and write bits of it to .bed out file adding offset as necessary. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); char *line; int lineSize; char *row[14]; boolean gotHead = FALSE; int s, e, i; while (lineFileNext(lf, &line, &lineSize)) { if (startsWith("Parameters:", line)) { gotHead = TRUE; break; } } if (!gotHead) errAbort("%s isn't a recognized trf .dat file\n", inName); while(lineFileRow(lf, row)) { s = atoi(row[0])-1; e = atoi(row[1]); if (s < start) s = start; if (e > end) e = end; if (s < e) { fprintf(out, "%s\t%d\t%d\ttrf", chromName, s+chromOffset, e+chromOffset); for (i=2; i<14; ++i) fprintf(out, "\t%s", row[i]); fprintf(out, "\n"); } } lineFileClose(&lf); } void removeWild(char *pat) /* Remove all files matching wildcard. */ { char dir[PATH_LEN], fn[FILENAME_LEN], ext[FILEEXT_LEN]; char wild[256]; struct fileInfo *list, *el; splitPath(pat, dir, fn, ext); sprintf(wild, "%s%s", fn, ext); if (dir[0] == 0) strcpy(dir, "."); list = listDirX(tempDir, wild, TRUE); for (el = list; el != NULL; el = el->next) { remove(el->name); verbose(1, "Removed %s\n", el->name); } slFreeList(&list); } void makeTrfRootName(char trfRootName[512], char *faFile) /* Make root name of files trf produces from faFile. 
*/ { -sprintf(trfRootName, "%s.2.7.7.80.10.50.%d", faFile, maxPeriod); +safef(trfRootName, 512, "%s.2.7.7.80.10.50.%d", faFile, maxPeriod); } void trfSysCall(char *faFile) /* Invoke trf program on file. */ { // need to execute in trf directory, as tmp files go to current directory char faBase[FILENAME_LEN], faExt[FILENAME_LEN]; splitPath(faFile, NULL, faBase, faExt); char command[1024]; if (trf409_l > 0) safef(command, sizeof(command), "cd %s && %s %s%s 2 7 7 80 10 50 %d -m %s -l %d", tempDir, trfExe, faBase, faExt, maxPeriod, doBed ? "-d" : "", trf409_l); else safef(command, sizeof(command), "cd %s && %s %s%s 2 7 7 80 10 50 %d -m %s", tempDir, trfExe, faBase, faExt, maxPeriod, doBed ? "-d" : ""); verbose(1, "command %s\n", command); fflush(stdout); fflush(stderr); /* Run the system command, expecting a return code of 1, as trf returns the number of successfully processed sequences. */ int status = system(command); if (status == -1) errnoAbort("error starting command: %s", command); else if (WIFSIGNALED(status)) errAbort("command terminated by signal %d: %s", WTERMSIG(status), command); else if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 1) errAbort("command exited with status %d (expected 1): %s", WEXITSTATUS(status), command); } else errAbort("unexpected exit for command: %s", command); } void outputWithBreaks(FILE *out, char *s, int size, int lineSize) /* Print s of given size to file, adding line feeds every now and then. */ { int i, oneSize; for (i=0; i<size; i += oneSize) { oneSize = size - i; if (oneSize > lineSize) oneSize = lineSize; mustWrite(out, s+i, oneSize); fputc('\n', out); } } void trfBig(char *input, char *output) /* trfBig - Mask tandem repeats on a big sequence file.. 
*/ { int maxSize = 5000000; int overlapSize = 10000; int start, end, s, e; int halfOverlapSize = overlapSize/2; char tempFile[512], trfRootName[512], trfTemp[1024], bedFileName[1024]; char dir[PATH_LEN], seqName[FILENAME_LEN], ext[FILEEXT_LEN]; FILE *bedFile = NULL; struct dnaSeq *maskedSeq = NULL; if (doBed) { if (optionExists("bedAt")) strcpy(bedFileName, optionVal("bedAt", NULL)); else { splitPath(output, dir, seqName, ext); sprintf(bedFileName, "%s%s.bed", dir, seqName); } bedFile = mustOpen(bedFileName, "w"); } splitPath(input, dir, seqName, ext); if (sameString("stdin", seqName)) safef(tempFile, sizeof(tempFile), "%s", rTempName(tempDir, seqName, ".tf")); else safef(tempFile, sizeof(tempFile), "%s/%s.tf", tempDir, seqName); if (endsWith(input, ".nib") && (endsWith(output, ".nib") || sameString(output, "/dev/null"))) { int nibSize; FILE *in; struct nibStream *ns = nibStreamOpen(output); struct dnaSeq *seq; nibOpenVerify(input, &in, &nibSize); for (start = 0; start < nibSize; start = end) { end = start + maxSize; if (end > nibSize) end = nibSize; seq = nibLdPart(input, in, nibSize, start, end - start); faWrite(tempFile, seq->name, seq->dna, seq->size); freeDnaSeq(&seq); trfSysCall(tempFile); makeTrfRootName(trfRootName, tempFile); sprintf(trfTemp, "%s.mask", trfRootName); maskedSeq = faReadDna(trfTemp); s = (start == 0 ? 
0 : halfOverlapSize); if (end == nibSize) e = end - start; else { e = end - halfOverlapSize - start; end -= overlapSize; } nibStreamMany(ns, maskedSeq->dna + s, e-s); freeDnaSeq(&maskedSeq); if (doBed) { sprintf(trfTemp, "%s.dat", trfRootName); writeSomeDatToBed(trfTemp, bedFile, seqName, start, s, e); } } nibStreamClose(&ns); } else if (!endsWith(input, ".nib") && !endsWith(output, ".nib")) { struct lineFile *lf = lineFileOpen(input, TRUE); struct dnaSeq seq; FILE *out = mustOpen(output, "w"); ZeroVar(&seq); while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { fprintf(out, ">%s\n", seq.name); for (start = 0; start < seq.size; start = end) { end = start + maxSize; if (end > seq.size) end = seq.size; faWrite(tempFile, seq.name, seq.dna+start, end - start); trfSysCall(tempFile); makeTrfRootName(trfRootName, tempFile); sprintf(trfTemp, "%s.mask", trfRootName); maskedSeq = faReadDna(trfTemp); s = (start == 0 ? 0 : halfOverlapSize); if (end == seq.size) e = end - start; else { e = end - halfOverlapSize - start; end -= overlapSize; } outputWithBreaks(out, maskedSeq->dna+s, e-s, 50); freeDnaSeq(&maskedSeq); if (doBed) { sprintf(trfTemp, "%s.dat", trfRootName); writeSomeDatToBed(trfTemp, bedFile, seq.name, start, s, e); } } } lineFileClose(&lf); carefulClose(&out); } else { errAbort("Sorry, both input and output must be in same format."); } if (!keep) { sprintf(trfTemp, "%s*", tempFile); removeWild(trfTemp); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); if (argc != 3) usage(); trf409_l = optionInt("l", trf409_l); trfExe = optionVal("trf", trfExe); doBed = optionExists("bed") || optionExists("bedAt"); tempDir = optionVal("tempDir", tempDir); maxPeriod = optionInt("maxPeriod", maxPeriod); keep = optionExists("keep"); trfBig(argv[1], argv[2]); return 0; }