5c8526806b8283911229cd3e53dafbd001a1f68b markd Tue Feb 1 23:11:59 2011 -0800 Fixed bug with tempDir option not working. Update to current option parsing. Made help more informative diff --git src/hg/trfBig/trfBig.c src/hg/trfBig/trfBig.c index e42a627..5fc5169 100644 --- src/hg/trfBig/trfBig.c +++ src/hg/trfBig/trfBig.c @@ -1,47 +1,63 @@ /* trfBig - Mask tandem repeats on a big sequence file.. */ #include "common.h" #include "linefile.h" #include "fa.h" #include "nib.h" #include "portable.h" -#include "cheapcgi.h" +#include "options.h" +#include "verbose.h" static char const rcsid[] = "$Id: trfBig.c,v 1.20 2009/12/24 05:10:49 markd Exp $"; /* Variables that can be set from command line. */ char *trfExe = "trf"; /* trf executable name. */ boolean doBed = FALSE; /* Output .bed file. */ char *tempDir = "."; /* By default use current dir. */ int maxPeriod = 2000; /* Maximum size of repeat. */ +bool keep = FALSE; /* Don't delete tmp files */ + +/* command line option specifications */ +static struct optionSpec optionSpecs[] = +{ + {"bed", OPTION_BOOLEAN}, + {"bedAt", OPTION_STRING}, + {"tempDir", OPTION_STRING}, + {"trf", OPTION_STRING}, + {"maxPeriod", OPTION_INT}, + {"keep", OPTION_BOOLEAN}, + {NULL, 0} +}; void usage() /* Explain usage and exit. */ { errAbort( "trfBig - Mask tandem repeats on a big sequence file.\n" "usage:\n" " trfBig inFile outFile\n" "This will repeatedly run trf to mask tandem repeats in infile\n" "and put masked results in outFile. inFile and outFile can be .fa\n" - "or .nib format. Outfile can be .bed as well\n" + "or .nib format. Outfile can be .bed as well. Sequence output is hard\n" + "masked, lowercase.\n" "\n" " -bed creates a bed file in current dir\n" " -bedAt=path.bed - create a bed file at explicit location\n" " -tempDir=dir Where to put temp files.\n" " -trf=trfExe explicitly specifies trf executable name\n" - " -maxPeriod=N Maximum period size of repeat (default %d)\n", + " -maxPeriod=N Maximum period size of repeat (default %d)\n" + " -keep don't delete tmp files\n", maxPeriod); } void writeSomeDatToBed(char *inName, FILE *out, char *chromName, int chromOffset, int start, int end) /* Read dat file and write bits of it to .bed out file adding offset as necessary. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); char *line; int lineSize; char *row[14]; boolean gotHead = FALSE; int s, e, i; while (lineFileNext(lf, &line, &lineSize)) @@ -74,48 +90,54 @@ void removeWild(char *pat) /* Remove all files matching wildcard. */ { char dir[256], fn[128], ext[64]; char wild[256]; struct fileInfo *list, *el; splitPath(pat, dir, fn, ext); sprintf(wild, "%s%s", fn, ext); if (dir[0] == 0) strcpy(dir, "."); list = listDirX(tempDir, wild, TRUE); for (el = list; el != NULL; el = el->next) { remove(el->name); - uglyf("Removed %s\n", el->name); + verbose(1, "Removed %s\n", el->name); } slFreeList(&list); } void makeTrfRootName(char trfRootName[512], char *faFile) /* Make root name of files trf produces from faFile. */ { sprintf(trfRootName, "%s.2.7.7.80.10.50.%d", faFile, maxPeriod); } void trfSysCall(char *faFile) /* Invoke trf program on file. */ { +// need to execute in trf directory, as tmp files go to current directory +char faBase[FILENAME_LEN], faExt[FILENAME_LEN]; +splitPath(faFile, NULL, faBase, faExt); + char command[1024]; -safef(command, sizeof(command), "cd %s; %s %s 2 7 7 80 10 50 %d -m %s", - tempDir, trfExe, faFile, maxPeriod, doBed ? "-d" : ""); -uglyf("faFile %s, command %s\n", faFile, command); +safef(command, sizeof(command), "cd %s && %s %s%s 2 7 7 80 10 50 %d -m %s", + tempDir, trfExe, faBase, faExt, maxPeriod, doBed ? "-d" : ""); +verbose(1, "command %s\n", command); +fflush(stdout); +fflush(stderr); /* Run the system command, expecting a return code of 1, as trf returns the number of successfully processed sequences. */ int status = system(command); if (status == -1) errnoAbort("error starting command: %s", command); else if (WIFSIGNALED(status)) errAbort("command terminated by signal %d: %s", WTERMSIG(status), command); else if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 1) errAbort("command exited with status %d (expected 1): %s", WEXITSTATUS(status), command); } else errAbort("unexpected exit for command: %s", command); @@ -136,32 +158,32 @@ void trfBig(char *input, char *output) /* trfBig - Mask tandem repeats on a big sequence file.. */ { int maxSize = 5000000; int overlapSize = 10000; int start, end, s, e; int halfOverlapSize = overlapSize/2; char tempFile[512], trfRootName[512], trfTemp[512], bedFileName[512]; char dir[256], seqName[128], ext[64]; FILE *bedFile = NULL; struct dnaSeq *maskedSeq = NULL; if (doBed) { - if (cgiVarExists("bedAt")) - strcpy(bedFileName, cgiString("bedAt")); + if (optionExists("bedAt")) + strcpy(bedFileName, optionVal("bedAt", NULL)); else { splitPath(output, dir, seqName, ext); sprintf(bedFileName, "%s%s.bed", dir, seqName); } bedFile = mustOpen(bedFileName, "w"); } splitPath(input, dir, seqName, ext); if (sameString("stdin", seqName)) safef(tempFile, sizeof(tempFile), "%s", rTempName(tempDir, seqName, ".tf")); else safef(tempFile, sizeof(tempFile), "%s/%s.tf", tempDir, seqName); if (endsWith(input, ".nib") && (endsWith(output, ".nib") || sameString(output, "/dev/null"))) @@ -232,32 +254,36 @@ freeDnaSeq(&maskedSeq); if (doBed) { sprintf(trfTemp, "%s.dat", trfRootName); writeSomeDatToBed(trfTemp, bedFile, seq.name, start, s, e); } } } lineFileClose(&lf); carefulClose(&out); } else { errAbort("Sorry, both input and output must be in same format."); } +if (!keep) + { sprintf(trfTemp, "%s*", tempFile); removeWild(trfTemp); } +} int main(int argc, char *argv[]) /* Process command line. */ { -cgiSpoof(&argc, argv); +optionInit(&argc, argv, optionSpecs); if (argc != 3) usage(); -trfExe = cgiUsualString("trf", trfExe); -doBed = cgiBoolean("bed") || cgiVarExists("bedAt"); -tempDir = cgiUsualString("tempDir", tempDir); -maxPeriod = cgiUsualInt("maxPeriod", maxPeriod); +trfExe = optionVal("trf", trfExe); +doBed = optionExists("bed") || optionExists("bedAt"); +tempDir = optionVal("tempDir", tempDir); +maxPeriod = optionInt("maxPeriod", maxPeriod); +keep = optionExists("keep"); trfBig(argv[1], argv[2]); return 0; }