5c8526806b8283911229cd3e53dafbd001a1f68b
markd
  Tue Feb 1 23:11:59 2011 -0800
Fixed bug with tempDir option not working.  Updated to current option parsing.  Made help more informative.
diff --git src/hg/trfBig/trfBig.c src/hg/trfBig/trfBig.c
index e42a627..5fc5169 100644
--- src/hg/trfBig/trfBig.c
+++ src/hg/trfBig/trfBig.c
@@ -1,47 +1,63 @@
 /* trfBig - Mask tandem repeats on a big sequence file.. */
 #include "common.h"
 #include "linefile.h"
 #include "fa.h"
 #include "nib.h"
 #include "portable.h"
-#include "cheapcgi.h"
+#include "options.h"
+#include "verbose.h"
 
 static char const rcsid[] = "$Id: trfBig.c,v 1.20 2009/12/24 05:10:49 markd Exp $";
 
 /* Variables that can be set from command line. */
 char *trfExe = "trf";	/* trf executable name. */
 boolean doBed = FALSE;	/* Output .bed file. */
 char *tempDir = ".";	/* By default use current dir. */
 int maxPeriod = 2000;    /* Maximum size of repeat. */
+bool keep = FALSE;       /* Don't delete tmp files */
+
+/* command line option specifications */
+static struct optionSpec optionSpecs[] =
+{
+    {"bed", OPTION_BOOLEAN},
+    {"bedAt", OPTION_STRING},
+    {"tempDir", OPTION_STRING},
+    {"trf", OPTION_STRING},
+    {"maxPeriod", OPTION_INT},
+    {"keep", OPTION_BOOLEAN},
+    {NULL, 0}
+};
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "trfBig - Mask tandem repeats on a big sequence file.\n"
   "usage:\n"
   "   trfBig inFile outFile\n"
   "This will repeatedly run trf to mask tandem repeats in infile\n"
   "and put masked results in outFile.  inFile and outFile can be .fa\n"
-  "or .nib format. Outfile can be .bed as well\n"
+  "or .nib format. Outfile can be .bed as well. Sequence output is hard\n"
+  "masked, lowercase.\n"
   "\n"
   "   -bed creates a bed file in current dir\n"
   "   -bedAt=path.bed - create a bed file at explicit location\n"
   "   -tempDir=dir Where to put temp files.\n"
   "   -trf=trfExe explicitly specifies trf executable name\n"
-  "   -maxPeriod=N  Maximum period size of repeat (default %d)\n",
+  "   -maxPeriod=N  Maximum period size of repeat (default %d)\n"
+  "   -keep  don't delete tmp files\n",
   maxPeriod);
 }
 
 void writeSomeDatToBed(char *inName, FILE *out, char *chromName, int chromOffset, 
 	int start, int end)
 /* Read dat file and write bits of it to .bed out file adding offset as necessary. */
 {
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 char *line;
 int lineSize;
 char *row[14];
 boolean gotHead = FALSE;
 int s, e, i;
 
 while (lineFileNext(lf, &line, &lineSize))
@@ -74,48 +90,54 @@
 void removeWild(char *pat)
 /* Remove all files matching wildcard. */
 {
 char dir[256], fn[128], ext[64];
 char wild[256];
 struct fileInfo *list, *el;
 
 splitPath(pat, dir, fn, ext);
 sprintf(wild, "%s%s", fn, ext);
 if (dir[0] == 0) strcpy(dir, ".");
 
 list = listDirX(tempDir, wild, TRUE);
 for (el = list; el != NULL; el = el->next)
     {
     remove(el->name);
-    uglyf("Removed %s\n", el->name);
+    verbose(1, "Removed %s\n", el->name);
     }
 slFreeList(&list);
 }
 
 void makeTrfRootName(char trfRootName[512], char *faFile)
 /* Make root name of files trf produces from faFile. */
 {
 sprintf(trfRootName, "%s.2.7.7.80.10.50.%d", faFile, maxPeriod);
 }
 
 void trfSysCall(char *faFile)
 /* Invoke trf program on file. */
 {
+// need to execute in trf directory, as tmp files go to current directory
+char faBase[FILENAME_LEN], faExt[FILENAME_LEN];
+splitPath(faFile, NULL, faBase, faExt);
+
 char command[1024];
-safef(command, sizeof(command), "cd %s; %s %s 2 7 7 80 10 50 %d -m %s", 
-      tempDir, trfExe, faFile, maxPeriod, doBed ? "-d" : "");
-uglyf("faFile %s, command %s\n", faFile, command);
+safef(command, sizeof(command), "cd %s && %s %s%s 2 7 7 80 10 50 %d -m %s", 
+      tempDir, trfExe, faBase, faExt, maxPeriod, doBed ? "-d" : "");
+verbose(1, "command %s\n", command);
+fflush(stdout);
+fflush(stderr);
 
 /* Run the system command, expecting a return code of 1, as trf
    returns the number of successfully processed sequences. */
 int status = system(command);
 if (status == -1) 
     errnoAbort("error starting command: %s", command);
 else if (WIFSIGNALED(status))
     errAbort("command terminated by signal %d: %s", WTERMSIG(status), command);
 else if (WIFEXITED(status))
     {
     if (WEXITSTATUS(status) != 1)
         errAbort("command exited with status %d (expected 1): %s", WEXITSTATUS(status), command);
     }
 else
     errAbort("unexpected exit for command: %s", command);
@@ -136,32 +158,32 @@
 
 void trfBig(char *input, char *output)
 /* trfBig - Mask tandem repeats on a big sequence file.. */
 {
 int maxSize = 5000000;
 int overlapSize = 10000;
 int start, end, s, e;
 int halfOverlapSize = overlapSize/2;
 char tempFile[512], trfRootName[512], trfTemp[512], bedFileName[512];
 char dir[256], seqName[128], ext[64];
 FILE *bedFile = NULL;
 struct dnaSeq  *maskedSeq = NULL;
 
 if (doBed)
     {
-    if (cgiVarExists("bedAt"))
-       strcpy(bedFileName, cgiString("bedAt"));
+    if (optionExists("bedAt"))
+        strcpy(bedFileName, optionVal("bedAt", NULL));
     else
 	{
 	splitPath(output, dir, seqName, ext);
 	sprintf(bedFileName, "%s%s.bed", dir, seqName);
 	}
     bedFile = mustOpen(bedFileName, "w");
     }
 splitPath(input, dir, seqName, ext);
 if (sameString("stdin", seqName))
     safef(tempFile, sizeof(tempFile), "%s",
 	  rTempName(tempDir, seqName, ".tf"));
 else
     safef(tempFile, sizeof(tempFile), "%s/%s.tf", tempDir, seqName);
 if (endsWith(input, ".nib") && 
 	(endsWith(output, ".nib") || sameString(output, "/dev/null")))
@@ -232,32 +254,36 @@
 	    freeDnaSeq(&maskedSeq);
 	    if (doBed)
 		{
 		sprintf(trfTemp, "%s.dat", trfRootName);
 		writeSomeDatToBed(trfTemp, bedFile, seq.name, start, s, e);
 		}
 	    }
 	}
     lineFileClose(&lf);
     carefulClose(&out);
     }
 else
     {
     errAbort("Sorry, both input and output must be in same format.");
     }
+if (!keep)
+    {
 sprintf(trfTemp, "%s*", tempFile);
 removeWild(trfTemp);
 }
+}
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
-cgiSpoof(&argc, argv);
+optionInit(&argc, argv, optionSpecs);
 if (argc != 3)
     usage();
-trfExe = cgiUsualString("trf", trfExe);
-doBed = cgiBoolean("bed") || cgiVarExists("bedAt");
-tempDir = cgiUsualString("tempDir", tempDir);
-maxPeriod = cgiUsualInt("maxPeriod", maxPeriod);
+trfExe = optionVal("trf", trfExe);
+doBed = optionExists("bed") || optionExists("bedAt");
+tempDir = optionVal("tempDir", tempDir);
+maxPeriod = optionInt("maxPeriod", maxPeriod);
+keep = optionExists("keep");
 trfBig(argv[1], argv[2]);
 return 0;
 }