src/hg/maskOutFa/maskOutFa.c 1.10

1.10 2009/08/02 19:47:35 markd
add option to specify mask input format when reading from stdin. check command line options
Index: src/hg/maskOutFa/maskOutFa.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/maskOutFa/maskOutFa.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/maskOutFa/maskOutFa.c	14 Jun 2006 16:31:30 -0000	1.9
+++ src/hg/maskOutFa/maskOutFa.c	2 Aug 2009 19:47:35 -0000	1.10
@@ -1,16 +1,24 @@
 /* maskOutFa - Produce a masked .fa file given an unmasked .fa and 
  * a RepeatMasker .out file or a bed file to mask on. */
 #include "common.h"
 #include "linefile.h"
-#include "cheapcgi.h"
+#include "options.h"
 #include "hash.h"
 #include "fa.h"
 #include "bed.h"
 #include "repMask.h"
 
 static char const rcsid[] = "$Id$";
 
+static struct optionSpec optionSpecs[] = { /* option table handed to optionInit() in main() */
+    {"soft", OPTION_BOOLEAN},       /* -soft: masked parts lower case, others upper case */
+    {"softAdd", OPTION_BOOLEAN},    /* -softAdd: lower-case masked bits, leave others unchanged */
+    {"clip", OPTION_BOOLEAN},       /* -clip: clip out-of-bounds mask records rather than dying */
+    {"maskFormat", OPTION_STRING},  /* -maskFormat=fmt: "out" or "bed", overrides file-extension detection */
+    {NULL, 0}                       /* presumably the end-of-table marker; confirm against options.h */
+};
+
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
@@ -24,13 +32,12 @@
   "lower case letters are converted to N's.\n"
   "options:\n"
   "   -soft - puts masked parts in lower case other in upper.\n"
   "   -softAdd - lower cases masked bits, leaves others unchanged\n"
-  "   -clip - clip out of bounds mask records rather than dying.\n");
+  "   -clip - clip out of bounds mask records rather than dying.\n"
+  "   -maskFormat=fmt - \"out\" or \"bed\" for when input does not have required extension.\n");
 }
 
-boolean faMixedSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName);
-
 void maskOutFa(char *inFa, char *maskFile, char *outFa)
 /* maskOutFa - Produce a masked .fa file given an unmasked .fa and a RepeatMasker .out file. */
 {
 struct lineFile *lf;
@@ -38,17 +45,36 @@
 struct dnaSeq *seqList = NULL, *seq;
 char *line;
 int lineSize;
 boolean ok;
-boolean isOut = endsWith(maskFile, ".out");
-boolean isBed = endsWith(maskFile, ".bed");
+boolean isOut = FALSE;
+boolean isBed = FALSE;
 boolean extraHard = sameWord(maskFile, "hard");
 FILE *f;
 char *words[32];
 int wordCount;
-boolean clip = cgiBoolean("clip");
-boolean soft = cgiBoolean("soft");
-boolean softAdd = cgiBoolean("softAdd");
+char *maskFormat = optionVal("maskFormat", NULL);
+boolean clip = optionExists("clip");
+boolean soft = optionExists("soft");
+boolean softAdd = optionExists("softAdd");
+
+if (maskFormat == NULL)
+    {
+    isOut = endsWith(maskFile, ".out");
+    isBed = endsWith(maskFile, ".bed");
+    }
+else if (sameWord(maskFormat, "bed"))
+    {
+    isOut = FALSE;
+    isBed = TRUE;
+    }
+else if (sameWord(maskFormat, "out"))
+    {
+    isOut = TRUE;
+    isBed = FALSE;
+    }
+else
+	errAbort("Unrecognized -maskFormat: %s", maskFormat);
 
 /* Read DNA and hash sequence names. */
 seqList = faReadAllMixed(inFa);
 for (seq = seqList; seq != NULL; seq = seq->next)
@@ -80,9 +106,9 @@
 	{
 	}
     else
 	{
-	errAbort("Unrecognized file type %s", maskFile);
+	errAbort("Unrecognized file type %s, may need to specify -maskFormat", maskFile);
 	}
 
     /* Read line at a time from mask file and set masked sequence 
      * areas to N. */
@@ -149,9 +175,9 @@
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
-cgiSpoof(&argc, argv);
+optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage();
 maskOutFa(argv[1], argv[2], argv[3]);
 return 0;