src/hg/maskOutFa/maskOutFa.c 1.10
1.10 2009/08/02 19:47:35 markd
Add -maskFormat option to specify the mask input format when the mask file lacks the required extension (e.g. when reading from stdin). Validate command-line options.
Index: src/hg/maskOutFa/maskOutFa.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/maskOutFa/maskOutFa.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/maskOutFa/maskOutFa.c 14 Jun 2006 16:31:30 -0000 1.9
+++ src/hg/maskOutFa/maskOutFa.c 2 Aug 2009 19:47:35 -0000 1.10
@@ -1,16 +1,24 @@
/* maskOutFa - Produce a masked .fa file given an unmasked .fa and
* a RepeatMasker .out file or a bed file to mask on. */
#include "common.h"
#include "linefile.h"
-#include "cheapcgi.h"
+#include "options.h"
#include "hash.h"
#include "fa.h"
#include "bed.h"
#include "repMask.h"
static char const rcsid[] = "$Id$";
+static struct optionSpec optionSpecs[] = {
+ {"soft", OPTION_BOOLEAN},
+ {"softAdd", OPTION_BOOLEAN},
+ {"clip", OPTION_BOOLEAN},
+ {"maskFormat", OPTION_STRING},
+ {NULL, 0}
+};
+
void usage()
/* Explain usage and exit. */
{
errAbort(
@@ -24,13 +32,12 @@
"lower case letters are converted to N's.\n"
"options:\n"
" -soft - puts masked parts in lower case other in upper.\n"
" -softAdd - lower cases masked bits, leaves others unchanged\n"
- " -clip - clip out of bounds mask records rather than dying.\n");
+ " -clip - clip out of bounds mask records rather than dying.\n"
+ " -maskFormat=fmt - \"out\" or \"bed\" for when input does not have required extension.\n");
}
-boolean faMixedSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName);
-
void maskOutFa(char *inFa, char *maskFile, char *outFa)
/* maskOutFa - Produce a masked .fa file given an unmasked .fa and a RepeatMasker .out file. */
{
struct lineFile *lf;
@@ -38,17 +45,36 @@
struct dnaSeq *seqList = NULL, *seq;
char *line;
int lineSize;
boolean ok;
-boolean isOut = endsWith(maskFile, ".out");
-boolean isBed = endsWith(maskFile, ".bed");
+boolean isOut = FALSE;
+boolean isBed = FALSE;
boolean extraHard = sameWord(maskFile, "hard");
FILE *f;
char *words[32];
int wordCount;
-boolean clip = cgiBoolean("clip");
-boolean soft = cgiBoolean("soft");
-boolean softAdd = cgiBoolean("softAdd");
+char *maskFormat = optionVal("maskFormat", NULL);
+boolean clip = optionExists("clip");
+boolean soft = optionExists("soft");
+boolean softAdd = optionExists("softAdd");
+
+if (maskFormat == NULL)
+ {
+ isOut = endsWith(maskFile, ".out");
+ isBed = endsWith(maskFile, ".bed");
+ }
+else if (sameWord(maskFormat, "bed"))
+ {
+ isOut = FALSE;
+ isBed = TRUE;
+ }
+else if (sameWord(maskFormat, "out"))
+ {
+ isOut = TRUE;
+ isBed = FALSE;
+ }
+else
+ errAbort("Unrecognized -maskFormat: %s", maskFormat);
/* Read DNA and hash sequence names. */
seqList = faReadAllMixed(inFa);
for (seq = seqList; seq != NULL; seq = seq->next)
@@ -80,9 +106,9 @@
{
}
else
{
- errAbort("Unrecognized file type %s", maskFile);
+ errAbort("Unrecognized file type %s, may need to specify -maskFormat", maskFile);
}
/* Read line at a time from mask file and set masked sequence
* areas to N. */
@@ -149,9 +175,9 @@
int main(int argc, char *argv[])
/* Process command line. */
{
-cgiSpoof(&argc, argv);
+optionInit(&argc, argv, optionSpecs);
if (argc != 4)
usage();
maskOutFa(argv[1], argv[2], argv[3]);
return 0;