src/blat/blat.c 989c9aeaab7cb44cf18de57003b0044e229b6f28

989c9aeaab7cb44cf18de57003b0044e229b6f28
donnak
  Mon Oct 14 11:39:29 2013 -0700
Fixed some typos and formatting problems in conjunction with updating blat spec.
diff --git src/blat/blat.c src/blat/blat.c
index 0f8b171..9b49948 100644
--- src/blat/blat.c
+++ src/blat/blat.c
@@ -52,117 +52,117 @@
 char *qMask = NULL;
 double minRepDivergence = 15;
 double minIdentity = 90;
 char *outputFormat = "psl";
 
 
 void usage()
 /* Explain usage and exit. */
 {
 printf(
   "blat - Standalone BLAT v. %s fast sequence search command line tool\n"
   "usage:\n"
   "   blat database query [-ooc=11.ooc] output.psl\n"
   "where:\n"
   "   database and query are each either a .fa, .nib or .2bit file,\n"
-  "   or a list these files one file name per line.\n"
+  "      or a list of these files with one file name per line.\n"
   "   -ooc=11.ooc tells the program to load over-occurring 11-mers from\n"
-  "               and external file.  This will increase the speed\n"
-  "               by a factor of 40 in many cases, but is not required\n"
-  "   output.psl is where to put the output.\n"
-  "   Subranges of nib and .2bit files may specified using the syntax:\n"
+  "      an external file.  This will increase the speed\n"
+  "      by a factor of 40 in many cases, but is not required.\n"
+  "   output.psl is the name of the output file.\n"
+  "   Subranges of .nib and .2bit files may be specified using the syntax:\n"
   "      /path/file.nib:seqid:start-end\n"
   "   or\n"
   "      /path/file.2bit:seqid:start-end\n"
   "   or\n"
   "      /path/file.nib:start-end\n"
   "   With the second form, a sequence id of file:start-end will be used.\n"
   "options:\n"
   "   -t=type        Database type.  Type is one of:\n"
   "                    dna - DNA sequence\n"
   "                    prot - protein sequence\n"
   "                    dnax - DNA sequence translated in six frames to protein\n"
-  "               The default is dna\n"
+  "                  The default is dna.\n"
   "   -q=type        Query type.  Type is one of:\n"
   "                    dna - DNA sequence\n"
   "                    rna - RNA sequence\n"
   "                    prot - protein sequence\n"
   "                    dnax - DNA sequence translated in six frames to protein\n"
   "                    rnax - DNA sequence translated in three frames to protein\n"
-  "               The default is dna\n"
-  "   -prot       Synonymous with -t=prot -q=prot\n"
+  "                  The default is dna.\n"
+  "   -prot          Synonymous with -t=prot -q=prot.\n"
   "   -ooc=N.ooc     Use overused tile file N.ooc.  N should correspond to \n"
-  "               the tileSize\n"
-  "   -tileSize=N sets the size of match that triggers an alignment.  \n"
-  "               Usually between 8 and 12\n"
+  "                  the tileSize.\n"
+  "   -tileSize=N    Sets the size of match that triggers an alignment.  \n"
+  "                  Usually between 8 and 12.\n"
   "                  Default is 11 for DNA and 5 for protein.\n"
-  "   -stepSize=N spacing between tiles. Default is tileSize.\n"
-  "   -oneOff=N   If set to 1 this allows one mismatch in tile and still\n"
+  "   -stepSize=N    Spacing between tiles. Default is tileSize.\n"
+  "   -oneOff=N      If set to 1, this allows one mismatch in tile and still\n"
   "                  triggers an alignments.  Default is 0.\n"
-  "   -minMatch=N sets the number of tile matches.  Usually set from 2 to 4\n"
+  "   -minMatch=N    Sets the number of tile matches.  Usually set from 2 to 4.\n"
   "                  Default is 2 for nucleotide, 1 for protein.\n"
-  "   -minScore=N sets minimum score.  This is the matches minus the \n"
-  "               mismatches minus some sort of gap penalty.  Default is 30\n"
+  "   -minScore=N    Sets minimum score.  This is the matches minus the \n"
+  "                  mismatches minus some sort of gap penalty.  Default is 30.\n"
   "   -minIdentity=N Sets minimum sequence identity (in percent).  Default is\n"
   "                  90 for nucleotide searches, 25 for protein or translated\n"
   "                  protein searches.\n"
-  "   -maxGap=N   sets the size of maximum gap between tiles in a clump.  Usually\n"
+  "   -maxGap=N      Sets the size of maximum gap between tiles in a clump.  Usually\n"
   "                  set from 0 to 3.  Default is 2. Only relevent for minMatch > 1.\n"
-  "   -noHead     suppress .psl header (so it's just a tab-separated file)\n"
+  "   -noHead        Suppresses .psl header (so it's just a tab-separated file).\n"
   "   -makeOoc=N.ooc Make overused tile file. Target needs to be complete genome.\n"
-  "   -repMatch=N sets the number of repetitions of a tile allowed before\n"
+  "   -repMatch=N    Sets the number of repetitions of a tile allowed before\n"
   "                  it is marked as overused.  Typically this is 256 for tileSize\n"
   "                  12, 1024 for tile size 11, 4096 for tile size 10.\n"
-  "               Default is 1024.  Typically only comes into play with makeOoc.\n"
-  "               Also affected by stepSize. When stepSize is halved repMatch is\n"
+  "                  Default is 1024.  Typically comes into play only with makeOoc.\n"
+  "                  Also affected by stepSize: when stepSize is halved, repMatch is\n"
   "                  doubled to compensate.\n"
   "   -mask=type     Mask out repeats.  Alignments won't be started in masked region\n"
   "                  but may extend through it in nucleotide searches.  Masked areas\n"
-  "               are ignored entirely in protein or translated searches. Types are\n"
-  "                 lower - mask out lower cased sequence\n"
-  "                 upper - mask out upper cased sequence\n"
+  "                  are ignored entirely in protein or translated searches. Types are:\n"
+  "                    lower - mask out lower-cased sequence\n"
+  "                    upper - mask out upper-cased sequence\n"
   "                    out   - mask according to database.out RepeatMasker .out file\n"
   "                    file.out - mask database according to RepeatMasker file.out\n"
   "   -qMask=type    Mask out repeats in query sequence.  Similar to -mask above but\n"
   "                  for query rather than target sequence.\n"
   "   -repeats=type  Type is same as mask types above.  Repeat bases will not be\n"
   "                  masked in any way, but matches in repeat areas will be reported\n"
   "                  separately from matches in other areas in the psl output.\n"
-  "   -minRepDivergence=NN - minimum percent divergence of repeats to allow \n"
+  "   -minRepDivergence=NN   Minimum percent divergence of repeats to allow \n"
   "                  them to be unmasked.  Default is 15.  Only relevant for \n"
   "                  masking using RepeatMasker .out files.\n"
-  "   -dots=N     Output dot every N sequences to show program's progress\n"
-  "   -trimT      Trim leading poly-T\n"
-  "   -noTrimA    Don't trim trailing poly-A\n"
+  "   -dots=N        Output dot every N sequences to show program's progress.\n"
+  "   -trimT         Trim leading poly-T.\n"
+  "   -noTrimA       Don't trim trailing poly-A.\n"
   "   -trimHardA     Remove poly-A tail from qSize as well as alignments in \n"
-  "               psl output\n"
+  "                  psl output.\n"
   "   -fastMap       Run for fast DNA/DNA remapping - not allowing introns, \n"
   "                  requiring high %%ID. Query sizes must not exceed %d.\n"
   "   -out=type      Controls output file format.  Type is one of:\n"
-  "                   psl - Default.  Tab separated format, no sequence\n"
-  "                   pslx - Tab separated format with sequence\n"
+  "                    psl - Default.  Tab-separated format, no sequence\n"
+  "                    pslx - Tab-separated format with sequence\n"
   "                    axt - blastz-associated axt format\n"
   "                    maf - multiz-associated maf format\n"
   "                    sim4 - similar to sim4 format\n"
   "                    wublast - similar to wublast format\n"
   "                    blast - similar to NCBI blast format\n"
   "                    blast8- NCBI blast tabular format\n"
   "                    blast9 - NCBI blast tabular format with comments\n"
-  "   -fine       For high quality mRNAs look harder for small initial and\n"
-  "               terminal exons.  Not recommended for ESTs\n"
-  "   -maxIntron=N  Sets maximum intron size. Default is %d\n"
-  "   -extendThroughN - Allows extension of alignment through large blocks of N's\n"
+  "   -fine          For high-quality mRNAs, look harder for small initial and\n"
+  "                  terminal exons.  Not recommended for ESTs.\n"
+  "   -maxIntron=N   Sets maximum intron size. Default is %d.\n"
+  "   -extendThroughN Allows extension of alignment through large blocks of Ns.\n"
   , gfVersion, MAXSINGLEPIECESIZE, ffIntronMaxDefault
   );
 exit(-1);
 }
 
 
 struct optionSpec options[] = {
    {"t", OPTION_STRING},
    {"q", OPTION_STRING},
    {"prot", OPTION_BOOLEAN},
    {"ooc", OPTION_STRING},
    {"tileSize", OPTION_INT},
    {"stepSize", OPTION_INT},
    {"oneOff", OPTION_INT},
    {"minMatch", OPTION_INT},