989c9aeaab7cb44cf18de57003b0044e229b6f28 donnak Mon Oct 14 11:39:29 2013 -0700 Fixed some typos and formatting probs in conjunction with updating blat spec. diff --git src/blat/blat.c src/blat/blat.c index 0f8b171..9b49948 100644 --- src/blat/blat.c +++ src/blat/blat.c @@ -52,117 +52,117 @@ char *qMask = NULL; double minRepDivergence = 15; double minIdentity = 90; char *outputFormat = "psl"; void usage() /* Explain usage and exit. */ { printf( "blat - Standalone BLAT v. %s fast sequence search command line tool\n" "usage:\n" " blat database query [-ooc=11.ooc] output.psl\n" "where:\n" " database and query are each either a .fa, .nib or .2bit file,\n" - " or a list these files one file name per line.\n" + " or a list of these files with one file name per line.\n" " -ooc=11.ooc tells the program to load over-occurring 11-mers from\n" - " and external file. This will increase the speed\n" - " by a factor of 40 in many cases, but is not required\n" - " output.psl is where to put the output.\n" - " Subranges of nib and .2bit files may specified using the syntax:\n" + " an external file. This will increase the speed\n" + " by a factor of 40 in many cases, but is not required.\n" + " output.psl is the name of the output file.\n" + " Subranges of .nib and .2bit files may be specified using the syntax:\n" " /path/file.nib:seqid:start-end\n" " or\n" " /path/file.2bit:seqid:start-end\n" " or\n" " /path/file.nib:start-end\n" " With the second form, a sequence id of file:start-end will be used.\n" "options:\n" " -t=type Database type. Type is one of:\n" " dna - DNA sequence\n" " prot - protein sequence\n" " dnax - DNA sequence translated in six frames to protein\n" - " The default is dna\n" + " The default is dna.\n" " -q=type Query type. Type is one of:\n" " dna - DNA sequence\n" " rna - RNA sequence\n" " prot - protein sequence\n" " dnax - DNA sequence translated in six frames to protein\n" " rnax - DNA sequence translated in three frames to protein\n" - " The default is dna\n" - " -prot Synonymous with -t=prot -q=prot\n" + " The default is dna.\n" + " -prot Synonymous with -t=prot -q=prot.\n" " -ooc=N.ooc Use overused tile file N.ooc. N should correspond to \n" - " the tileSize\n" - " -tileSize=N sets the size of match that triggers an alignment. \n" - " Usually between 8 and 12\n" + " the tileSize.\n" + " -tileSize=N Sets the size of match that triggers an alignment. \n" + " Usually between 8 and 12.\n" " Default is 11 for DNA and 5 for protein.\n" - " -stepSize=N spacing between tiles. Default is tileSize.\n" - " -oneOff=N If set to 1 this allows one mismatch in tile and still\n" + " -stepSize=N Spacing between tiles. Default is tileSize.\n" + " -oneOff=N If set to 1, this allows one mismatch in tile and still\n" " triggers an alignments. Default is 0.\n" - " -minMatch=N sets the number of tile matches. Usually set from 2 to 4\n" + " -minMatch=N Sets the number of tile matches. Usually set from 2 to 4.\n" " Default is 2 for nucleotide, 1 for protein.\n" - " -minScore=N sets minimum score. This is the matches minus the \n" - " mismatches minus some sort of gap penalty. Default is 30\n" + " -minScore=N Sets minimum score. This is the matches minus the \n" + " mismatches minus some sort of gap penalty. Default is 30.\n" " -minIdentity=N Sets minimum sequence identity (in percent). Default is\n" " 90 for nucleotide searches, 25 for protein or translated\n" " protein searches.\n" - " -maxGap=N sets the size of maximum gap between tiles in a clump. Usually\n" + " -maxGap=N Sets the size of maximum gap between tiles in a clump. Usually\n" " set from 0 to 3. Default is 2. Only relevent for minMatch > 1.\n" - " -noHead suppress .psl header (so it's just a tab-separated file)\n" + " -noHead Suppresses .psl header (so it's just a tab-separated file).\n" " -makeOoc=N.ooc Make overused tile file. Target needs to be complete genome.\n" - " -repMatch=N sets the number of repetitions of a tile allowed before\n" + " -repMatch=N Sets the number of repetitions of a tile allowed before\n" " it is marked as overused. Typically this is 256 for tileSize\n" " 12, 1024 for tile size 11, 4096 for tile size 10.\n" - " Default is 1024. Typically only comes into play with makeOoc.\n" - " Also affected by stepSize. When stepSize is halved repMatch is\n" + " Default is 1024. Typically comes into play only with makeOoc.\n" + " Also affected by stepSize: when stepSize is halved, repMatch is\n" " doubled to compensate.\n" " -mask=type Mask out repeats. Alignments won't be started in masked region\n" " but may extend through it in nucleotide searches. Masked areas\n" - " are ignored entirely in protein or translated searches. Types are\n" - " lower - mask out lower cased sequence\n" - " upper - mask out upper cased sequence\n" + " are ignored entirely in protein or translated searches. Types are:\n" + " lower - mask out lower-cased sequence\n" + " upper - mask out upper-cased sequence\n" " out - mask according to database.out RepeatMasker .out file\n" " file.out - mask database according to RepeatMasker file.out\n" " -qMask=type Mask out repeats in query sequence. Similar to -mask above but\n" " for query rather than target sequence.\n" " -repeats=type Type is same as mask types above. Repeat bases will not be\n" " masked in any way, but matches in repeat areas will be reported\n" " separately from matches in other areas in the psl output.\n" - " -minRepDivergence=NN - minimum percent divergence of repeats to allow \n" + " -minRepDivergence=NN Minimum percent divergence of repeats to allow \n" " them to be unmasked. Default is 15. Only relevant for \n" " masking using RepeatMasker .out files.\n" - " -dots=N Output dot every N sequences to show program's progress\n" - " -trimT Trim leading poly-T\n" - " -noTrimA Don't trim trailing poly-A\n" + " -dots=N Output dot every N sequences to show program's progress.\n" + " -trimT Trim leading poly-T.\n" + " -noTrimA Don't trim trailing poly-A.\n" " -trimHardA Remove poly-A tail from qSize as well as alignments in \n" - " psl output\n" + " psl output.\n" " -fastMap Run for fast DNA/DNA remapping - not allowing introns, \n" " requiring high %%ID. Query sizes must not exceed %d.\n" " -out=type Controls output file format. Type is one of:\n" - " psl - Default. Tab separated format, no sequence\n" - " pslx - Tab separated format with sequence\n" + " psl - Default. Tab-separated format, no sequence\n" + " pslx - Tab-separated format with sequence\n" " axt - blastz-associated axt format\n" " maf - multiz-associated maf format\n" " sim4 - similar to sim4 format\n" " wublast - similar to wublast format\n" " blast - similar to NCBI blast format\n" " blast8- NCBI blast tabular format\n" " blast9 - NCBI blast tabular format with comments\n" - " -fine For high quality mRNAs look harder for small initial and\n" - " terminal exons. Not recommended for ESTs\n" - " -maxIntron=N Sets maximum intron size. Default is %d\n" - " -extendThroughN - Allows extension of alignment through large blocks of N's\n" + " -fine For high-quality mRNAs, look harder for small initial and\n" + " terminal exons. Not recommended for ESTs.\n" + " -maxIntron=N Sets maximum intron size. Default is %d.\n" + " -extendThroughN Allows extension of alignment through large blocks of Ns.\n" , gfVersion, MAXSINGLEPIECESIZE, ffIntronMaxDefault ); exit(-1); } struct optionSpec options[] = { {"t", OPTION_STRING}, {"q", OPTION_STRING}, {"prot", OPTION_BOOLEAN}, {"ooc", OPTION_STRING}, {"tileSize", OPTION_INT}, {"stepSize", OPTION_INT}, {"oneOff", OPTION_INT}, {"minMatch", OPTION_INT},