c6778c294dc2a5d5e1cf6df928e16dc9016df71c
ceisenhart
  Thu May 29 13:00:14 2014 -0700
Updated to with new library file'fq', fixed indenting and comment missplacement

diff --git src/utils/fastqMottTrim/fastqMottTrim.c src/utils/fastqMottTrim/fastqMottTrim.c
index 61b8c3d..93cc23a 100644
--- src/utils/fastqMottTrim/fastqMottTrim.c
+++ src/utils/fastqMottTrim/fastqMottTrim.c
@@ -1,219 +1,158 @@
-/* fastqMottTrim - mott. */
-/* Applies Richard Mott's trimming algorithm to the */
-/* three prime end of each sequence. */
-/* Cuttoff is the lowest desired quality score in phred scores, */
-/* Set by default to a 50%  chance of mismatch. */
-/* Base corresponds to any additions to the ASCII quality scheme. */
-/* Minlength specifies the minimum sequence length for the output. */
-/* For paired and unpaired data. */
+/* fastqMottTrim - mott. 
+* Applies Richard Mott's trimming algorithm to the 
+* three prime end of each sequence. 
+* Cuttoff is the lowest desired quality score in phred scores, 
+* Set by default to a 50%  chance of mismatch. 
+* Base corresponds to any additions to the ASCII quality scheme. 
+* Minlength specifies the minimum sequence length for the output. 
+* For paired and unpaired data. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
+#include "fq.h"
 
 int clMinLength = 30;
 boolean clIsIllumina = FALSE;
 int clCutoff = 3;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "fastqMottTrim - applies Mott's trimming algorithm to a fastq file\n"
   " trims the 3 prime end based on cumulative quality \n"
   "usage:\n"
   "   fastqMottTrim input.fq output.fq\n"
   "for paired end use \n"
   "   fastqMottTrim pair1.fq pair2.fq output1.fq output2.fq \n"
   "options:\n"
   " -minLength=N the minimum length allowed for a trimmed sequence default 20  \n"
   " -isIllumina TRUE for illumina, FALSE for Sanger \n"
   " -cutoff=N  the lowest desired phred score, default 30 \n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"minLength",OPTION_INT},
    {"isIllumina",OPTION_BOOLEAN},
    {"cutoff",OPTION_INT},
    {NULL,0},
 };
 
-struct fastqSeq
-/* holds a single fastq sequence */
-    {
-    struct fastqSeq *next; /* Next in list. */
-    int size;       /* Size of the sequence. */
-    char *header;   /* Sequence header, begins with '@' */
-    char *del;      /* Fastq deliminator '+' */
-    char *dna;      /* DNA sequence */
-    unsigned char *quality;  /* DNA quality, in ASCII format */
-    };
-
-char *nextLineIntoString(struct lineFile *lf)
-/* Return next line cloned into persistent memory.  Abort if can't get next line. */
-{
-char *line;
-lineFileNeedNext(lf, &line, NULL);
-return cloneString(line);
-}
-
-struct fastqSeq  *fastqReadNext(struct lineFile *lf)
-/* Reads the next fastq sequence and returns a fastq struct.
- * Returns NULL at end of file. */
-{
-char *line;
-if (!lineFileNext(lf, &line, NULL))
-    return NULL;   /* Pass back end of file. */
-
-struct fastqSeq *seq;
-AllocVar(seq);
-seq->header = cloneString(line);
-seq->dna = nextLineIntoString(lf);
-seq->del = nextLineIntoString(lf);
-seq->quality = (unsigned char *)nextLineIntoString(lf);
-seq->size = strlen(seq->dna);
-if (seq->size != strlen((char *)seq->quality))
-    errAbort("dna and quality sizes don't match at record ending line %d of %s",
-            lf->lineIx, lf->fileName);
-return(seq);
-}
 
-
-void fastqWriteNext(struct fastqSeq *input, FILE *f)
-/*  Writes a single fastq structure to the file provided. */
-{
-    fprintf(f,"%s\n",input->header);
-    fprintf(f,"%s\n",input->dna);
-    fprintf(f,"%s\n",input->del);
-    fprintf(f,"%s\n",input->quality);
-}
-
-void freeFastqSeq(struct fastqSeq **pInput)
-/* Frees the memory allocated to a fastq structure. */
-{
-struct fastqSeq *input = *pInput;
-if (input != NULL)
-    {
-    freeMem(input->header);
-    freeMem(input->dna);
-    freeMem(input->del);
-    freeMem(input->quality);
-    freez(pInput);
-    }
-}
-
-boolean  mottTrim(struct fastqSeq *input, int minLength, boolean isIllumina, int cutoff)
-/* Applies mott's trimming algorithm to the fastq input. */
-/* Trims from the 3 prime end based on */
-/* The sum of (quality score - cutoff value ). */
-/* Returns true if the trimmed sequence is larger than the minimum sequence length. */
+boolean  mottTrim(struct fq *input, int minLength, boolean isIllumina, int cutoff)
+/* Applies mott's trimming algorithm to the fastq input. 
+* Trims from the 3 prime end based on 
+* The sum of (quality score - cutoff value ). 
+* Returns true if the trimmed sequence is larger than the minimum sequence length. */
 {
 int base = 33;
 int index = -1;
 long minValue = 100000000;
 long scoreValue = 0;
 if(isIllumina)
     base = 64;
 int len = strlen(input->dna);
 int i = len - 1;
 for(; i >= 0; --i)
     {
-/* Convert the quality scores to their ascii values. */
-/* Calculate the sum of the (quality score - cutoff value)'s. */
     int qualScore = input->quality[i] - base;
     scoreValue += (qualScore - cutoff);
+    /* Convert the quality scores to their ascii values. 
+    * Calculate the sum of the (quality score - cutoff value)'s. */
     if(scoreValue < minValue)
         {
         minValue = scoreValue;
         index = i;
         }
         /* Modify the fastq fields to be the trimmed length. */
     }
 if(minValue <= cutoff)
     {
     input->dna[index] = '\0';
     input->quality[index] = '\0';
     }  
 if(strlen(input->dna) >= minLength)
     {
     return(TRUE);
     }
 return(FALSE);
 }
 
-void parseFastqPairs(char *input, char *output,
+void trimPairedFastqFiles(char *input, char *output,
             char *input2, char *output2, int minLength, boolean isIllumina, int cutoff )
-/* Goes through fastq sequences in a fastq file; */
-/* Parses, stores, mottTrims,  prints, then frees each fastq sequence. */
-/* For paired data. */
+/* Goes through fastq sequences in a fastq file; 
+* Parses, stores, mottTrims,  prints, then frees each fastq sequence. 
+* For paired data. */
 {
 FILE *f = mustOpen(output, "w");
 FILE *f2 = mustOpen(output2, "w");
 struct lineFile *lf = lineFileOpen(input, TRUE);
 struct lineFile *lf2 = lineFileOpen(input2, TRUE);
-struct fastqSeq *fq;
-struct fastqSeq *fq2;
-while ((fq = fastqReadNext(lf)) != NULL && (fq2 = fastqReadNext(lf2)) != NULL)
+struct fq *fq;
+struct fq *fq2;
+while ((fq = fqReadNext(lf)) != NULL && (fq2 = fqReadNext(lf2)) != NULL)
     {
     if( mottTrim(fq, minLength, isIllumina, cutoff) && mottTrim(fq2, minLength, isIllumina, cutoff))
+        /* For paired data both reads must pass the minimum cutoff length.*/
 	{
-	fastqWriteNext(fq, f);
-        fastqWriteNext(fq2, f2);
+	fqWriteNext(fq, f);
+        fqWriteNext(fq2, f2);
 	}
-    freeFastqSeq(&fq);
-    freeFastqSeq(&fq2);
+    fqFree(&fq);
+    fqFree(&fq2);
     }
 carefulClose(&f);
 carefulClose(&f2);
 }
 
 
 
-void parseFastq(char *input, char *output, int minLength, boolean isIllumina, int cutoff )
+void trimSingleFastqFile(char *input, char *output, int minLength, boolean isIllumina, int cutoff )
 /* Goes through fastq sequences in a fastq file; */
 /* Parses, stores, mottTrims,  prints, then frees each fastq sequence. */
 {
 FILE *f = mustOpen(output, "w");
 struct lineFile *lf = lineFileOpen(input, TRUE);
-struct fastqSeq *fq;
-while ((fq = fastqReadNext(lf)) != NULL)
+struct fq *fq;
+while ((fq = fqReadNext(lf)) != NULL)
     {
     if( mottTrim(fq, minLength, isIllumina, cutoff))
         {
-	fastqWriteNext(fq, f);
+	fqWriteNext(fq, f);
         }
-    freeFastqSeq(&fq);
+    fqFree(&fq);
     }
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 clMinLength = optionInt("minLength", clMinLength);
 clIsIllumina = optionExists("isIllumina");
 clCutoff = optionInt("cutoff", clCutoff);
 
 if(argc != 3 && argc != 5)
     {
     usage();
     }
 if(argc==3)
     {
-    parseFastq(argv[1], argv[2], clMinLength, clIsIllumina, clCutoff);
+    trimSingleFastqFile(argv[1], argv[2], clMinLength, clIsIllumina, clCutoff);
     }
     
 if(argc==5)
     {
-    parseFastqPairs(argv[1], argv[3], argv[2], argv[4], clMinLength, clIsIllumina, clCutoff);
+    trimPairedFastqFiles(argv[1], argv[3], argv[2], argv[4], clMinLength, clIsIllumina, clCutoff);
     }
     
 return 0;
 }