src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c 2217111185161d4c10fb3f3e87e817bdebdd53a7

2217111185161d4c10fb3f3e87e817bdebdd53a7
kent
  Thu Oct 24 17:18:15 2013 -0700
Making this tolerate new lines in fastq files between stanzas.
diff --git src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c
index 049919e..7c02a7d 100644
--- src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c
+++ src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c
@@ -35,31 +35,31 @@
    {"sampleSize", OPTION_INT},
    {"seed", OPTION_INT},
    {"smallOk", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 /* Estimate base count from file size based on this. */
 #define ZIPPED_BYTES_PER_BASE 0.80
 #define UNZIPPED_BYTES_PER_BASE 2.5
 
 static boolean nextLineMustMatchChar(struct lineFile *lf, char match, boolean noEof)
 /* Get next line from lf and errAbort unless its first character is 'match'.
  * Return TRUE on a match, FALSE on EOF; if noEof is set, EOF is an errAbort too. */
 {
 char *line;
-if (!lineFileNext(lf, &line, NULL))
+if (!lineFileNextReal(lf, &line))
     {
     if (noEof)
         errAbort("Expecting %c got end of file in %s", match, lf->fileName);
     else
         return FALSE;
     }
 if (line[0] != match)
     errAbort("Expecting %c got %s line %d of %s", match, line, lf->lineIx, lf->fileName);
 return TRUE;
 }
 
 static int averageReadSize(char *fileName, int maxReads)
 /* Read up to maxReads from fastq file and return average # of reads. */
 {
 struct lineFile *lf = lineFileOpen(fileName, FALSE);
@@ -165,31 +165,31 @@
 
 
 boolean oneFastqRecord(struct lineFile *lf, FILE *f, boolean copy, boolean firstTime)
 /* Read next fastq record from LF, and optionally copy it to f.  Return FALSE at end of file 
  * Do a _little_ error checking on record while we're at it.  The format has already been
  * validated on the client side fairly thoroughly. */
 {
 char *line;
 int lineSize;
 
 /* Treat NULL file same as non-copy, so only have one condition to check on. */
 if (f == NULL)
     copy = FALSE;
 
 /* Deal with initial line starting with '@' */
-if (!lineFileNext(lf, &line, &lineSize))
+if (!lineFileNextRealWithSize(lf, &line, &lineSize))
     return FALSE;
 if (line[0] != '@')
     errAbort("Expecting line starting with '@' got %s line %d of %s", 
 	line, lf->lineIx, lf->fileName);
 if (copy)
     mustWrite(f, line, lineSize);
 
 /* Deal with line containing sequence. */
 if (!lineFileNext(lf, &line, &lineSize))
     errAbort("%s truncated in middle of record", lf->fileName);
 
 /* Get size and add it to stats */
 int seqSize = lineSize-1;
 if (seqSize > MAX_READ_SIZE)
     errAbort("Sequence size %d too long line %d of %s.  Max is %d", seqSize, 
@@ -280,31 +280,31 @@
 
 
 return TRUE;
 }
 
 boolean maybeCopyFastqRecord(struct lineFile *lf, FILE *f, boolean copy, int *retSeqSize)
 /* Read next fastq record from LF, and optionally copy it to f.  Return FALSE at end of file 
  * Do a _little_ error checking on record while we're at it.  The format has already been
  * validated on the client side fairly thoroughly. Similar to oneFastqRecord but with
  * fewer side effects. */
 {
 char *line;
 int lineSize;
 
 /* Deal with initial line starting with '@' */
-if (!lineFileNext(lf, &line, &lineSize))
+if (!lineFileNextRealWithSize(lf, &line, &lineSize))
     return FALSE;
 if (line[0] != '@')
     errAbort("Expecting line starting with '@' got %s line %d of %s", 
 	line, lf->lineIx, lf->fileName);
 if (copy)
     mustWrite(f, line, lineSize);
 
 
 /* Deal with line containing sequence. */
 if (!lineFileNext(lf, &line, &lineSize))
     errAbort("%s truncated in middle of record", lf->fileName);
 if (copy)
     mustWrite(f, line, lineSize);
 int seqSize = lineSize-1;