c864e74a423acbe711e6be084573aa11b687c1ce kent Tue Aug 6 17:47:29 2013 -0700 Making it work ok with /dev/null as fastq output. Fixing an integer overflow bug. diff --git src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c index 4b23fdd..049919e 100644 --- src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c +++ src/utils/fastqStatsAndSubsample/fastqStatsAndSubsample.c @@ -131,59 +131,63 @@ for (i=0; ilineIx, lf->fileName); if (copy) mustWrite(f, line, lineSize); /* Deal with line containing sequence. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s truncated in middle of record", lf->fileName); /* Get size and add it to stats */ int seqSize = lineSize-1; @@ -344,71 +348,83 @@ boolean doIt = randomizer[i]; if (!maybeCopyFastqRecord(lf, f, doIt, &seqSize)) internalErr(); if (doIt) basesInSample += seqSize; } freez(&randomizer); lineFileClose(&lf); return basesInSample; } void fastqStatsAndSubsample(char *inFastq, char *outStats, char *outFastq) /* fastqStatsAndSubsample - Go through a fastq file doing sanity checks and collecting * statistics, and also producing a smaller fastq out of a sample of the data. */ { +/* Temporary file if any */ +FILE *smallF = NULL; + +/* Make this work without making input. */ +if (sameString("/dev/null", outFastq)) + outFastq = NULL; + + +/* Open up temp output file. This one will be for the initial scaling. We'll do + * a second round of scaling as well. */ +char smallFastqName[PATH_LEN] = ""; +if (outFastq != NULL) + { /* Split up outFastq path, so we can make a temp file in the same dir. */ char outDir[PATH_LEN]; + if (outFastq) splitPath(outFastq, outDir, NULL, NULL); -/* Open up temp output file. This one will be for the initial scaling. We'll do - * a second round of scaling as well. */ -char smallFastqName[PATH_LEN]; safef(smallFastqName, PATH_LEN, "%sfastqSubsampleXXXXXX", outDir); int smallFd = mkstemp(smallFastqName); -FILE *smallF = fdopen(smallFd, "w"); + smallF = fdopen(smallFd, "w"); + } /* Scan through input, collecting stats, validating, and creating a subset file. */ int downStep = calcInitialReduction(inFastq, sampleSize); struct lineFile *lf = lineFileOpen(inFastq, FALSE); boolean done = FALSE; int readsCopied = 0, totalReads = 0; long long basesInSample = 0; boolean firstTime = TRUE; while (!done) { int hotPosInCycle = rand()%downStep; int cycle; for (cycle=0; cycle