4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/oneShot/trimFosmids/trimFosmids.c src/hg/oneShot/trimFosmids/trimFosmids.c index d72f4dc..b2c9228 100644 --- src/hg/oneShot/trimFosmids/trimFosmids.c +++ src/hg/oneShot/trimFosmids/trimFosmids.c @@ -1,191 +1,191 @@ /* trimFosmids - Process Fosmid end reads to remove low quality bases and put in one big file. */ /* Copyright (C) 2011 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "portable.h" #include "hash.h" #include "fa.h" #include "qaSeq.h" void usage() /* Explain usage and exit. */ { errAbort( "trimFosmids - Process Fosmid Endreads to remove low quality bases and put in one big file\n" "usage:\n" " trimFosmids sangDir outFile.fa\n"); } int tilNextGood(UBYTE *q, int size, int minQual) /* Return number of bases until next one that meets or exceeds minQual. */ { int i; for (i=0; i<size; ++i) { if (q[i] >= minQual) break; } return i; } int tilNextBad(UBYTE *q, int size, int minQual) /* Return number of bases until next one that is less than minQual. */ { int i; for (i=0; i<size; ++i) { if (q[i] < minQual) break; } return i; } int revTilNextGood(UBYTE *q, int size, int minQual) /* Return number of bases in reverse direction until next one that meets or exceeds minQual. */ { int i; q += size-1; for (i=0; i<size; ++i) { if (q[-i] >= minQual) break; } return i; } int revTilNextBad(UBYTE *q, int size, int minQual) /* Return number of bases in reverse direction until next one that is less than minQual. */ { int i; q += size-1; for (i=0; i<size; ++i) { if (q[-i] < minQual) break; } return i; } boolean trimQa(struct qaSeq *qa, int minQual, int minQualRun, int *retStart, int *retSize) /* Find part of sequence that survives trimming according to minQual * and qual run specs. Return FALSE if nothing left after trimming. * This trims sequence on both ends until it comes to a run of sequences * of length minQualRun or more which meets or exceeds minQual. */ { int qaSize = qa->size; UBYTE *q = qa->qa; int start, end; int runSize; for (start = 0; start < qaSize; ) { start += tilNextGood(q+start, qaSize-start, minQual); runSize = tilNextBad(q+start, qaSize-start, minQual); if (runSize >= minQualRun) break; start += runSize; } *retStart = start; if (start >= qaSize) { *retSize = 0; return FALSE; } for (end = qaSize; end > start;) { runSize = revTilNextGood(q+start, end - start, minQual); end -= runSize; runSize = revTilNextBad(q+start, end - start, minQual); if (runSize >= minQualRun) break; end -= runSize; } *retSize = end - start; return (end > start); } int filterByQual(char *faFileName, FILE *f, int minQual, int minQualRun, struct hash *uniqHash) /* Write out parts of sequence that meet quality standards to fa file in out. * Returns untrimmed size. */ { char qaFileName[512], dir[256], name[128], ext[64]; struct qaSeq *qa; int start, size; int initialSize; splitPath(faFileName, dir, name, ext); sprintf(qaFileName, "%s%s.qual", dir, name); qa = qaMustReadBoth(qaFileName, faFileName); if (hashLookup(uniqHash, qa->name)) warn("%s duplicated, ignoring all but first occurrence", qa->name); else { hashAdd(uniqHash, qa->name, NULL); if (trimQa(qa, minQual, minQualRun, &start, &size)) { faWriteNext(f, qa->name, qa->dna + start, size); } } initialSize = qa->size; qaSeqFree(&qa); return initialSize; } void trimFosmids(char *sangDir, char *outFile) /* trimFosmids - Process Fosmid end reads to remove low quality bases and put in one big file. */ { struct hash *hash = newHash(20); struct fileInfo *dirList, *dirEl; struct fileInfo *subList, *subEl; struct fileInfo *faList, *faEl; FILE *f = mustOpen(outFile, "w"); unsigned long totalSize = 0; dirList = listDirX(sangDir, "*", TRUE); for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next) { if (dirEl->isDir) { printf("%s", dirEl->name); fflush(stdout); subList = listDirX(dirEl->name, "*", TRUE); for (subEl = subList; subEl != NULL; subEl = subEl->next) { if (subEl->isDir) { printf("."); fflush(stdout); faList = listDirX(subEl->name, "*.fasta", TRUE); for (faEl = faList; faEl != NULL; faEl = faEl->next) { uglyf("%s\n", faEl->name); // totalSize += filterByQual(faEl->name, f, 19, 15, hash); } slFreeList(&faList); } } printf("\n"); slFreeList(&subList); } } printf("Total size %lu bytes\n", totalSize); fclose(f); } int main(int argc, char *argv[]) /* Process command line. */ { if (argc != 3) usage(); dnaUtilOpen(); trimFosmids(argv[1], argv[2]); return 0; }