a31ae2470e4b225665e48c267e22bcca06bc6ddf angie Wed Jun 16 17:45:59 2021 -0700 faFrag: support multi-sequence alignment fasta by fetching the same coords from each sequence in file.
diff --git src/utils/faFrag/faFrag.c src/utils/faFrag/faFrag.c
index ea12d4f..b5edb69 100644
--- src/utils/faFrag/faFrag.c
+++ src/utils/faFrag/faFrag.c
@@ -1,55 +1,64 @@
 /* faFrag - Extract a piece of DNA from a .fa file.. */
 #include "common.h"
 #include "dnaseq.h"
 #include "fa.h"
 #include "options.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "faFrag - Extract a piece of DNA from a .fa file.\n"
   "usage:\n"
   " faFrag in.fa start end out.fa\n"
   "options:\n"
   " -mixed - preserve mixed-case in FASTA file\n");
 }
 
 void faFrag(char *inName, int start, int end, char *outName, boolean mixed)
 /* faFrag - Extract a piece of DNA from a .fa file.. */
 {
-struct dnaSeq *seq;
-char name[512];
-
+FILE *outF = mustOpen(outName, "w");
 if (start >= end)
     usage();
+struct dnaSeq *seqList, *seq;
 if (mixed)
-    seq = faReadAllMixed(inName);
+    seqList = faReadAllMixed(inName);
 else
-    seq = faReadAllDna(inName);
-if (seq->next != NULL)
-    warn("More than one sequence in %s, just using first\n", inName);
+    seqList = faReadAllDna(inName);
+int seqCount = 0;
+for (seq = seqList; seq != NULL; seq = seq->next)
+    {
+    int clippedEnd = end;
     if (end > seq->size)
         {
+        clippedEnd = seq->size;
+        if (start >= clippedEnd)
+            warn("Sorry, %s is too short (%d bases), skipping", seq->name, seq->size);
+        else
             warn("%s only has %d bases, truncating", seq->name, seq->size);
-    end = seq->size;
-    if (start >= end)
-        errAbort("Sorry, no sequence left after truncating");
         }
-sprintf(name, "%s:%d-%d", seq->name, start, end);
-faWrite(outName, name, seq->dna + start, end-start);
-printf("Wrote %d bases to %s\n", end-start, outName);
+    if (start < clippedEnd)
+        {
+        char name[512];
+        safef(name, sizeof(name), "%s:%d-%d", seq->name, start, clippedEnd);
+        faWriteNext(outF, name, seq->dna + start, clippedEnd-start);
+        seqCount++;
+        }
+    }
+carefulClose(&outF);
+verbose(2, "Wrote %d bases from %d sequences to %s\n", end-start, seqCount, outName);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionHash(&argc, argv);
 if (argc != 5)
     usage();
 if (!isdigit(argv[2][0]) || !isdigit(argv[3][0]))
     usage();
 faFrag(argv[1], atoi(argv[2]), atoi(argv[3]), argv[4], optionExists("mixed"));
 return 0;
 }