a31ae2470e4b225665e48c267e22bcca06bc6ddf
angie
  Wed Jun 16 17:45:59 2021 -0700
faFrag: support multi-sequence alignment FASTA by fetching the same coordinates from each sequence in the file.

diff --git src/utils/faFrag/faFrag.c src/utils/faFrag/faFrag.c
index ea12d4f..b5edb69 100644
--- src/utils/faFrag/faFrag.c
+++ src/utils/faFrag/faFrag.c
@@ -7,49 +7,58 @@
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "faFrag - Extract a piece of DNA from a .fa file.\n"
   "usage:\n"
   "   faFrag in.fa start end out.fa\n"
   "options:\n"
   "   -mixed - preserve mixed-case in FASTA file\n");
 }
 
 void faFrag(char *inName, int start, int end, char *outName, boolean mixed)
 /* faFrag - Extract a piece of DNA from a .fa file.. */
 {
-struct dnaSeq *seq;
-char name[512];
-
+FILE *outF = mustOpen(outName, "w");
 if (start >= end)
     usage();
+struct dnaSeq *seqList, *seq;
 if (mixed)
-    seq = faReadAllMixed(inName);
+    seqList = faReadAllMixed(inName);
 else
-    seq = faReadAllDna(inName);
-if (seq->next != NULL)
-    warn("More than one sequence in %s, just using first\n", inName);
+    seqList = faReadAllDna(inName);
+int seqCount = 0;
+for (seq = seqList;  seq != NULL;  seq = seq->next)
+    {
+    int clippedEnd = end;
     if (end > seq->size)
         {
+        clippedEnd = seq->size;
+        if (start >= clippedEnd)
+            warn("Sorry, %s is too short (%d bases), skipping", seq->name, seq->size);
+        else
             warn("%s only has %d bases, truncating", seq->name, seq->size);
-    end = seq->size;
-    if (start >= end)
-        errAbort("Sorry, no sequence left after truncating");
         }
-sprintf(name, "%s:%d-%d", seq->name, start, end);
-faWrite(outName, name, seq->dna + start, end-start);
-printf("Wrote %d bases to %s\n", end-start, outName);
+    if (start < clippedEnd)
+        {
+        char name[512];
+        safef(name, sizeof(name), "%s:%d-%d", seq->name, start, clippedEnd);
+        faWriteNext(outF, name, seq->dna + start, clippedEnd-start);
+        seqCount++;
+        }
+    }
+carefulClose(&outF);
+verbose(2, "Wrote %d bases from %d sequences to %s\n", end-start, seqCount, outName);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionHash(&argc, argv);
 if (argc != 5)
     usage();
 if (!isdigit(argv[2][0]) || !isdigit(argv[3][0]))
     usage();
 faFrag(argv[1], atoi(argv[2]), atoi(argv[3]), argv[4], optionExists("mixed"));
 return 0;
 }