be4311c07e14feb728abc6425ee606ffaa611a58
markd
  Fri Jan 22 06:46:58 2021 -0800
merge with master

diff --git src/utils/faSize/faSize.c src/utils/faSize/faSize.c
index e4e4dbf..16edaef 100644
--- src/utils/faSize/faSize.c
+++ src/utils/faSize/faSize.c
@@ -1,40 +1,43 @@
 /* faSize - print total size and total N count of FA file. */
 #include "common.h"
 #include "fa.h"
 #include "dnautil.h"
 #include "options.h"
 
 
 /* Command line options; each is a boolean flag and the table ends with a NULL entry. */
 static struct optionSpec optionSpecs[] =
 {
     {"detailed", OPTION_BOOLEAN},
     {"tab", OPTION_BOOLEAN},
+    {"veryDetailed", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 void usage()
 /* Print usage info (synopsis plus the supported command flags) via errAbort and exit. */
 {
 errAbort("faSize - print total base count in fa files.\n"
 	 "usage:\n"
 	 "   faSize file(s).fa\n"
 	 "Command flags\n"
 	 "   -detailed        outputs name and size of each record\n"
          "                    has the side effect of printing nothing else\n"
-         "   -tab             output statistics in a tab separated format\n");
+         "   -tab             output statistics in a tab separated format\n"
+         "   -veryDetailed    outputs name, size, #Ns, #real, #upper, #lower of each record\n"
+         );
 }
 
 struct faInfo
 /* Summary info on one sequence record read from a fa file. */
    {
    struct faInfo *next;	/* Next in list. */
    char *name;		/* First word after >.  The name of seq. */
    int size;            /* Size, including N's. */
    int nCount;          /* Number of N's. */
    int lCount;          /* Number of lower-case chars. */
    int uCount;          /* Number of upper-case chars. */
    };
 
 int cmpFaInfo(const void *va, const void *vb)
 /* Compare two faInfo. */
@@ -158,30 +161,31 @@
 
 void faSize(char *faFiles[], int faCount)
 /* faSize - print total size and total N count of FA files. */
 {
 char *fileName;
 int i;
 struct dnaSeq seq;
 int fileCount = 0;
 int seqCount = 0;
 unsigned long long baseCount = 0;
 unsigned long long nCount = 0;
 unsigned long long uCount = 0;
 unsigned long long lCount = 0;
 struct lineFile *lf;
 struct faInfo *fiList = NULL, *fi;
+boolean veryDetailed = optionExists("veryDetailed");
 boolean detailed = optionExists("detailed");
 boolean tabFmt = optionExists("tab");
 
 ZeroVar(&seq);
 
 dnaUtilOpen();
 for (i = 0; i<faCount; ++i)
     {
     fileName = faFiles[i];
     lf = lineFileOpen(fileName, FALSE);
     ++fileCount;
     while (faSpeedReadNextPC(lf, &seq.dna, &seq.size, &seq.name))
 	{
 	int j;
 	int ns = 0;
@@ -201,39 +205,43 @@
 		    ++us;
 		if (islower(d)) 
 		    ++ls;
 		}
 	    }
 	baseCount += seq.size;
 	nCount += ns;
 	uCount += us;
 	lCount += ls;
 	AllocVar(fi);
 	fi->name = cloneString(seq.name);
 	fi->size = seq.size;
 	fi->nCount = ns;
 	fi->uCount = us;
 	fi->lCount = ls;
-	if (detailed)
+        if (veryDetailed)
+            {
+	    printf("%s\t%d\t%d\t%d\t%d\t%d\n", seq.name, seq.size, ns, seq.size-ns, us, ls);
+            }
+	else if (detailed)
 	    {
 	    printf("%s\t%d\n", seq.name, seq.size);
 	    }
 	slAddHead(&fiList, fi);
 	}
     lineFileClose(&lf);
     }
-if (!detailed)
+if (!(detailed || veryDetailed))
     {
     double perCentMasked = 100.0;
     double perCentRealMasked = 100.0;
     if (baseCount > 0)
 	perCentMasked = 100.0*(double)lCount/(double)baseCount;
     if ((baseCount - nCount) > 0)
 	perCentRealMasked = 100.0*(double)lCount/(double)(baseCount - nCount);
 
     if (tabFmt)
         {
         printf("baseCount\t%llu\n", baseCount);
         printf("nBaseCount\t%llu\n", nCount);
         printf("realBaseCount\t%llu\n", baseCount - nCount);
         printf("upperBaseCount\t%llu\n", uCount);
         printf("lowerBaseCount\t%llu\n", lCount);