e941a68c6f3089528741d24330b8ef04999a47fe
markd
  Sun Nov 14 15:03:29 2021 -0800
added -tsv option to pslStats to write a TSV-style header (no leading '#') for per-alignment stats

diff --git src/hg/pslStats/pslStats.c src/hg/pslStats/pslStats.c
index 45d0bcc..2032f52 100644
--- src/hg/pslStats/pslStats.c
+++ src/hg/pslStats/pslStats.c
@@ -3,56 +3,59 @@
 /* Copyright (C) 2011 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "obscure.h"
 #include "linefile.h"
 #include "hash.h"
 #include "localmem.h"
 #include "psl.h"
 #include "sqlNum.h"
 
 
 /* size for query name hashs */
 static int queryHashPowTwo = 22;
 static boolean warnOnConflicts = FALSE;
+static boolean tsvHeader = FALSE;
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"queryStats", OPTION_BOOLEAN},
     {"overallStats", OPTION_BOOLEAN},
     {"queries", OPTION_STRING},
     {"warnOnConflicts", OPTION_BOOLEAN},
+    {"tsv", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "pslStats - collect statistics from a psl file.\n"
   "\n"
   "usage:\n"
   "   pslStats [options] psl statsOut\n"
   "\n"
   "Options:\n"
   "  -queryStats - output per-query statistics, the default is per-alignment stats\n"
   "  -overallStats - output overall statistics.\n"
   "  -queries=querySizeFile - tab separated file with of expected qNames and sizes.\n"
   "   If specified, statistic will include queries that didn't align.\n"
   "  -warnOnConflicts - warn and ignore when a two PSLs with the same qName conflict.\n"
-  "   This can happen with bogus generated names.\n");
+  "   This can happen with bogus generated names.\n"
+  "  -tsv - write a TSV header (no leading '#') instead of an autoSql header; per-alignment stats only\n");
 }
 
 struct querySizeCnt
 /* structure used to hold query size and a count */
 {
     unsigned qSize;
     unsigned alnCnt;
 };
 
 static void querySizeConflict(char *qName, unsigned qSize, unsigned qSize2)
 /* generate a error or warning on conflicting query sizes */
 {
 if (warnOnConflicts)
     warn("conflicting query sizes for %s: %d and %d", qName, qSize, qSize2);
 else
@@ -297,58 +300,60 @@
     ss->maxQCover = max(ss->maxQCover, ss2->maxQCover);
     ss->minTCover = min(ss->minTCover, ss2->minTCover);
     ss->maxTCover = max(ss->maxTCover, ss2->maxTCover);
     ss->minRepMatch = min(ss->minRepMatch, ss2->minRepMatch);
     ss->maxRepMatch = max(ss->maxRepMatch, ss2->maxRepMatch);
     }
 ss->queryCnt += ss2->queryCnt;
 ss->totalQSize += ss2->totalQSize;
 ss->totalAlign += ss2->totalAlign;
 ss->totalMatch += ss2->totalMatch;
 ss->totalRepMatch += ss2->totalRepMatch;
 ss->alnCnt += ss2->alnCnt;
 }
 
 /* header for alignment statistics */
-static char *alnStatsHdr = "#qName\t" "qSize\t" "tName\t" "tStart\t" "tEnd\t"
+static char *alnStatsHdr = "qName\t" "qSize\t" "tName\t" "tStart\t" "tEnd\t"
 "ident\t" "qCover\t" "repMatch\t" "tCover\n";
 
 /* format for alignStats output */
 static char *alnStatsFmt = "%s\t%d\t%s\t%d\t%d\t%0.4f\t%0.4f\t%0.4f\t%0.4f\n";
 
 static void alignStatsOutputUnaligned(FILE *fh, struct hash* querySizesTbl)
 /* output stats on unaligned */
 {
 struct hashCookie cookie = hashFirst(querySizesTbl);
 struct hashEl *hel;
 while ((hel = hashNext(&cookie)) != NULL)
     {
     struct querySizeCnt *qs = hel->val;
     if (qs->alnCnt == 0)
         fprintf(fh, alnStatsFmt, hel->name, qs->qSize, "", 0, 0, 0.0, 0.0, 0.0, 0.0);
     }
 }
 
 static void pslAlignStats(char *pslFile, char *statsFile, char *querySizeFile)
 /* collect and output per-alignment stats */
 {
 struct hash* querySizesTbl = (querySizeFile != NULL)
     ? querySizeCntLoad(querySizeFile) : NULL;
 struct lineFile *pslLf = pslFileOpen(pslFile);
 FILE *fh = mustOpen(statsFile, "w");
 struct psl* psl;
 
+if (!tsvHeader)
+    fputc('#', fh);
 fputs(alnStatsHdr, fh);
 while ((psl = pslNext(pslLf)) != NULL)
     {
     fprintf(fh, alnStatsFmt, psl->qName, psl->qSize, psl->tName, psl->tStart, psl->tEnd,
             calcIdent(psl), calcQCover(psl), calcRepMatch(psl), calcTCover(psl));
     if (querySizesTbl != NULL)
         querySizeCntGet(querySizesTbl, psl->qName, psl->qSize)->alnCnt++;
     pslFree(&psl);
     }
 lineFileClose(&pslLf);
 
 if (querySizesTbl != NULL)
     alignStatsOutputUnaligned(fh, querySizesTbl);
 
 carefulClose(&fh);
@@ -460,30 +465,31 @@
 /* collect and output overall stats */
 {
 struct hash *queryStatsTbl = collectQueryStats(pslFile, querySizeFile);
 int aligned1 = 0, alignedN = 0;
 struct sumStats os = sumOverAllStats(queryStatsTbl, &aligned1, &alignedN);
 outputOverallStats(statsFile, &os, aligned1, alignedN);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 3)
     usage();
 warnOnConflicts = optionExists("warnOnConflicts");
+tsvHeader = optionExists("tsv");
 char *querySizeFile = optionVal("queries", NULL);
 if (optionExists("queryStats") && optionExists("overallStats"))
     errAbort("can't specify both -queryStats and -overallStats");
 if (optionExists("queryStats"))
     pslQueryStats(argv[1], argv[2], querySizeFile);
 else if (optionExists("overallStats"))
     pslOverallStats(argv[1], argv[2], querySizeFile);
 else
     pslAlignStats(argv[1], argv[2], querySizeFile);
 return 0;
 }
 /*
  * Local Variables:
  * c-file-style: "jkent-c"
  * End: