e941a68c6f3089528741d24330b8ef04999a47fe markd Sun Nov 14 15:03:29 2021 -0800 added option to output TSV-style header diff --git src/hg/pslStats/pslStats.c src/hg/pslStats/pslStats.c index 45d0bcc..2032f52 100644 --- src/hg/pslStats/pslStats.c +++ src/hg/pslStats/pslStats.c @@ -3,56 +3,59 @@ /* Copyright (C) 2011 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "options.h" #include "obscure.h" #include "linefile.h" #include "hash.h" #include "localmem.h" #include "psl.h" #include "sqlNum.h" /* size for query name hashs */ static int queryHashPowTwo = 22; static boolean warnOnConflicts = FALSE; +static boolean tsvHeader = FALSE; /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"queryStats", OPTION_BOOLEAN}, {"overallStats", OPTION_BOOLEAN}, {"queries", OPTION_STRING}, {"warnOnConflicts", OPTION_BOOLEAN}, + {"tsv", OPTION_BOOLEAN}, {NULL, 0} }; static void usage() /* Explain usage and exit. */ { errAbort( "pslStats - collect statistics from a psl file.\n" "\n" "usage:\n" " pslStats [options] psl statsOut\n" "\n" "Options:\n" " -queryStats - output per-query statistics, the default is per-alignment stats\n" " -overallStats - output overall statistics.\n" " -queries=querySizeFile - tab separated file with of expected qNames and sizes.\n" " If specified, statistic will include queries that didn't align.\n" " -warnOnConflicts - warn and ignore when a two PSLs with the same qName conflict.\n" - " This can happen with bogus generated names.\n"); + " This can happen with bogus generated names.\n" + " -tsv - write a TSV header instead of an autoSql header\n"); } struct querySizeCnt /* structure used to hold query size and a count */ { unsigned qSize; unsigned alnCnt; }; static void querySizeConflict(char *qName, unsigned qSize, unsigned qSize2) /* generate a error or warning on conflicting query sizes */ { if (warnOnConflicts) warn("conflicting query sizes for %s: %d and %d", qName, qSize, qSize2); else @@ -297,58 +300,60 @@ ss->maxQCover = max(ss->maxQCover, ss2->maxQCover); ss->minTCover = min(ss->minTCover, ss2->minTCover); ss->maxTCover = max(ss->maxTCover, ss2->maxTCover); ss->minRepMatch = min(ss->minRepMatch, ss2->minRepMatch); ss->maxRepMatch = max(ss->maxRepMatch, ss2->maxRepMatch); } ss->queryCnt += ss2->queryCnt; ss->totalQSize += ss2->totalQSize; ss->totalAlign += ss2->totalAlign; ss->totalMatch += ss2->totalMatch; ss->totalRepMatch += ss2->totalRepMatch; ss->alnCnt += ss2->alnCnt; } /* header for alignment statistics */ -static char *alnStatsHdr = "#qName\t" "qSize\t" "tName\t" "tStart\t" "tEnd\t" +static char *alnStatsHdr = "qName\t" "qSize\t" "tName\t" "tStart\t" "tEnd\t" "ident\t" "qCover\t" "repMatch\t" "tCover\n"; /* format for alignStats output */ static char *alnStatsFmt = "%s\t%d\t%s\t%d\t%d\t%0.4f\t%0.4f\t%0.4f\t%0.4f\n"; static void alignStatsOutputUnaligned(FILE *fh, struct hash* querySizesTbl) /* output stats on unaligned */ { struct hashCookie cookie = hashFirst(querySizesTbl); struct hashEl *hel; while ((hel = hashNext(&cookie)) != NULL) { struct querySizeCnt *qs = hel->val; if (qs->alnCnt == 0) fprintf(fh, alnStatsFmt, hel->name, qs->qSize, "", 0, 0, 0.0, 0.0, 0.0, 0.0); } } static void pslAlignStats(char *pslFile, char *statsFile, char *querySizeFile) /* collect and output per-alignment stats */ { struct hash* querySizesTbl = (querySizeFile != NULL) ? querySizeCntLoad(querySizeFile) : NULL; struct lineFile *pslLf = pslFileOpen(pslFile); FILE *fh = mustOpen(statsFile, "w"); struct psl* psl; +if (!tsvHeader) + fputc('#', fh); fputs(alnStatsHdr, fh); while ((psl = pslNext(pslLf)) != NULL) { fprintf(fh, alnStatsFmt, psl->qName, psl->qSize, psl->tName, psl->tStart, psl->tEnd, calcIdent(psl), calcQCover(psl), calcRepMatch(psl), calcTCover(psl)); if (querySizesTbl != NULL) querySizeCntGet(querySizesTbl, psl->qName, psl->qSize)->alnCnt++; pslFree(&psl); } lineFileClose(&pslLf); if (querySizesTbl != NULL) alignStatsOutputUnaligned(fh, querySizesTbl); carefulClose(&fh); @@ -460,30 +465,31 @@ /* collect and output overall stats */ { struct hash *queryStatsTbl = collectQueryStats(pslFile, querySizeFile); int aligned1 = 0, alignedN = 0; struct sumStats os = sumOverAllStats(queryStatsTbl, &aligned1, &alignedN); outputOverallStats(statsFile, &os, aligned1, alignedN); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); if (argc != 3) usage(); warnOnConflicts = optionExists("warnOnConflicts"); +tsvHeader = optionExists("tsv"); char *querySizeFile = optionVal("queries", NULL); if (optionExists("queryStats") && optionExists("overallStats")) errAbort("can't specify both -queryStats and -overallStats"); if (optionExists("queryStats")) pslQueryStats(argv[1], argv[2], querySizeFile); else if (optionExists("overallStats")) pslOverallStats(argv[1], argv[2], querySizeFile); else pslAlignStats(argv[1], argv[2], querySizeFile); return 0; } /* * Local Variables: * c-file-style: "jkent-c" * End: