0a6421e764888bb6e8e1b2058501f7ce4188d814 markd Mon Jun 17 02:08:53 2019 -0700
import of prerelease of gencode V31lift37
diff --git src/hg/pslStats/pslStats.c src/hg/pslStats/pslStats.c
index 4a1cfb4..65b9d44 100644
--- src/hg/pslStats/pslStats.c
+++ src/hg/pslStats/pslStats.c
@@ -2,72 +2,88 @@
 /* Copyright (C) 2011 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "obscure.h"
 #include "linefile.h"
 #include "hash.h"
 #include "localmem.h"
 #include "psl.h"
 #include "sqlNum.h"

 /* size for query name hashs */
 static int queryHashPowTwo = 22;
+static boolean warnOnConflicts = FALSE;

 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"queryStats", OPTION_BOOLEAN},
     {"overallStats", OPTION_BOOLEAN},
     {"queries", OPTION_STRING},
+    {"warnOnConflicts", OPTION_BOOLEAN},
     {NULL, 0}
 };

 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "pslStats - collect statistics from a psl file.\n"
   "\n"
   "usage:\n"
   " pslStats [options] psl statsOut\n"
   "\n"
   "Options:\n"
   " -queryStats - output per-query statistics, the default is per-alignment stats\n"
   " -overallStats - output overall statistics.\n"
   " -queries=querySizeFile - tab separated file with of expected qNames and sizes.\n"
-  " If specified, statistic will include queries that didn't align.\n");
+  " If specified, statistic will include queries that didn't align.\n"
+  " -warnOnConflicts - warn and ignore when a two PSLs with the same qName conflict.\n"
+  " This can happen with bogus generated names.\n");
 }

 struct querySizeCnt
 /* structure used to hold query size and a count */
 {
     unsigned qSize;
     unsigned alnCnt;
 };

+static void querySizeConflict(char *qName, unsigned qSize, unsigned qSize2)
+/* Report two differing sizes (qSize, qSize2) seen for query qName: warn() when warnOnConflicts is set, otherwise errAbort(). */
+{
+if (warnOnConflicts)
+    warn("conflicting query sizes for %s: %u and %u", qName, qSize, qSize2);
+else
+    errAbort("conflicting query sizes for %s: %u and %u", qName, qSize, qSize2);
+}
+
 static struct querySizeCnt *querySizeCntGet(struct hash* querySizesTbl, char *qName, unsigned qSize)
 /* get entry with size and alignment count, create if not present. */
 {
 struct hashEl *hel = hashStore(querySizesTbl, qName);
 struct querySizeCnt *qs = hel->val;
 if (qs != NULL)
     {
     if (qs->qSize != qSize)
-        errAbort("conflicting query sizes for %s: %d and %d", qName, qs->qSize, qSize);
+        {
+        querySizeConflict(qName, qs->qSize, qSize);
+        return NULL;  /* reached only on the warn path; callers must handle NULL */
+        }
     }
 else
     {
     lmAllocVar(querySizesTbl->lm, qs);
     hel->val = qs;
     qs->qSize = qSize;
     }
 return qs;
 }

 /* read qNames and sizes into a hash of querySizeCnt objet */
 static struct hash* querySizeCntLoad(char *querySizeFile)
 {
 struct hash* querySizesTbl = hashNew(queryHashPowTwo);
 struct lineFile *lf = lineFileOpen(querySizeFile, TRUE);
@@ -105,31 +121,34 @@
 struct sumStats *sumStatsGetForQuery(struct hash *queryStatsTbl, char *qName, unsigned qSize)
 /* lookup a stats on query by name, creating if it doesn't exist */
 {
 struct hashEl *hel = hashStore(queryStatsTbl, qName);
 struct sumStats *qs = hel->val;
 if (qs == NULL)
     {
     AllocVar(qs);
     qs->qName = hel->name; /* use string in hash */
     qs->queryCnt = 1;
     qs->minQSize = qs->maxQSize = qSize;
     hel->val = qs;
     }
 else if (qs->minQSize != qSize)
-    errAbort("conflicting query sizes for %s: %d and %d", qName, qs->minQSize, qSize);
+    {
+    querySizeConflict(qName, qs->minQSize, qSize);
+    return NULL;  /* reached only on the warn path; callers must handle NULL */
+    }
 return hel->val;
 }

 /* read qNames and sizes into a hash of sumStats objects */
 static struct hash* sumStatsLoad(char *querySizeFile)
 {
 struct hash* querySizesTbl = hashNew(queryHashPowTwo);
 struct lineFile *lf = lineFileOpen(querySizeFile, TRUE);
 char *row[2];
 while (lineFileNextRowTab(lf, row, ArraySize(row)))
     sumStatsGetForQuery(querySizesTbl, row[0], sqlUnsigned(row[1]));
 lineFileClose(&lf);
 return querySizesTbl;
@@ -439,30 +458,31 @@
                             char *querySizeFile)
 /* collect and output overall stats */
 {
 struct hash *queryStatsTbl = collectQueryStats(pslFile, querySizeFile);
 int aligned1 = 0, alignedN = 0;
 struct sumStats os = sumOverAllStats(queryStatsTbl, &aligned1, &alignedN);
 outputOverallStats(statsFile, &os, aligned1, alignedN);
 }

 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 3)
     usage();
+warnOnConflicts = optionExists("warnOnConflicts");
 char *querySizeFile = optionVal("queries", NULL);
 if (optionExists("queryStats") && optionExists("overallStats"))
     errAbort("can't specify both -queryStats and -overallStats");
 if (optionExists("queryStats"))
     pslQueryStats(argv[1], argv[2], querySizeFile);
 else if (optionExists("overallStats"))
     pslOverallStats(argv[1], argv[2], querySizeFile);
 else
     pslAlignStats(argv[1], argv[2], querySizeFile);
 return 0;
 }

 /*
  * Local Variables:
  * c-file-style: "jkent-c"
  * End: