0a6421e764888bb6e8e1b2058501f7ce4188d814
markd
  Mon Jun 17 02:08:53 2019 -0700
import of prerelease of gencode V31lift37

diff --git src/hg/pslStats/pslStats.c src/hg/pslStats/pslStats.c
index 4a1cfb4..65b9d44 100644
--- src/hg/pslStats/pslStats.c
+++ src/hg/pslStats/pslStats.c
@@ -2,72 +2,88 @@
 
 /* Copyright (C) 2011 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "obscure.h"
 #include "linefile.h"
 #include "hash.h"
 #include "localmem.h"
 #include "psl.h"
 #include "sqlNum.h"
 
 
 /* size for query name hashs */
 static int queryHashPowTwo = 22;
+static boolean warnOnConflicts = FALSE;
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"queryStats", OPTION_BOOLEAN},
     {"overallStats", OPTION_BOOLEAN},
     {"queries", OPTION_STRING},
+    {"warnOnConflicts", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "pslStats - collect statistics from a psl file.\n"
   "\n"
   "usage:\n"
   "   pslStats [options] psl statsOut\n"
   "\n"
   "Options:\n"
   "  -queryStats - output per-query statistics, the default is per-alignment stats\n"
   "  -overallStats - output overall statistics.\n"
   "  -queries=querySizeFile - tab separated file with of expected qNames and sizes.\n"
-  "   If specified, statistic will include queries that didn't align.\n");
+  "   If specified, statistic will include queries that didn't align.\n"
+  "  -warnOnConflicts - warn and ignore when two PSLs with the same qName conflict.\n"
+  "   This can happen with bogus generated names.\n");
 }
 
 struct querySizeCnt
 /* structure used to hold query size and a count */
 {
     unsigned qSize;
     unsigned alnCnt;
 };
 
+static void querySizeConflict(char *qName, unsigned qSize, unsigned qSize2)
+/* Report two differing sizes seen for qName: warn if warnOnConflicts is set, otherwise errAbort. */
+{
+if (warnOnConflicts)
+    warn("conflicting query sizes for %s: %u and %u", qName, qSize, qSize2);
+else
+    errAbort("conflicting query sizes for %s: %u and %u", qName, qSize, qSize2);
+}
+
 static struct querySizeCnt *querySizeCntGet(struct hash* querySizesTbl,
                                             char *qName, unsigned qSize)
 /* get entry with size and alignment count, create if not present. */
 {
 struct hashEl *hel = hashStore(querySizesTbl, qName);
 struct querySizeCnt *qs = hel->val;
 if (qs != NULL)
     {
     if (qs->qSize != qSize)
-        errAbort("conflicting query sizes for %s: %d and %d", qName, qs->qSize, qSize);
+        {
+        querySizeConflict(qName, qs->qSize, qSize);
+        return NULL;
+        }
     }
 else
     {
     lmAllocVar(querySizesTbl->lm, qs);
     hel->val = qs;
     qs->qSize = qSize;
     }
 return qs;
 }
 
 /* read qNames and sizes into a hash of querySizeCnt objet */
 static struct hash* querySizeCntLoad(char *querySizeFile)
 {
 struct hash* querySizesTbl = hashNew(queryHashPowTwo);
 struct lineFile *lf = lineFileOpen(querySizeFile, TRUE);
@@ -105,31 +121,34 @@
 struct sumStats *sumStatsGetForQuery(struct hash *queryStatsTbl,
                                      char *qName, unsigned qSize)
 /* lookup a stats on query by name, creating if it doesn't exist */
 {
 struct hashEl *hel = hashStore(queryStatsTbl, qName);
 struct sumStats *qs = hel->val;
 if (qs == NULL)
     {
     AllocVar(qs);
     qs->qName = hel->name;  /* use string in hash */
     qs->queryCnt = 1;
     qs->minQSize = qs->maxQSize = qSize;
     hel->val = qs;
     }
 else if (qs->minQSize != qSize)
-    errAbort("conflicting query sizes for %s: %d and %d", qName, qs->minQSize, qSize);
+    {
+    querySizeConflict(qName, qs->minQSize, qSize);
+    return NULL;
+    }
 return hel->val;
 }
 
 /* read qNames and sizes into a hash of sumStats objects */
 static struct hash* sumStatsLoad(char *querySizeFile)
 {
 struct hash* querySizesTbl = hashNew(queryHashPowTwo);
 struct lineFile *lf = lineFileOpen(querySizeFile, TRUE);
 char *row[2];
 
 while (lineFileNextRowTab(lf, row, ArraySize(row)))
     sumStatsGetForQuery(querySizesTbl, row[0], sqlUnsigned(row[1]));
 
 lineFileClose(&lf);
 return querySizesTbl;
@@ -439,30 +458,31 @@
                             char *querySizeFile)
 /* collect and output overall stats */
 {
 struct hash *queryStatsTbl = collectQueryStats(pslFile, querySizeFile);
 int aligned1 = 0, alignedN = 0;
 struct sumStats os = sumOverAllStats(queryStatsTbl, &aligned1, &alignedN);
 outputOverallStats(statsFile, &os, aligned1, alignedN);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 3)
     usage();
+warnOnConflicts = optionExists("warnOnConflicts");
 char *querySizeFile = optionVal("queries", NULL);
 if (optionExists("queryStats") && optionExists("overallStats"))
     errAbort("can't specify both -queryStats and -overallStats");
 if (optionExists("queryStats"))
     pslQueryStats(argv[1], argv[2], querySizeFile);
 else if (optionExists("overallStats"))
     pslOverallStats(argv[1], argv[2], querySizeFile);
 else
     pslAlignStats(argv[1], argv[2], querySizeFile);
 return 0;
 }
 /*
  * Local Variables:
  * c-file-style: "jkent-c"
  * End: