874c24c932ee906b6332b5df5b6f4e1c93bf8b92
markd
  Mon Feb 9 22:15:49 2015 -0800
Created a program to filter genePred files.  Currently filters
based on genePredCheck validation errors.

diff --git src/hg/genePredCheck/genePredCheck.c src/hg/genePredCheck/genePredCheck.c
index 434c5f0..79e74da 100644
--- src/hg/genePredCheck/genePredCheck.c
+++ src/hg/genePredCheck/genePredCheck.c
@@ -1,117 +1,94 @@
 /* genePredCheck - validate genePred files or tables. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "verbose.h"
 #include "portable.h"
 #include "hdb.h"
 #include "genePred.h"
 #include "genePredReader.h"
-#include "chromInfo.h"
 
 
 /* Command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"db", OPTION_STRING},
     {NULL, 0}
 };
 char *gDb = NULL;
 int gErrCount = 0;  /* global count of errors */
 int gChkCount = 0;  /* global count of genes checked */
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "genePredCheck - validate genePred files or tables\n"
   "usage:\n"
   "   genePredCheck [options] fileTbl ..\n"
   "\n"
   "If fileTbl is an existing file, then it is check.  Otherwise, if -db\n"
   "is provided, then a table by this name is checked.\n"
   "\n"
   "options:\n"
   "   -db=db - If specified, then this database is used to\n"
   "    get chromosome sizes, and perhaps the table to check.\n"
   "\n");
 }
 
 static void checkAGenePred(char *fileTbl, int iRec, struct genePred *gp)
 /* check one genePred */
 {
-int chromSize = -1;  /* default to not checking */
-char desc[512];
+char desc[2*PATH_LEN];
 
 safef(desc, sizeof(desc), "%s:%d", fileTbl, iRec);
-if (gDb != NULL)
-    {
-    // hGetChromInfo is case independent
-    struct chromInfo *ci = hGetChromInfo(gDb, gp->chrom);
-    if (ci == NULL)
-        {
-        fprintf(stderr, "Error: %s: %s has invalid chrom for %s: %s\n",
-                desc, gp->name, gDb, gp->chrom);
-        gErrCount++;
-        chromSize = -1;  // don't validate
-        }
-    else if (differentString(gp->chrom, ci->chrom)) // verify case dependent ==
-        {
-        fprintf(stderr, "Error: %s: %s has invalid chrom for %s: %s\n",
-                desc, gp->name, gDb, gp->chrom);
-        gErrCount++;
-        chromSize = -1;  // don't validate
-        }
-    else
-        chromSize = ci->size;
-    }
-gErrCount += genePredCheck(desc, stderr, chromSize, gp);
+gErrCount += genePredCheckDb(desc, stderr, gDb, gp);
 gChkCount++;
 }
 
 static void checkGenePred(char *fileTbl)
 /* check a genePred file or table */
 {
 struct sqlConnection *conn = NULL;
 struct genePredReader *gpr = NULL;
 struct genePred *gp;
 int iRec = 0;
 
 
 if (fileExists(fileTbl))
     {
     gpr = genePredReaderFile(fileTbl, NULL);
     }
 else if (gDb != NULL)
     {
     conn = hAllocConn(gDb);
     gpr = genePredReaderQuery(conn, fileTbl, NULL);
     }
 else
     {
     errAbort("file %s doesn't exist, must specify -db=db if this is a table", fileTbl);
     }
 
 while ((gp = genePredReaderNext(gpr)) != NULL)
     {
     checkAGenePred(fileTbl, ++iRec, gp);
     genePredFree(&gp);
     }
 genePredReaderFree(&gpr);
 hFreeConn(&conn);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 int iarg;
 optionInit(&argc, argv, optionSpecs);
 if (argc < 2)
     usage();
 gDb = optionVal("db", NULL);
 for (iarg = 1; iarg < argc; iarg++)
     checkGenePred(argv[iarg]);
 verbose(1, "checked: %d failed: %d\n", gChkCount, gErrCount);
 return ((gErrCount == 0) ? 0 : 1);
 }