874c24c932ee906b6332b5df5b6f4e1c93bf8b92 markd Mon Feb 9 22:15:49 2015 -0800 Created a program to filter genePreds file. Currently filters based on genePredCheck validation errors. diff --git src/hg/genePredCheck/genePredCheck.c src/hg/genePredCheck/genePredCheck.c index 434c5f0..79e74da 100644 --- src/hg/genePredCheck/genePredCheck.c +++ src/hg/genePredCheck/genePredCheck.c @@ -1,117 +1,94 @@ /* genePredCheck - validate genePred files or tables. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "options.h" #include "verbose.h" #include "portable.h" #include "hdb.h" #include "genePred.h" #include "genePredReader.h" -#include "chromInfo.h" /* Command line option specifications */ static struct optionSpec optionSpecs[] = { {"db", OPTION_STRING}, {NULL, 0} }; char *gDb = NULL; int gErrCount = 0; /* global count of errors */ int gChkCount = 0; /* global count of genes checked */ void usage() /* Explain usage and exit. */ { errAbort( "genePredCheck - validate genePred files or tables\n" "usage:\n" " genePredCheck [options] fileTbl ..\n" "\n" "If fileTbl is an existing file, then it is check. Otherwise, if -db\n" "is provided, then a table by this name is checked.\n" "\n" "options:\n" " -db=db - If specified, then this database is used to\n" " get chromosome sizes, and perhaps the table to check.\n" "\n"); } static void checkAGenePred(char *fileTbl, int iRec, struct genePred *gp) /* check one genePred */ { -int chromSize = -1; /* default to not checking */ -char desc[512]; +char desc[2*PATH_LEN]; safef(desc, sizeof(desc), "%s:%d", fileTbl, iRec); -if (gDb != NULL) - { - // hGetChromInfo is case independent - struct chromInfo *ci = hGetChromInfo(gDb, gp->chrom); - if (ci == NULL) - { - fprintf(stderr, "Error: %s: %s has invalid chrom for %s: %s\n", - desc, gp->name, gDb, gp->chrom); - gErrCount++; - chromSize = -1; // don't validate - } - else if (differentString(gp->chrom, ci->chrom)) // verify case dependent == - { - fprintf(stderr, "Error: %s: %s has invalid chrom for %s: %s\n", - desc, gp->name, gDb, gp->chrom); - gErrCount++; - chromSize = -1; // don't validate - } - else - chromSize = ci->size; - } -gErrCount += genePredCheck(desc, stderr, chromSize, gp); +gErrCount += genePredCheckDb(desc, stderr, gDb, gp); gChkCount++; } static void checkGenePred(char *fileTbl) /* check a genePred file or table */ { struct sqlConnection *conn = NULL; struct genePredReader *gpr = NULL; struct genePred *gp; int iRec = 0; if (fileExists(fileTbl)) { gpr = genePredReaderFile(fileTbl, NULL); } else if (gDb != NULL) { conn = hAllocConn(gDb); gpr = genePredReaderQuery(conn, fileTbl, NULL); } else { errAbort("file %s doesn't exist, must specify -db=db if this is a table", fileTbl); } while ((gp = genePredReaderNext(gpr)) != NULL) { checkAGenePred(fileTbl, ++iRec, gp); genePredFree(&gp); } genePredReaderFree(&gpr); hFreeConn(&conn); } int main(int argc, char *argv[]) /* Process command line. */ { int iarg; optionInit(&argc, argv, optionSpecs); if (argc < 2) usage(); gDb = optionVal("db", NULL); for (iarg = 1; iarg < argc; iarg++) checkGenePred(argv[iarg]); verbose(1, "checked: %d failed: %d\n", gChkCount, gErrCount); return ((gErrCount == 0) ? 0 : 1); }