874c24c932ee906b6332b5df5b6f4e1c93bf8b92 markd Mon Feb 9 22:15:49 2015 -0800 Created a program to filter genePreds file. Currently filters based on genePredCheck validation errors. diff --git src/hg/inc/genePred.h src/hg/inc/genePred.h index 2423b13..31ba43a 100644 --- src/hg/inc/genePred.h +++ src/hg/inc/genePred.h @@ -242,37 +242,42 @@ /* read all genes from a table find the gene with the biggest overlap. * Cache the list of genes to so we only read it once. * If there are multiple hits and the name that matches exactly, * this overrides the biggest overlap */ int genePredBases(struct genePred *gp); /* count coding and utr bases in a gene prediction */ int genePredCodingBases(struct genePred *gp); /* Count up the number of coding bases in gene prediction. */ boolean genePredCdsExon(struct genePred *gp, int iExon, int *startPtr, int *endPtr); /* Get the CDS range in an exon. If there is no CDS, return FALSE and then * set start == end */ -int genePredCheck(char *desc, FILE* out, int chromSize, +int genePredCheck(char *desc, FILE* errFh, int chromSize, struct genePred* gp); /* Validate a genePred for consistency. desc is printed the error messages - * to file out (open /dev/null to discard). chromSize should contain + * to file errFh (open /dev/null to discard). chromSize should contain * size of chromosome, or 0 if chrom is not valid, or -1 to not check * chromosome bounds. Returns count of errors. */ +int genePredCheckDb(char *desc, FILE* errFh, char* db, struct genePred* gp); +/* Validate a genePred for consistency. desc is printed the error messages + * to file errFh (open /dev/null to discard). Lookup chromosome size in database if + * db is not NULL. Returns count of errors. */ + boolean genePredNmdTarget(struct genePred *gp); /* Return TRUE if cds end is more than 50bp upstream of last intron. */ void genePredAddExonFrames(struct genePred *gp); /* Add exonFrames array to a genePred that doesn't have it. Frame is assumed * to be contiguous. */ void genePredRc(struct genePred *gp, int chromSize); /* Reverse complement a genePred (project it to the opposite strand). Useful * when doing analysis that is simplified by having things on the same strand. */ int genePredCdsSize(struct genePred *gp); /* compute the number of bases of CDS */