cb4ac73d3a456f2b757fd10112a44c2abffbb54f tdreszer Tue Apr 23 13:46:43 2013 -0700 Added routines for getting genePred coding DNA and determining coding position. To be used in haplotypes code about to be checked in. diff --git src/hg/inc/genePred.h src/hg/inc/genePred.h index 35bec5c..5a8204b 100644 --- src/hg/inc/genePred.h +++ src/hg/inc/genePred.h @@ -1,22 +1,24 @@ /* genePred.h was originally generated by the autoSql program, which also * generated genePred.c and genePred.sql. This header links the database and the RAM * representation of objects. */ #ifndef GENEPRED_H #define GENEPRED_H +#include "dnaseq.h" + struct gff; struct gffFile; struct gffGroup; struct psl; struct genbankCds; struct rbTree; enum cdsStatus /* value to indicate status of CDS annotation at either start or end */ { cdsNone, /* "none" - No CDS (non-coding) */ cdsUnknown, /* "unk" - CDS is unknown (coding, but not known) */ cdsIncomplete, /* "incmpl" - CDS is not complete at this end */ cdsComplete, /* "cmpl" - CDS is complete at this end */ }; @@ -286,17 +288,30 @@ void gpPartOutAsBed(struct genePred *gp, int start, int end, FILE *f, char *type, int id, int minSize); /* Write out part of gp as bed12. */ boolean codonToPos(struct genePred *gp, unsigned num, int *chromStart, int *chromEnd); // map 1-based codon to genomic coordinates. If the codon crosses an exon junction, we return just the beginning (LHS) of the codon. // Returns true if we find the codon in given gene prediction; chromStart and chromEnd are set to appropriate three base region. boolean exonToPos(struct genePred *gp, unsigned num, int *chromStart, int *chromEnd); // map 1-based exon number to genomic coordinates. // Returns true if we find the exon in given gene prediction; chromStart and chromEnd are set to appropriate region. 
struct asObject *genePredAsObj(); // Return asObject describing fields of genePred +struct dnaSeq *genePredGetDna(char *database, struct genePred *gp, + boolean coding, enum dnaCase dnaCase); +// Returns the DNA sequence associated with gene prediction. +// Negative strand genes will return the sequence as read from the negative strand. +// Optionally restrict to coding sequence only. + +int genePredBaseToCodingPos(struct genePred *gp, int basePos, + boolean stranded, boolean *isCoding); +// Given a genePred model and a single (0-based) base position, predict the 0-based +// DNA (stranded) coding sequence pos. Dividing this number by 3 should give the AA position! +// Returns -1 when outside of coding exons unless OPTIONAL isCoding pointer to boolean is +// provided. In that case, returns last valid position and sets *isCoding to FALSE. + #endif /* GENEPRED_H */