f7792e45d8c710b375ed418f55f759a13c545421 markd Thu Jun 1 23:11:14 2017 -0700 Moved guts of genePredToProt to library to allow Brian to use it in CGIs. Removed unnecessary PSL creation from genePredToProt. diff --git src/hg/inc/genePred.h src/hg/inc/genePred.h index f2707c9..6f09c4e 100644 --- src/hg/inc/genePred.h +++ src/hg/inc/genePred.h @@ -1,27 +1,28 @@ /* genePred.h was originally generated by the autoSql program, which also * generated genePred.c and genePred.sql. This header links the database and the RAM * representation of objects. */ /* Copyright (C) 2013 The Regents of the University of California * See README in this or parent directory for licensing information. */ #ifndef GENEPRED_H #define GENEPRED_H #include "dnaseq.h" #include "bigBed.h" +#include "nibTwo.h" struct gff; struct gffFile; struct gffGroup; struct psl; struct genbankCds; struct rbTree; enum cdsStatus /* value to indicate status of CDS annotation at either start or end */ { cdsNone, /* "none" - No CDS (non-coding) */ cdsUnknown, /* "unk" - CDS is unknown (coding, but not known) */ cdsIncomplete, /* "incmpl" - CDS is not complete at this end */ cdsComplete, /* "cmpl" - CDS is complete at this end */ @@ -365,17 +366,32 @@ // Negative strand genes will return the sequence as read from the negative strand. // Optionally restrict to coding sequence only int genePredBaseToCodingPos(struct genePred *gp, int basePos, boolean stranded, boolean *isCoding); // Given a genePred model and a single (0 based) base position, predict the 0-based // DNA (stranded) coding sequence pos. Dividing this number by 3 should give the AA position! // Returns -1 when outside of coding exons unless OPTIONAL isCoding pointer to boolean is // provided. In that case, returns last valid position and sets isCoding to FALSE. 
struct genePredExt *genePredFromBigGenePred( char *chrom, struct bigBedInterval *bb); /* build a genePred from a bigGenePred interval */ struct genePredExt *genePredFromBigGenePredRow(char **row); /* build a genePred from a bigGenePred row */ + +/* options to genePredTranslate */ +#define GENEPRED_TRANSLATE_SELENO 0x01 /* Assume internal TGA codes for selenocysteine and translate it to `U' */ +#define GENEPRED_TRANSLATE_INCLUDE_STOP 0x02 /* If the CDS ends with a stop codon, represent it as a `*' */ +#define GENEPRED_TRANSLATE_STAR_INFRAME_STOPS 0x04 /* Use `*' instead of `X' for in-frame stop codons. + * This will result in selenocysteines being `*', with only codons + * containing `N' being translated to `X'. This doesn't include the terminal + * stop codon. */ +void genePredTranslate(struct genePred *gp, struct nibTwoCache* genomeSeqs, unsigned options, + char **protRet, char **cdsRet); +/* Translate a genePred into a protein. It can also return the CDS part of the + * mRNA sequence. If the chrom is chrM, the mitochondrial translation tables are + * used. If protRet or cdsRet is NULL, that sequence is not returned. + */ #endif /* GENEPRED_H */