7edc888861d9fd7769d0a21ae55d5f3e75e61587 markd Fri Aug 19 17:06:17 2016 -0700 Added genePredToProt that translates genePred files to protein. It handles cases where the annotations have frameshifting indels described in the exonFrames column. Partial codons will be skipped. This differs from getRnaPred, which only handles initial, incomplete codons. diff --git src/hg/getRnaPred/getRnaPred.c src/hg/getRnaPred/getRnaPred.c index 754bd8b..368008d 100644 --- src/hg/getRnaPred/getRnaPred.c +++ src/hg/getRnaPred/getRnaPred.c @@ -25,50 +25,45 @@ "to process all chromosome\n" "\n" "options:\n" " -weird - only get ones with weird splice sites\n" " -cdsUpper - output CDS in upper case\n" " -cdsOnly - only output CDS\n" " -cdsOut=file - write CDS to this tab-separated file, in the form\n" " acc start end\n" " where start..end are genbank style, one-based coordinates\n" " -keepMasking - un/masked in upper/lower case.\n" " -pslOut=psl - output a PSLs for the virtual mRNAs. Allows virtual\n" " mRNA to be analyzed by tools that work on PSLs\n" " -suffix=suf - append suffix to each id to avoid confusion with mRNAs\n" " use to define the genes.\n" " -peptides - out the translation of the CDS to a peptide sequence.\n" + " The newer program genePredToProt maybe produce better results in cases\n" + " were there are frame-shifting indels in the CDS.\n" " -exonIndices - output indices of exon boundaries after sequence name,\n" " e.g., \"103 243 290\" says positions 1-103 are from the first exon,\n" " positions 104-243 are from the second exon, etc. \n" " -maxSize=size - output a maximum of size characters. Useful when\n" " testing gene predictions by RT-PCR.\n" " -genomeSeqs=spec - get genome sequences from the specified nib directory\n" " or 2bit file instead of going though the path found in chromInfo.\n" " -includeCoords - include the genomic coordinates as a comment in the\n" " fasta header. 
This is necessary when there are multiple genePreds\n" " with the same name.\n" " -genePredExt - (for use with -peptides) use extended genePred format,\n" " and consider frame information when translating (Warning: only\n" " considers offset at 5' end, not frameshifts between blocks)\n" -#if 0 - /* Not implemented, not sure it's worth the complexity */ - "If frame\n" - " is in genePred and blocks don't have contiguous frame, it will output a '*'\n" - " where the frameshift occured and continue to translated based on the frame\n" - " specification.\n" -#endif ); } static struct optionSpec options[] = { {"weird", OPTION_BOOLEAN}, {"cdsUpper", OPTION_BOOLEAN}, {"cdsOut", OPTION_STRING}, {"cdsOnly", OPTION_BOOLEAN}, {"keepMasking", OPTION_BOOLEAN}, {"pslOut", OPTION_STRING}, {"suffix", OPTION_STRING}, {"peptides", OPTION_BOOLEAN}, {"includeCoords", OPTION_BOOLEAN}, {"exonIndices", OPTION_BOOLEAN}, {"maxSize", OPTION_INT},