547d66239f70b9636b33694ec29a8042e66e95aa max Thu Mar 3 08:19:38 2022 -0800 adding cdsStartStat docs, refs #29030 diff --git src/hg/htdocs/goldenPath/help/bigGenePred.html src/hg/htdocs/goldenPath/help/bigGenePred.html index 1f92ddd..085f78f 100755 --- src/hg/htdocs/goldenPath/help/bigGenePred.html +++ src/hg/htdocs/goldenPath/help/bigGenePred.html @@ -46,30 +46,33 @@ uint thickEnd; "End of where display should be thick (stop codon)" uint reserved; "RGB value (use R,G,B string in input file)" int blockCount; "Number of blocks" int[blockCount] blockSizes; "Comma separated list of block sizes" int[blockCount] chromStarts;"Start positions relative to chromStart" string name2; "Alternative/human readable name" string cdsStartStat; "Status of CDS start annotation (none, unknown, incomplete, or complete)" string cdsEndStat; "Status of CDS end annotation (none, unknown, incomplete, or complete)" int[blockCount] exonFrames; "Exon frame {0,1,2}, or -1 if no frame for exon" string type; "Transcript type" string geneName; "Primary identifier for gene" string geneName2; "Alternative/human-readable gene name" string geneType; "Gene type" ) +
cdsStartStat/cdsEndStat: To select only protein-coding transcripts, filter for cdsStartStat='cmpl' and cdsEndStat='cmpl' ('cmpl' and 'incmpl' are the abbreviated forms of "complete" and "incomplete"). Non-coding transcripts have at least one of these fields set to 'incmpl'. +
+The following bed12+8 file is an example of a pre-bigGenePred text file.
Step 1.
Format your pre-bigGenePred file. The first 12 fields of pre-bigGenePred files are described by the
BED file format. Your file must
also contain the 8 extra fields described in the autoSql file definition
shown above: name2, cdsStartStat, cdsEndStat, exonFrames, type, geneName, geneName2,
geneType. For example, you can use this bed12+8 input file,
bigGenePred.txt. Your pre-bigGenePred file must be sorted
first on the chrom
field, and secondarily on the chromStart
field. You
can use the UNIX sort
command to do this: