src/lib/tests/input/sacCerTest.gff3 1.1

1.1 2009/04/22 16:56:49 markd
added initial implementation of gff3 parser. Still has some rough edges and problems due to ambiguities in the GFF3 specification
Index: src/lib/tests/input/sacCerTest.gff3
===================================================================
RCS file: src/lib/tests/input/sacCerTest.gff3
diff -N src/lib/tests/input/sacCerTest.gff3
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/lib/tests/input/sacCerTest.gff3	22 Apr 2009 16:56:49 -0000	1.1
@@ -0,0 +1,43 @@
+##gff-version 3
+#date Wed Apr  1 19:50:12 2009
+#
+# Saccharomyces cerevisiae S288C genome
+#
+# has empty Alias with multiple values:
+chrI	SGD	chromosome	1	230208	.	.	.	ID=chrI;dbxref=NCBI:NC_001133
+chrI	SGD	repeat_region	1	62	.	-	.	ID=TEL01L-TR;Name=TEL01L-TR;Note=Terminal%20stretch%20of%20telomeric%20repeats%20on%20the%20left%20arm%20of%20Chromosome%20I;dbxref=SGD:S000028864
+chrI	SGD	telomere	1	801	.	-	.	ID=TEL01L;Name=TEL01L;Note=Telomeric%20region%20on%20the%20left%20arm%20of%20Chromosome%20I%3B%20composed%20of%20an%20X%20element%20core%20sequence%2C%20X%20element%20combinatorial%20repeats%2C%20and%20a%20short%20terminal%20stretch%20of%20telomeric%20repeats;dbxref=SGD:S000028862
+chrI	SGD	repeat_region	63	336	.	-	.	ID=TEL01L-XR;Name=TEL01L-XR;Note=Telomeric%20X%20element%20combinatorial%20Repeat%20region%20on%20the%20left%20arm%20of%20Chromosome%20I%3B%20contains%20repeats%20of%20the%20D%2C%20C%2C%20B%20and%20A%20types%2C%20as%20well%20as%20Tbf1p%20binding%20sites%3B%20formerly%20called%20SubTelomeric%20Repeats;dbxref=SGD:S000028866
+chrI	SGD	gene	335	649	.	+	.	ID=YAL069W;Name=YAL069W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%2C%20based%20on%20available%20experimental%20and%20comparative%20sequence%20data;dbxref=SGD:S000002143;orf_classification=Dubious
+chrI	SGD	CDS	335	649	.	+	0	Parent=YAL069W;Name=YAL069W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%2C%20based%20on%20available%20experimental%20and%20comparative%20sequence%20data;dbxref=SGD:S000002143;orf_classification=Dubious
+chrI	SGD	repeat_region	337	801	.	-	.	ID=TEL01L-XC;Name=TEL01L-XC;Note=Telomeric%20X%20element%20Core%20sequence%20on%20the%20left%20arm%20of%20Chromosome%20I%3B%20contains%20an%20ARS%20consensus%20sequence%2C%20an%20Abf1p%20binding%20site%20consensus%20sequence%20and%20two%20small%20overlapping%20ORFs%20(YAL068W-A%20and%20YAL069W);dbxref=SGD:S000028865
+chrI	SGD	nucleotide_match	753	763	.	-	.	Parent=TEL01L-XC;Name=TEL01L-XC;Note=Telomeric%20X%20element%20Core%20sequence%20on%20the%20left%20arm%20of%20Chromosome%20I%3B%20contains%20an%20ARS%20consensus%20sequence%2C%20an%20Abf1p%20binding%20site%20consensus%20sequence%20and%20two%20small%20overlapping%20ORFs%20(YAL068W-A%20and%20YAL069W);dbxref=SGD:S000028865
+chrI	SGD	binding_site	532	544	.	-	.	Parent=TEL01L-XC;Name=TEL01L-XC;Note=Telomeric%20X%20element%20Core%20sequence%20on%20the%20left%20arm%20of%20Chromosome%20I%3B%20contains%20an%20ARS%20consensus%20sequence%2C%20an%20Abf1p%20binding%20site%20consensus%20sequence%20and%20two%20small%20overlapping%20ORFs%20(YAL068W-A%20and%20YAL069W);dbxref=SGD:S000028865
+chrI	SGD	gene	538	792	.	+	.	ID=YAL068W-A;Name=YAL068W-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%3B%20identified%20by%20gene-trapping%2C%20microarray-based%20expression%20analysis%2C%20and%20genome-wide%20homology%20searching;dbxref=SGD:S000028594;orf_classification=Dubious
+chrI	SGD	CDS	538	792	.	+	0	Parent=YAL068W-A;Name=YAL068W-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%3B%20identified%20by%20gene-trapping%2C%20microarray-based%20expression%20analysis%2C%20and%20genome-wide%20homology%20searching;dbxref=SGD:S000028594;orf_classification=Dubious
+chrI	SGD	ARS	650	1791	.	.	.	ID=ARS102;Name=ARS102;Alias=ARSI-1;Note=Autonomously%20Replicating%20Sequence;dbxref=SGD:S000121252
+chrI	SGD	gene	1807	2169	.	-	.	ID=YAL068C;Name=YAL068C;gene=PAU8;Alias=PAU8;Ontology_term=GO:0003674,GO:0005575,GO:0030437,GO:0045944;Note=Hypothetical%20protein%20of%20unknown%20function%3B%20YAL068C%20is%20not%20an%20essential%20gene;dbxref=SGD:S000002142;orf_classification=Uncharacterized
+chrI	SGD	CDS	1807	2169	.	-	0	Parent=YAL068C;Name=YAL068C;gene=PAU8;Alias=PAU8;Ontology_term=GO:0003674,GO:0005575,GO:0030437,GO:0045944;Note=Hypothetical%20protein%20of%20unknown%20function%3B%20YAL068C%20is%20not%20an%20essential%20gene;dbxref=SGD:S000002142;orf_classification=Uncharacterized
+chrI	SGD	gene	2480	2707	.	+	.	ID=YAL067W-A;Name=YAL067W-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20identified%20by%20gene-trapping%2C%20microarray-based%20expression%20analysis%2C%20and%20genome-wide%20homology%20searching;dbxref=SGD:S000028593;orf_classification=Uncharacterized
+chrI	SGD	CDS	2480	2707	.	+	0	Parent=YAL067W-A;Name=YAL067W-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20identified%20by%20gene-trapping%2C%20microarray-based%20expression%20analysis%2C%20and%20genome-wide%20homology%20searching;dbxref=SGD:S000028593;orf_classification=Uncharacterized
+chrI	SGD	gene	7236	9017	.	-	.	ID=YAL067C;Name=YAL067C;gene=SEO1;Alias=SEO1;Ontology_term=GO:0005215,GO:0006810,GO:0016020;Note=Putative%20permease%2C%20member%20of%20the%20allantoate%20transporter%20subfamily%20of%20the%20major%20facilitator%20superfamily%3B%20mutation%20confers%20resistance%20to%20ethionine%20sulfoxide;dbxref=SGD:S000000062;orf_classification=Verified
+chrI	SGD	CDS	7236	9017	.	-	0	Parent=YAL067C;Name=YAL067C;gene=SEO1;Alias=SEO1;Ontology_term=GO:0005215,GO:0006810,GO:0016020;Note=Putative%20permease%2C%20member%20of%20the%20allantoate%20transporter%20subfamily%20of%20the%20major%20facilitator%20superfamily%3B%20mutation%20confers%20resistance%20to%20ethionine%20sulfoxide;dbxref=SGD:S000000062;orf_classification=Verified
+chrI	SGD	ARS	7998	8548	.	.	.	ID=ARS103;Name=ARS103;Alias=ARSI-8;Note=Autonomously%20Replicating%20Sequence;dbxref=SGD:S000121253
+chrI	SGD	gene	10092	10400	.	+	.	ID=YAL066W;Name=YAL066W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%2C%20based%20on%20available%20experimental%20and%20comparative%20sequence%20data;dbxref=SGD:S000000061;orf_classification=Dubious
+chrI	SGD	CDS	10092	10400	.	+	0	Parent=YAL066W;Name=YAL066W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Dubious%20open%20reading%20frame%20unlikely%20to%20encode%20a%20protein%2C%20based%20on%20available%20experimental%20and%20comparative%20sequence%20data;dbxref=SGD:S000000061;orf_classification=Dubious
+chrI	SGD	gene	11566	11952	.	-	.	ID=YAL065C;Name=YAL065C;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20has%20homology%20to%20FLO1%3B%20possible%20pseudogene;dbxref=SGD:S000001817;orf_classification=Uncharacterized
+chrI	SGD	CDS	11566	11952	.	-	0	Parent=YAL065C;Name=YAL065C;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20has%20homology%20to%20FLO1%3B%20possible%20pseudogene;dbxref=SGD:S000001817;orf_classification=Uncharacterized
+chrI	SGD	gene	12047	12427	.	+	.	ID=YAL064W-B;Name=YAL064W-B;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Fungal-specific%20protein%20of%20unknown%20function;dbxref=SGD:S000002141;orf_classification=Uncharacterized
+chrI	SGD	CDS	12047	12427	.	+	0	Parent=YAL064W-B;Name=YAL064W-B;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Fungal-specific%20protein%20of%20unknown%20function;dbxref=SGD:S000002141;orf_classification=Uncharacterized
+chrI	SGD	gene	13364	13744	.	-	.	ID=YAL064C-A;Name=YAL064C-A;gene=TDA8;Alias=TDA8,YAL065C-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20YAL064C-A%20is%20not%20an%20essential%20gene;dbxref=SGD:S000002140;orf_classification=Uncharacterized
+chrI	SGD	CDS	13364	13744	.	-	0	Parent=YAL064C-A;Name=YAL064C-A;gene=TDA8;Alias=TDA8,YAL065C-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20YAL064C-A%20is%20not%20an%20essential%20gene;dbxref=SGD:S000002140;orf_classification=Uncharacterized
+chrI	SGD	gene	21526	21852	.	+	.	ID=YAL064W;Name=YAL064W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Protein%20of%20unknown%20function%3B%20may%20interact%20with%20ribosomes%2C%20based%20on%20co-purification%20experiments;dbxref=SGD:S000000060;orf_classification=Verified
+chrI	SGD	CDS	21526	21852	.	+	0	Parent=YAL064W;Name=YAL064W;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Protein%20of%20unknown%20function%3B%20may%20interact%20with%20ribosomes%2C%20based%20on%20co-purification%20experiments;dbxref=SGD:S000000060;orf_classification=Verified
+chrI	SGD	long_terminal_repeat	22232	22554	.	+	.	ID=YALWdelta1;Name=YALWdelta1;Ontology_term=SO:0000286;Note=Ty1%20LTR;dbxref=SGD:S000006787
+chrI	SGD	gene	22397	22687	.	-	.	ID=YAL063C-A;Name=YAL063C-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20identified%20by%20expression%20profiling%20and%20mass%20spectrometry;dbxref=SGD:S000028813;orf_classification=Uncharacterized
+chrI	SGD	CDS	22397	22687	.	-	0	Parent=YAL063C-A;Name=YAL063C-A;Ontology_term=GO:0003674,GO:0005575,GO:0008150;Note=Putative%20protein%20of%20unknown%20function%3B%20identified%20by%20expression%20profiling%20and%20mass%20spectrometry;dbxref=SGD:S000028813;orf_classification=Uncharacterized
+chrI	landmark	region	24001	27969	.	-	.	ID=FLO9
+# has empty Note:
+chrXVI	SGD	gene	101608	102702	.	-	.	ID=YPL236C;Name=YPL236C;gene=ENV7;Alias=ENV7;Ontology_term=GO:0000329,GO:0004674,GO:0008150;Note=;dbxref=SGD:S000006157;orf_classification=Uncharacterized
+chrXVI	SGD	CDS	101608	102702	.	-	0	Parent=YPL236C;Name=YPL236C;gene=ENV7;Alias=ENV7;Ontology_term=GO:0000329,GO:0004674,GO:0008150;Note=;dbxref=SGD:S000006157;orf_classification=Uncharacterized
+