src/hg/encode/encodeValidate/doEncodeValidate.pl 1.152
1.152 2009/02/13 19:16:08 mikep
forgot a <tab>
Index: src/hg/encode/encodeValidate/doEncodeValidate.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeValidate/doEncodeValidate.pl,v
retrieving revision 1.151
retrieving revision 1.152
diff -b -B -U 4 -r1.151 -r1.152
--- src/hg/encode/encodeValidate/doEncodeValidate.pl 13 Feb 2009 19:00:42 -0000 1.151
+++ src/hg/encode/encodeValidate/doEncodeValidate.pl 13 Feb 2009 19:16:08 -0000 1.152
@@ -852,8 +852,14 @@
return ();
}
sub validateFasta
+# Wold lab fasta files; they don't have fastq format.
+# Sample fasta lines are:
+#>HWI-EAS229_75_30DY0AAXX:7:1:0:949/1
+#NGCGGATGTTCTCAGTGTCCACAGCGCAGGTGAAATAAGGGAAGCAGTAGCGACGCCCATCTCCACGCGCAGCGC
+#>HWI-EAS229_75_30DY0AAXX:7:1:0:1739/1
+#NAGCCATCAGGAAAGCAAGGAGGGGGCATTAAAGGACAATCAAGGGGTTTGGAGGAAGGAGCAGGCCGGAGGCAA
{
# Wold lab has fasta files, like fastq format without quality
my ($path, $file, $type) = @_;
doTime("beginning validateFasta") if $opt_timing;
@@ -865,9 +871,9 @@
my $state = 'firstLine';
my $seqName;
my $seqNameRegEx = "[A-Za-z0-9_.:/-]+";
my $seqRegEx = "[A-Za-z\n\.~]+";
- my $states = {firstLine => {REGEX => "\@($seqNameRegEx)", NEXT => 'seqLine'},
+ my $states = {firstLine => {REGEX => ">($seqNameRegEx)", NEXT => 'seqLine'},
seqLine => {REGEX => $seqRegEx, NEXT => 'firstLine'}};
while(<$fh>) {
chomp;
$line++;