e616b1ad06be9b542979d2224de81fa26a355ada angie Tue Feb 2 11:36:05 2021 -0800 Loosen up regex for BioSample attributes with stray quotes. diff --git src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl index 7e5a424f..897a25a 100755 --- src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl +++ src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl @@ -13,31 +13,31 @@ (lc($val) ne "not provided") && (lc($val) ne "restricted access")); } my %attribs = (); while (<>) { chomp; if (/^Identifiers: BioSample: (\w+)(; Sample name: ([^;]+))?(; SRA: (\w+))?/) { my ($acc, $sampleName, $sra) = ($1, $3, $5); $attribs{__acc} = $acc; $attribs{__sampleName} = $sampleName if ($sampleName); $attribs{__sra} = $sra if ($sra); } elsif (/^Identifiers: /) { die "Can't parse Identifiers line $.:\n$_\t"; - } elsif (/^ \/([^=]+)="([^"]+)"$/) { + } elsif (/^ \/([^=]+)="(.+)"$/) { my ($attr, $val) = ($1, $2); if (isReal($val)) { $attribs{$attr} = $val; } } elsif (/^ \//) { die "Can't parse attribute line $.:\n$_\t"; } elsif (/^(EPI_ISL_\d+)/) { $attribs{__epi} = $1; } elsif (/^Accession: (\w+)\sID: (\d+)/) { # Last line of record; reconcile whatever attributes were accumulated with the columns that # we want to define. my ($acc, $gi) = ($1, $2); die "acc mismatch '$acc' vs. '$attribs{__acc}'" if ($acc ne $attribs{__acc}); my $name = ""; if (exists $attribs{"sample name"}) {