d5938e92fec0038d9874c8edf92ba60bc770f4a8 angie Mon May 24 09:57:40 2021 -0700 Handle Biosample keyword set to empty string. diff --git src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl index 897a25a..9bf00a5 100755 --- src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl +++ src/hg/utils/otto/sarscov2phylo/bioSampleTextToTab.pl @@ -18,30 +18,33 @@ while (<>) { chomp; if (/^Identifiers: BioSample: (\w+)(; Sample name: ([^;]+))?(; SRA: (\w+))?/) { my ($acc, $sampleName, $sra) = ($1, $3, $5); $attribs{__acc} = $acc; $attribs{__sampleName} = $sampleName if ($sampleName); $attribs{__sra} = $sra if ($sra); } elsif (/^Identifiers: /) { die "Can't parse Identifiers line $.:\n$_\t"; } elsif (/^ \/([^=]+)="(.+)"$/) { my ($attr, $val) = ($1, $2); if (isReal($val)) { $attribs{$attr} = $val; } + } elsif (/^ \/([^=]+)=""$/) { + # empty value; ignore. + next; } elsif (/^ \//) { die "Can't parse attribute line $.:\n$_\t"; } elsif (/^(EPI_ISL_\d+)/) { $attribs{__epi} = $1; } elsif (/^Accession: (\w+)\sID: (\d+)/) { # Last line of record; reconcile whatever attributes were accumulated with the columns that # we want to define. my ($acc, $gi) = ($1, $2); die "acc mismatch '$acc' vs. '$attribs{__acc}'" if ($acc ne $attribs{__acc}); my $name = ""; if (exists $attribs{"sample name"}) { $name = $attribs{"sample name"}; } elsif (exists $attribs{"Submitter Id"}) { $name = $attribs{"Submitter Id"}; } elsif (exists $attribs{strain}) {