435fbceebeaddec93ebca4eaa50b20123967a285 angie Fri Oct 25 11:40:25 2019 -0700 LRG: if source URL is missing a protocol, prepend http:// . Thx Jairo. refs #24285 diff --git src/hg/utils/automation/parseLrgXml.pl src/hg/utils/automation/parseLrgXml.pl index 069df2c..cff0370 100755 --- src/hg/utils/automation/parseLrgXml.pl +++ src/hg/utils/automation/parseLrgXml.pl @@ -93,30 +93,34 @@ my $lrgNcbiAcc = $dom->findvalue('/lrg/fixed_annotation/sequence_source'); # LRG sequence source(s); for now, keeping only the first listed source because hgc.c # doesn't yet anticipate lists. my @lrgSources = $dom->findnodes('/lrg/fixed_annotation/source'); my ($lrgSource, $lrgSourceUrl) = ('', ''); while ((defined $lrgSources[0]) && (! $lrgSources[0]->findvalue('name'))) { shift @lrgSources; } if (defined $lrgSources[0]) { $lrgSource = utf8ToHtml($lrgSources[0]->findvalue('name')); $lrgSourceUrl = $lrgSources[0]->findvalue('url'); } # watch out for stray tab chars: $lrgSource =~ s/^\s*(.*?)\s*$/$1/; $lrgSourceUrl =~ s/^\s*(.*?)\s*$/$1/; + # watch out for URLs that are just hostnames (no protocol) + if ($lrgSourceUrl !~ /^\w+:\/\//) { + $lrgSourceUrl = "http://" . $lrgSourceUrl; + } my $creationDate = $dom->findvalue('/lrg/fixed_annotation/creation_date'); foreach my $refMapping (@refMappings) { # Find BED 12+ fields. my $mapType = $refMapping->findvalue('@type'); my $seq = $refMapping->findvalue('@other_name'); if ($seq eq 'unlocalized') { $seq = "Un"; } if ($mapType eq 'haplotype' || $mapType eq 'fix_patch' || $mapType eq 'novel_patch') { my $gbAcc = $refMapping->findvalue('@other_id_syn'); $gbAcc =~ m/^[A-Z]+\d+\.\d+$/ || die "$xmlIn: $assemblyPrefix has $mapType mapping with " . "other_id_syn='$gbAcc', expecting versioned GenBank acc (e.g. 'KI270850.1')."; if ($assemblyPrefix eq 'GRCh37') { $gbAcc =~ s/\..*//;