fbd7a94213923088270a2bc3da63d0b8139451f0
angie
  Thu Oct 10 14:33:52 2019 -0700
LRG now distinguishes between fix_patch and novel_patch in their XML, so we can include mappings to fix & alt patch sequences.  Overdue for an update anyway.  refs #24285

diff --git src/hg/utils/automation/parseLrgXml.pl src/hg/utils/automation/parseLrgXml.pl
index cce61b9..069df2c 100755
--- src/hg/utils/automation/parseLrgXml.pl
+++ src/hg/utils/automation/parseLrgXml.pl
@@ -102,51 +102,51 @@
     $lrgSource = utf8ToHtml($lrgSources[0]->findvalue('name'));
     $lrgSourceUrl = $lrgSources[0]->findvalue('url');
   }
   # watch out for stray tab chars:
   $lrgSource =~ s/^\s*(.*?)\s*$/$1/;
   $lrgSourceUrl =~ s/^\s*(.*?)\s*$/$1/;
   my $creationDate = $dom->findvalue('/lrg/fixed_annotation/creation_date');
 
   foreach my $refMapping (@refMappings) {
     # Find BED 12+ fields.
     my $mapType = $refMapping->findvalue('@type');
     my $seq = $refMapping->findvalue('@other_name');
     if ($seq eq 'unlocalized') {
       $seq = "Un";
     }
-    if ($mapType eq 'haplotype' || $mapType eq 'patch') {
+    if ($mapType eq 'haplotype' || $mapType eq 'fix_patch' || $mapType eq 'novel_patch') {
       my $gbAcc = $refMapping->findvalue('@other_id_syn');
       $gbAcc =~ m/^[A-Z]+\d+\.\d+$/ || die "$xmlIn: $assemblyPrefix has $mapType mapping with " .
         "other_id_syn='$gbAcc', expecting versioned GenBank acc (e.g. 'KI270850.1').";
       if ($assemblyPrefix eq 'GRCh37') {
         $gbAcc =~ s/\..*//;
         $gbAcc = lc $gbAcc;
       } else {
         $gbAcc =~ s/\./v/;
       }
       # Trim chromosome band stuff if present
       $seq =~ s/[pq].*//;
       if ($assemblyPrefix eq 'GRCh37' && exists $gbAccToHg19Alt{$gbAcc}) {
         $seq = $gbAccToHg19Alt{$gbAcc};
       } elsif ($seq eq 'Un') {
         $seq .= "_$gbAcc";
       } else {
         # NOTE: as of 5/30/18, there are no mappings to hg19 or hg38 seqs with the suffix _random,
         # so I'm not sure what those would look like in the XML.  This could cause us to lose
         # mappings to the _random sequences, *if* any are added in the future.
-        my $suffix = ($mapType eq 'haplotype' ? 'alt' : 'fix');
+        my $suffix = (($mapType eq 'haplotype' || $mapType eq 'novel_patch') ? 'alt' : 'fix');
         $seq .= "_${gbAcc}_$suffix";
       }
     }
     $seq = 'chr' . $seq unless ($seq =~ /^chr/);
     my $start = $refMapping->findvalue('@other_start') - 1;
     my $end = $refMapping->findvalue('@other_end');
     my @mappingSpans = $refMapping->findnodes('mapping_span');
     die 'Unusual number of mapping_spans' if (@mappingSpans != 1);
     my $span = $mappingSpans[0];
     my $lrgStart = $span->findvalue('@lrg_start') - 1;
     my $lrgEnd = $span->findvalue('@lrg_end');
     if ($lrgSize < $lrgEnd) {
       die "$xmlIn: length of sequence is $lrgSize but $assemblyPrefix lrg_end is $lrgEnd";
     }
     my $name = $lrgName;