97199fbf1ff56c9ee01956deb736b97244ea3ac6
hiram
  Sun Mar 1 21:52:28 2020 -0800
calculating a featureBits-like measurement for gene tracks, removing duplicates for ncbiRefSeq, removing blanks from gene names for ncbiRefSeq, and fixing a fundamental bug in the reference to geneToId (missing $ sigil) in ncbiRefSeqOtherAttrs.pl refs #23891

diff --git src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl
index fa82a6c..77898eb 100755
--- src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl
+++ src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl
@@ -71,43 +71,45 @@
 my @attrFound = ();
 for (my $i = 0;  $i <= $#attrOrder;  $i++) {
   $attrToIx{$attrOrder[$i]} = $i;
   $attrFound[$i] = 0;
 }
 
 # Map item IDs to extra columns:
 my %itemAttrs = ();
 my %ignoredAttrs = ();
 my %geneToId = ();
 my %idToParent = ();
 open(my $ATTRS, $attrsFile) || die "Can't open attributes file '$attrsFile': $!\n";
 while (<$ATTRS>) {
   chomp;
   my ($id, $attr, $val) = split("\t");
+  $id =~ s/ /_/g;
   if ($attr eq 'Dbxref') {
     # Dbxref is one attribute, but split it up into multiple output columns for URL generation
     my @xrefs = split(',', $val);
     foreach my $xref (@xrefs) {
       foreach my $source qw(GeneID MIM HGNC MGI miRBase WormBase XenBase BGD RGD SGD ZFIN FLYBASE) {
         if ($xref =~ s/^$source://) {
           my $ix = $attrToIx{$source};
           $itemAttrs{$id}->[$ix] = $xref if (! defined $itemAttrs{$id}->[$ix]);
           $attrFound[$ix] = 1;
         }
       }
     }
   } elsif ($attr eq 'Parent') {
+    $val =~ s/ /_/g;
     $idToParent{$id} = $val;
   } else {
     my $ix = $attrToIx{$attr};
     if (defined $ix) {
       if ($attr eq 'gene') {
         $geneToId{$val} = $id;
       }
       $itemAttrs{$id}->[$ix] = $val;
       $attrFound[$ix] = 1;
     } else {
       $ignoredAttrs{$attr}++;
     }
   }
 }
 close($ATTRS);
@@ -153,31 +155,31 @@
 # Make a list of indexes of empty columns, in descending order, for splicing out:
 for (my $ix = 0;  $ix <= $#attrOrder;  $ix++) {
   if (! $attrFound[$ix]) {
     warn "No values found for $attrOrder[$ix]; removing column from output.\n";
   }
 }
 
 # Read BED and append extra columns:
 open(my $BED, $bedFile) || die "Can't open BED file '$bedFile': $!\n";
 while (<$BED>) {
   chomp;
   my @bedCols = split;
   my $id = $bedCols[3];
   my $extraCols = $itemAttrs{$id};
   if (! defined $extraCols) {
-    $id = geneToId{$id};
+    $id = $geneToId{$id};
     $extraCols = $itemAttrs{$id};
   }
   die "No attributes for bed name '$bedCols[3]' (id '$id')" unless defined $extraCols;
   foreach my $ix (0..$#attrOrder) {
     if ($attrFound[$ix]) {
       # If the desired attribute isn't there for $id, but $id has ancestors, look for attr there
       my $parentId = $idToParent{$id};
       while (! defined $extraCols->[$ix] && defined $parentId) {
         my $parent = $itemAttrs{$parentId} || $itemAttrs{$geneToId{$parentId}};
         if (defined $parent) {
           if (defined $parent->[$ix]) {
             $extraCols->[$ix] = $parent->[$ix];
           } else {
             $parentId = $idToParent{$parentId};
           }