97199fbf1ff56c9ee01956deb736b97244ea3ac6 hiram Sun Mar 1 21:52:28 2020 -0800 calculating featureBits like measurement for gene tracks, removing duplicates for ncbiRefSeq, remove blanks from gene names for ncbiRefSeq, and fix fundamental bug reference to geneToId in ncbiRefSeqOtherAttrs.pl refs #23891 diff --git src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl index fa82a6c..77898eb 100755 --- src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl +++ src/hg/utils/automation/ncbiRefSeqOtherAttrs.pl @@ -71,43 +71,45 @@ my @attrFound = (); for (my $i = 0; $i <= $#attrOrder; $i++) { $attrToIx{$attrOrder[$i]} = $i; $attrFound[$i] = 0; } # Map item IDs to extra columns: my %itemAttrs = (); my %ignoredAttrs = (); my %geneToId = (); my %idToParent = (); open(my $ATTRS, $attrsFile) || die "Can't open attributes file '$attrsFile': $!\n"; while (<$ATTRS>) { chomp; my ($id, $attr, $val) = split("\t"); + $id =~ s/ /_/g; if ($attr eq 'Dbxref') { # Dbxref is one attribute, but split it up into multiple output columns for URL generation my @xrefs = split(',', $val); foreach my $xref (@xrefs) { foreach my $source qw(GeneID MIM HGNC MGI miRBase WormBase XenBase BGD RGD SGD ZFIN FLYBASE) { if ($xref =~ s/^$source://) { my $ix = $attrToIx{$source}; $itemAttrs{$id}->[$ix] = $xref if (! defined $itemAttrs{$id}->[$ix]); $attrFound[$ix] = 1; } } } } elsif ($attr eq 'Parent') { + $val =~ s/ /_/g; $idToParent{$id} = $val; } else { my $ix = $attrToIx{$attr}; if (defined $ix) { if ($attr eq 'gene') { $geneToId{$val} = $id; } $itemAttrs{$id}->[$ix] = $val; $attrFound[$ix] = 1; } else { $ignoredAttrs{$attr}++; } } } close($ATTRS); @@ -153,31 +155,31 @@ # Make a list of indexes of empty columns, in descending order, for splicing out: for (my $ix = 0; $ix <= $#attrOrder; $ix++) { if (! $attrFound[$ix]) { warn "No values found for $attrOrder[$ix]; removing column from output.\n"; } } # Read BED and append extra columns: open(my $BED, $bedFile) || die "Can't open BED file '$bedFile': $!\n"; while (<$BED>) { chomp; my @bedCols = split; my $id = $bedCols[3]; my $extraCols = $itemAttrs{$id}; if (! defined $extraCols) { - $id = geneToId{$id}; + $id = $geneToId{$id}; $extraCols = $itemAttrs{$id}; } die "No attributes for bed name '$bedCols[3]' (id '$id')" unless defined $extraCols; foreach my $ix (0..$#attrOrder) { if ($attrFound[$ix]) { # If the desired attribute isn't there for $id, but $id has ancestors, look for attr there my $parentId = $idToParent{$id}; while (! defined $extraCols->[$ix] && defined $parentId) { my $parent = $itemAttrs{$parentId} || $itemAttrs{$geneToId{$parentId}}; if (defined $parent) { if (defined $parent->[$ix]) { $extraCols->[$ix] = $parent->[$ix]; } else { $parentId = $idToParent{$parentId}; }