#!/usr/bin/env perl

# geneRevsAddDiseases.pl diseaseFile bedFile
#
# Provenance: added in commit e5d963606c378c0f08cb946c3a9393847dd6ce22
#   (kate, Wed Oct 7 15:23:41 2020 -0700)
#   "Add bigBed to GeneReviews to support mouseOver with diseases. refs #19841"
#   as src/hg/utils/otto/geneReviews/geneRevsAddDiseases.pl
#
# To support mouseOver with disease names via adding disease list to BED file
# RM #19841
#
# Input 1 (diseaseFile): gene disease file created by extendGeneReviews.pl.
#   Tab-separated, three columns per line:
#       <gene> <count> <disease1>;<disease2>;...<diseaseN>
#   The count and disease-list columns are stored and emitted verbatim; this
#   script does not parse or validate them.  If a gene appears more than once,
#   the last line wins.
#
# Input 2 (bedFile): tab-separated BED file whose first four columns are
#   chrom, start, end, gene.  Any further columns are ignored.
#
# Output (standard output): one 11-column tab-separated row per BED row:
#       chrom start end gene 0 . start end 0 <count> <diseases>
#   (columns 5-9 are fixed filler in BED9 layout: score 0, strand ".",
#   thickStart/thickEnd copied from start/end, itemRgb 0).
#   Genes that are absent from diseaseFile get count 0 and diseases "n/a".
#
# Dies if either file cannot be opened, if a non-blank BED row has fewer than
# four columns, or if standard output cannot be flushed.

use strict;
use warnings;
use English qw(-no_match_vars);
use feature 'say';      # append newline when printing

# Extra arguments are ignored, as before; too few is a usage error.
die "usage: $PROGRAM_NAME diseaseFile bedFile\n" if @ARGV < 2;
my ($diseaseFile, $bedFile) = @ARGV;

# Read in gene/disease file

my %geneDiseases;
my %geneDiseaseCounts;

# Three-argument open: the file name can never be interpreted as a mode or pipe.
open(my $D, '<', $diseaseFile)
    or die "can't open file $diseaseFile: $OS_ERROR\n";
while (my $line = <$D>) {
    chomp $line;
    next if $line eq '';                    # tolerate blank lines
    my ($gene, $count, $diseases) = split /\t/, $line;
    next unless defined $gene;              # line held nothing but tabs
    # A short line leaves the missing columns empty in the output, exactly as
    # the undefined values used to print, but without "uninitialized" warnings.
    $geneDiseases{$gene}      = $diseases // '';
    $geneDiseaseCounts{$gene} = $count    // '';
}
close $D;

# Read the BED file and append disease count and disease list to each row

open(my $B, '<', $bedFile)
    or die "can't open file $bedFile: $OS_ERROR\n";
while (my $row = <$B>) {
    chomp $row;
    next if $row eq '';                     # tolerate blank lines
    my ($chrom, $start, $end, $gene) = split /\t/, $row;
    # Fail loudly rather than emit a malformed row with empty coordinates.
    die "$bedFile line $INPUT_LINE_NUMBER: expected at least 4 tab-separated columns\n"
        unless defined $gene;

    my $diseases = "n/a";
    my $count    = 0;
    if (exists $geneDiseases{$gene}) {
        $diseases = $geneDiseases{$gene};
        $count    = $geneDiseaseCounts{$gene};
    }
    # Explicit join instead of setting the global output field separator.
    say join("\t",
        $chrom, $start, $end, $gene,
        "0", ".", $start, $end, 0,
        $count, $diseases);
}
close $B;

# Buffered write errors (full disk, closed pipe) only surface at close.
close(STDOUT) or die "error writing output: $OS_ERROR\n";