007a3bff22438077c95cf425e22877644704c23a kate Tue Sep 15 15:25:10 2020 -0700
1. Size lollies based on #studies. 2. Display p-value in E notation (request from Juha). refs #26129

diff --git src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
index d968e3e..bbda600 100644
--- src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
+++ src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
@@ -1,57 +1,61 @@
-# Format files from COVID Host Genetics Initiative as BED 9+10 (covidHgiGwas.as) for lollipop display
-#
+# Format files from COVID Host Genetics Initiative as BED 9+11 (covidHgiGwas.as) for lollipop display
 
 use strict;
 use English;
 
 my $db = $ARGV[0];
-my $file = $ARGV[1];
+my $allStudies = $ARGV[1];
+my $file = $ARGV[2];
 
 open(my $fh, $file) or die ("can't open file $file\n");
 my $hdr = <$fh>;
 chomp($hdr);
 my @hdr = split('\t', $hdr);
 my $fields = $#hdr;
 # extended format in hg19 which was lifted from hg38 analysis (4 fields w/ hg38)
 if ($db eq "hg38") {
     $fields += 4;
 }
 my $first = $fields-11;
 my $last = $fields-4;
 
+my $scale = 3;
+my $sizeBins = 5;
 while (<$fh>) {
     chomp;
     my ($chromNum, $pos, $ref, $alt) = split;
     # drop 'chr23' entries (tills we identify)
     next if $chromNum eq "23";
     my @data = split;
     my ($studies, $effectSize, $effectSE, $pval, $pvalHet, $samples, $alleleFreq, $snp) = @data[$first..$last];
     my $chr = "chr" . $chromNum;
     my $start = $pos - 1;
     my $end = $pos;
     my $score = 0;
     my $blue = "0,0,255";
     my $red = "255,0,0";
     my $lightBlue = "160,160,255";
     my $lightRed = "255,160,160";
-    my $logPval = -(log($pval)/log(10));
     my $logPvalHet = -(log($pvalHet)/log(10));
+    my $logPval = -(log($pval)/log(10));
     my $intPval = int($logPval);
     my $color;
     if ($effectSize > 0) {
         # positive (red)
         $color = ($intPval >= 5) ? $red : $lightRed;
     } else {
         # negative (blue)
         $color = ($intPval >= 5) ? $blue : $lightBlue;
     }
     my $name = ($snp eq "NA") ? $chromNum . ":" . $pos : $snp;
+    my $studyWeight = int(($studies * $sizeBins) / $allStudies) + $scale;
     $OFS = "\t";
     print $chr, $start, $end, $name, $score, '.', $start, $end, $color;
     $OFS = "";
     printf("\t%.3f\t%.3f", $effectSize, $effectSE);
-    printf("\t%.3f\t%.3f", $logPval, $logPvalHet);
-    printf("\t%s\t%s\t%.3f\t%s\t%s\t%.3f", $ref, $alt, $alleleFreq, $samples, $studies, abs($effectSize));
+    printf("\t%.2e\t%.3f\t%.2e", $pval, $logPval, $pvalHet);
+    printf("\t%s\t%s\t%.3f\t%s\t%s\t%d\t%.3f", $ref, $alt, $alleleFreq, $samples, $studies,
+                    $studyWeight, abs($effectSize));
     print "\n";
 }
 close ($fh);