007a3bff22438077c95cf425e22877644704c23a
kate
  Tue Sep 15 15:25:10 2020 -0700
1. Size lollipops based on the number of studies. 2. Display p-values in E notation (request from Juha). refs #26129

diff --git src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
index d968e3e..bbda600 100644
--- src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
+++ src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl
@@ -1,57 +1,61 @@
-# Format files from COVID Host Genetics Initiative as BED 9+10 (covidHgiGwas.as) for lollipop display
-#
+# Format files from COVID Host Genetics Initiative as BED 9+11 (covidHgiGwas.as) for lollipop display
 
 use strict;
 use English;
 
 my $db = $ARGV[0];
-my $file = $ARGV[1];
+my $allStudies = $ARGV[1];
+my $file = $ARGV[2];
 open(my $fh, $file) or die ("can't open file $file\n");
 
 my $hdr = <$fh>;
 chomp($hdr);
 my @hdr = split('\t', $hdr);
 my $fields = $#hdr;
 
 # extended format in hg19 which was lifted from hg38 analysis (4 fields w/ hg38)
 if ($db eq "hg38") {
     $fields += 4;
 }
 my $first = $fields-11;
 my $last = $fields-4;
 
+my $scale = 3;
+my $sizeBins = 5;
 while (<$fh>) {
     chomp;
     my ($chromNum, $pos, $ref, $alt) = split;
     # drop 'chr23' entries (until we identify them)
     next if $chromNum eq "23";
     my @data = split;
     my ($studies, $effectSize, $effectSE, $pval, $pvalHet, 
         $samples, $alleleFreq, $snp) = @data[$first..$last];
     my $chr = "chr" . $chromNum;
     my $start = $pos - 1;
     my $end = $pos;
     my $score = 0;
 
 
     my $blue = "0,0,255"; my $red = "255,0,0";
     my $lightBlue = "160,160,255"; my $lightRed = "255,160,160";
-    my $logPval = -(log($pval)/log(10)); my $logPvalHet =  -(log($pvalHet)/log(10));
+    my $logPval = -(log($pval)/log(10));
     my $intPval = int($logPval);
     my $color;
     if ($effectSize > 0) {
         # positive (red)
         $color = ($intPval >= 5) ? $red : $lightRed;
     } else {
         # negative (blue)
         $color = ($intPval >= 5) ? $blue : $lightBlue;
     }
     my $name = ($snp eq "NA") ? $chromNum . ":" . $pos : $snp;
+    my $studyWeight = int(($studies * $sizeBins) / $allStudies) + $scale; 
     $OFS = "\t"; print $chr, $start, $end, $name, $score, '.', $start, $end, $color; $OFS = "";
     printf("\t%.3f\t%.3f", $effectSize, $effectSE);
-    printf("\t%.3f\t%.3f", $logPval, $logPvalHet);
-    printf("\t%s\t%s\t%.3f\t%s\t%s\t%.3f", $ref, $alt, $alleleFreq, $samples, $studies, abs($effectSize));
+    printf("\t%.2e\t%.3f\t%.2e", $pval, $logPval, $pvalHet);
+    printf("\t%s\t%s\t%.3f\t%s\t%s\t%d\t%.3f", $ref, $alt, $alleleFreq, $samples, $studies, 
+                $studyWeight, abs($effectSize));
     print "\n";
 }
 close ($fh);