2f53c99dd16cc36dd899b2c237631b30d95d6bbb kate Wed Sep 9 20:44:58 2020 -0700 Clean-up after track, add make doc. refs #26129 diff --git src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl new file mode 100644 index 0000000..d968e3e --- /dev/null +++ src/hg/makeDb/outside/covid/makeCovidHgiGwas.pl @@ -0,0 +1,57 @@ +# Format files from COVID Host Genetics Initiative as BED 9+10 (covidHgiGwas.as) for lollipop display +# + +use strict; +use English; + +my $db = $ARGV[0]; +my $file = $ARGV[1]; +open(my $fh, $file) or die ("can't open file $file\n"); + +my $hdr = <$fh>; +chomp($hdr); +my @hdr = split('\t', $hdr); +my $fields = $#hdr; + +# extended format in hg19 which was lifted from hg38 analysis (4 fields w/ hg38) +if ($db eq "hg38") { + $fields += 4; +} +my $first = $fields-11; +my $last = $fields-4; + +while (<$fh>) { + chomp; + my ($chromNum, $pos, $ref, $alt) = split; + # drop 'chr23' entries (tills we identify) + next if $chromNum eq "23"; + my @data = split; + my ($studies, $effectSize, $effectSE, $pval, $pvalHet, + $samples, $alleleFreq, $snp) = @data[$first..$last]; + my $chr = "chr" . $chromNum; + my $start = $pos - 1; + my $end = $pos; + my $score = 0; + + + my $blue = "0,0,255"; my $red = "255,0,0"; + my $lightBlue = "160,160,255"; my $lightRed = "255,160,160"; + my $logPval = -(log($pval)/log(10)); my $logPvalHet = -(log($pvalHet)/log(10)); + my $intPval = int($logPval); + my $color; + if ($effectSize > 0) { + # positive (red) + $color = ($intPval >= 5) ? $red : $lightRed; + } else { + # negative (blue) + $color = ($intPval >= 5) ? $blue : $lightBlue; + } + my $name = ($snp eq "NA") ? $chromNum . ":" . $pos : $snp; + $OFS = "\t"; print $chr, $start, $end, $name, $score, '.', $start, $end, $color; $OFS = ""; + printf("\t%.3f\t%.3f", $effectSize, $effectSE); + printf("\t%.3f\t%.3f", $logPval, $logPvalHet); + printf("\t%s\t%s\t%.3f\t%s\t%s\t%.3f", $ref, $alt, $alleleFreq, $samples, $studies, abs($effectSize)); + print "\n"; +} +close ($fh); +