b62b289568697d98f019a4bd82bb910b044cbeb7
kate
  Tue Sep 15 15:27:20 2020 -0700
Support for use of lolly size, and pvalue displayed as E notation. refs #26129

diff --git src/hg/makeDb/doc/covid/covidHgiGwas.txt src/hg/makeDb/doc/covid/covidHgiGwas.txt
index 373a78d..c9ee1c0 100644
--- src/hg/makeDb/doc/covid/covidHgiGwas.txt
+++ src/hg/makeDb/doc/covid/covidHgiGwas.txt
@@ -1,127 +1,134 @@
 # GWAS from the COVID-19 Host Genetics Initiative (HGI)
 # (2020-07-02 kate)
 
 # From: covid19hg.org/results
 
 # Contacts:  Rachel Liao,  Juha Karjalainen (Broad)
 juha.karjalainen@helsinki.fi
 
 # Create build dir
 cd /hive/data/outside/covid/covidHostGenetics
 
 # GWAS meta-analyses file format
 
 1 #CHR    chromosome
 2 POS     chromosome position in build 37
 3 REF     non-effect allele
 4 ALT     effect allele (beta is for this allele)
 5 SNP     #CHR:POS:REF:ALT
 {STUDY}_AF_Allele2      allele frequency in {STUDY}
 {STUDY}_AF_fc   allele frequency in {STUDY} / allele frequency in gnomAD v3 (1000000 if frequency in gnomAD is 0). Calculated based on each study's ancestry in gnomAD
 {STUDY}_N
 6 + (X = #studies * 3) all_meta_N      number of studies that had the variant after AF and INFO filtering and as such were used for the meta
 7 + X all_inv_var_meta_beta   effect size on log(OR) scale
 8 + X all_inv_var_meta_sebeta standard error of effect size
 9 + X all_inv_var_meta_p      p-value
 10 + X all_inv_var_het_p       p-value from Cochran's Q heterogeneity test
 
 # additional columns:
 11 + X "all_meta_sample_N"
 12 + X "all_meta_AF"
 13 + X "rsid"
 
 # additional for hg19 liftover. Values in hg38.
 14 + X "anew_chr"
 15 + X "anew_pos"
 16 + X "REF.1"
 17 + X "ALT.1"
 
 # Studies
 
 1 Genetic determinants of COVID-19 complications in the Brazilian population      BRACOVID
 2 Genetic modifiers for COVID-19 related illness  BelCovid
 3 deCODE  DECODE
 4 FinnGen FinnGen
 5 GEN-COVID, reCOVID      GENCOVID
 6 UK 100,000 Genomes Project      genomicsengland100kgp_EUR
 7 Genes & Health  GNH
 8 Generation Scotland     GS
 9 COVID19-Host(a)ge       HOSTAGE
 10 Helix Exome+ COVID-19 Phenotypes        Helix
 11 UK Blood Donors Cohort  INTERVAL
 12 LifeLines CytoSNP       LifelinesCyto
 13 LifeLines Global Screening Array        LifelinesGsa
 14 Netherlands Twin Register       NTR
 15 Partners Healthcare Biobank     PHBB
 16 Qatar Genome Program    QGP
 17 UK Biobank      UKBB
 
 #####################
 # Consult with Ana on Aug 12
 
 1. Restrict to analyses with enough power (based on plots):
         B2: hospitalized covid vs. population  (3199 cases, 8 studies)
         C2: covid vs. population (6696 cases, 18 studies)
 
 2. Label options: rsID and/or ALT/REF (use ALT/REF if no label)
 
 3. Mouseover: pValue, effect size, #studies
 
 4. Filters: pValue (default=5), #studies,
 
 ######################
 # Make with new .as (input from Juha), and hg38 tracks
 # (2020-09-04 kate)
 
 # Analysis B2: hospitalized covid vs. population
 # cases: 3199
 # studies: 8
 
 # Analysis C2: covid vs. population
 # cases: 6696
 # studies: 18
 
 
 # download hg19 files
 wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_B2_V2_20200701.b37.txt.gz
 wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_C2_V2_20200701.b37.txt.gz
 
 # download hg38 files
 wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_B2_V2_20200701.txt.gz
 wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_C2_V2_20200701.txt.gz
 gunzip *.gz
 
 wc -l covidHgiGwas*.hg38.txt
    15392647 covidHgiGwas.B2.hg38.txt
    24600933 covidHgiGwas.C2.hg38.txt
 
 
 # rename
 ln -s COVID19_HGI_ANA_B2_V2_20200701.txt covidHgiGwas.B2.hg38.txt
 ln -s COVID19_HGI_ANA_C2_V2_20200701.txt covidHgiGwas.C2.hg38.txt
 ln -s COVID19_HGI_ANA_B2_V2_20200701.b37.txt covidHgiGwas.B2.hg19.txt
 ln -s COVID19_HGI_ANA_C2_V2_20200701.b37.txt covidHgiGwas.C2.hg19.txt
 
 wc -l covidHgiGwas*
    15392647 covidHgiGwas.B2.hg38.txt
    24600933 covidHgiGwas.C2.hg38.txt
 
 mkdir -p /gbdb/hg19/covidHgiGwas /gbdb/hg38/covidHgiGwas
 
 cat > makeHgi.csh << 'EOF'
-set bin = ~/kent/src/makeDb/outside/covid
+set bin = ~/kent/src/hg/makeDb/outside/covid
 foreach d (B2.hg19 B2.hg38 C2.hg19 C2.hg38)
     set db = $d:e
     set a = $d:r
     set sizes = /hive/data/genomes/$db/chrom.sizes
-    set f = covidHgiGwas$a.$db
-    echo $f.txt
-    perl $bin/makeCovidHgiGwas.pl $db $f.txt > $f.bed
+    set b = covidHgiGwas
+    set f = $b.$a.$db
+    set bb = $b$a.$db
+    echo $b.$a.$db
+    if ($a == "B2") then
+        set studies = 8
+    else
+        set studies = 18
+    endif
+    perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt > $f.bed
     bedSort $f.bed $f.sorted.bed
-    bedToBigBed -type=bed9+10 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $f.bb
-    ln -s `pwd`/$f.bb /gbdb/hg19/covidHgiGwas
+    bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $bb.bb
+    ln -s `pwd`/$bb.bb /gbdb/$db/covidHgiGwas
 end
 'EOF'
 
 csh makeHgi.csh >&! makeHgi.out &