2f53c99dd16cc36dd899b2c237631b30d95d6bbb
kate
  Wed Sep 9 20:44:58 2020 -0700
Clean-up after track, add make doc. refs #26129

diff --git src/hg/makeDb/doc/covid/covidHgiGwas.txt src/hg/makeDb/doc/covid/covidHgiGwas.txt
new file mode 100644
index 0000000..259060a
--- /dev/null
+++ src/hg/makeDb/doc/covid/covidHgiGwas.txt
@@ -0,0 +1,127 @@
+# GWAS from the COVID-19 Host Genetics Initiative (HGI)
+# (2020-07-02 kate)
+
+# From: covid19hg.org/results
+
+# Contacts:  Rachel Liao,  Juha Karjalainen (Broad)
+juha.karjalainen@helsinki.fi
+
+# Create build dir
+cd /hive/data/outside/covidHostGenetics
+
+# GWAS meta-analyses file format
+
+1 #CHR    chromosome
+2 POS     chromosome position in build 37
+3 REF     non-effect allele
+4 ALT     effect allele (beta is for this allele)
+5 SNP     #CHR:POS:REF:ALT
+{STUDY}_AF_Allele2      allele frequency in {STUDY}
+{STUDY}_AF_fc   allele frequency in {STUDY} / allele frequency in gnomAD v3 (1000000 if frequency in gnomAD is 0). Calculated based on each study's ancestry in gnomAD
+{STUDY}_N
+6 + (X = #studies * 3) all_meta_N      number of studies that had the variant after AF and INFO filtering and as such were used for the meta
+7 + X all_inv_var_meta_beta   effect size on log(OR) scale
+8 + X all_inv_var_meta_sebeta standard error of effect size
+9 + X all_inv_var_meta_p      p-value
+10 + X all_inv_var_het_p       p-value from Cochran's Q heterogeneity test
+
+# additional columns:
+11 + X "all_meta_sample_N"
+12 + X "all_meta_AF"
+13 + X "rsid"
+
+# additional for hg19 liftover. Values in hg38.
+14 + X "anew_chr"
+15 + X "anew_pos"
+16 + X "REF.1"
+17 + X "ALT.1"
+
+# Studies
+
+1 Genetic determinants of COVID-19 complications in the Brazilian population      BRACOVID
+2 Genetic modifiers for COVID-19 related illness  BelCovid
+3 deCODE  DECODE
+4 FinnGen FinnGen
+5 GEN-COVID, reCOVID      GENCOVID
+6 UK 100,000 Genomes Project      genomicsengland100kgp_EUR
+7 Genes & Health  GNH
+8 Generation Scotland     GS
+9 COVID19-Host(a)ge       HOSTAGE
+10 Helix Exome+ COVID-19 Phenotypes        Helix
+11 UK Blood Donors Cohort  INTERVAL
+12 LifeLines CytoSNP       LifelinesCyto
+13 LifeLines Global Screening Array        LifelinesGsa
+14 Netherlands Twin Register       NTR
+15 Partners Healthcare Biobank     PHBB
+16 Qatar Genome Program    QGP
+17 UK Biobank      UKBB
+
+#####################
+# Consult with Ana on Aug 12
+
+1. Restrict to analyses with enough power (based on plots):
+        B2: hospitalized covid vs. population  (3199 cases, 8 studies)
+        C2: covid vs. population (6696 cases, 18 studies)
+
+2. Label options: rsID and/or ALT/REF (use ALT/REF if no label)
+
+3. Mouseover: pValue, effect size, #studies
+
+4. Filters: pValue (default=5), #studies
+
+######################
+# Make with new .as (input from Juha), and hg38 tracks
+# (2020-09-04 kate)
+
+# Analysis B2: hospitalized covid vs. population
+# cases: 3199
+# studies: 8
+
+# Analysis C2: covid vs. population
+# cases: 6696
+# studies: 18
+
+
+# download hg19 files
+wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_B2_V2_20200701.b37.txt.gz
+wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_C2_V2_20200701.b37.txt.gz
+
+# download hg38 files, then uncompress all four downloads
+wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_B2_V2_20200701.txt.gz
+wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_C2_V2_20200701.txt.gz
+gunzip *.gz
+
+wc -l covidHgiGwas*.hg38.txt
+   15392647 covidHgiGwas.B2.hg38.txt
+   24600933 covidHgiGwas.C2.hg38.txt
+
+
+# rename (symlink the downloads to short per-analysis, per-assembly names)
+ln -s COVID19_HGI_ANA_B2_V2_20200701.txt covidHgiGwas.B2.hg38.txt
+ln -s COVID19_HGI_ANA_C2_V2_20200701.txt covidHgiGwas.C2.hg38.txt
+ln -s COVID19_HGI_ANA_B2_V2_20200701.b37.txt covidHgiGwas.B2.hg19.txt
+ln -s COVID19_HGI_ANA_C2_V2_20200701.b37.txt covidHgiGwas.C2.hg19.txt
+
+wc -l covidHgiGwas*
+   15392647 covidHgiGwas.B2.hg38.txt
+   24600933 covidHgiGwas.C2.hg38.txt
+
+mkdir -p /gbdb/hg19/covidHgiGwas /gbdb/hg38/covidHgiGwas
+# For each analysis/assembly pair: convert the table to BED, sort, build bigBed, link under /gbdb/<db>
+cat > makeHgi.csh << 'EOF'
+set bin = ~/kent/src/hg/makeDb/outside/covid
+foreach d (B2.hg19 B2.hg38 C2.hg19 C2.hg38)
+    set db = $d:e
+    set a = $d:r
+    set sizes = /hive/data/genomes/$db/chrom.sizes
+    set f = covidHgiGwas.$a.$db
+    echo $f.txt
+    perl $bin/makeCovidHgiGwas.pl $db $f.txt > $f.bed
+    bedSort $f.bed $f.sorted.bed
+    bedToBigBed -type=bed9+10 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $f.bb
+    ln -s `pwd`/$f.bb /gbdb/$db/covidHgiGwas
+end
+'EOF'
+# Run in the background; stdout and stderr both go to makeHgi.out (csh >&! redirection)
+csh makeHgi.csh >&! makeHgi.out &
+