2f53c99dd16cc36dd899b2c237631b30d95d6bbb kate Wed Sep 9 20:44:58 2020 -0700 Clean-up after track, add make doc. refs #26129 diff --git src/hg/makeDb/doc/covid/covidHgiGwas.txt src/hg/makeDb/doc/covid/covidHgiGwas.txt new file mode 100644 index 0000000..259060a --- /dev/null +++ src/hg/makeDb/doc/covid/covidHgiGwas.txt @@ -0,0 +1,127 @@ +# GWAS from the COVID-19 Host Genetics Initiative (HGI) +# (2020-07-02 kate) + +# From: covid19hg.org/results + +# Contacts: Rachel Liao, Juha Karjalainen (Broad) +juha.karjalainen@helsinki.fi + +# Create build dir +cd /hive/data/outside/covidHostGenetics + +# GWAS meta-analyses file format + +1 #CHR chromosome +2 POS chromosome position in build 37 +3 REF non-effect allele +4 ALT effect allele (beta is for this allele) +5 SNP #CHR:POS:REF:ALT +{STUDY}_AF_Allele2 allele frequency in {STUDY} +{STUDY}_AF_fc allele frequency in {STUDY} / allele frequency in gnomAD v3 (1000000 if frequency in gnomAD is 0). Calculated based on each study's ancestry in gnomAD +{STUDY}_N +6 + (X = #studies * 3) all_meta_N number of studies that had the variant after AF and INFO filtering and as such were used for the meta +7 + X all_inv_var_meta_beta effect size on log(OR) scale +8 + X all_inv_var_meta_sebeta standard error of effect size +9 + X all_inv_var_meta_p p-value +10 + X all_inv_var_het_p p-value from Cochran's Q heterogeneity test + +# additional columns: +11 + X "all_meta_sample_N" +12 + X "all_meta_AF" +13 + X "rsid" + +# additional for hg19 liftover. Values in hg38. 
+14 + X "anew_chr" +15 + X "anew_pos" +16 + X "REF.1" +17 + X "ALT.1" + +# Studies + +1 Genetic determinants of COVID-19 complications in the Brazilian population BRACOVID +2 Genetic modifiers for COVID-19 related illness BelCovid +3 deCODE DECODE +4 FinnGen FinnGen +5 GEN-COVID, reCOVID GENCOVID +6 UK 100,000 Genomes Project genomicsengland100kgp_EUR +7 Genes & Health GNH +8 Generation Scotland GS +9 COVID19-Host(a)ge HOSTAGE +10 Helix Exome+ COVID-19 Phenotypes Helix +11 UK Blood Donors Cohort INTERVAL +12 LifeLines CytoSNP LifelinesCyto +13 LifeLines Global Screening Array LifelinesGsa +14 Netherlands Twin Register NTR +15 Partners Healthcare Biobank PHBB +16 Qatar Genome Program QGP +17 UK Biobank UKBB + +##################### +# Consult with Ana on Aug 12 + +1. Restrict to analyses with enough power (based on plots): + B2: hospitalized covid vs. population (3199 cases, 8 studies) + C2: covid vs. population (6696 cases, 18 studies) + +2. Label options: rsID and/or ALT/REF (use ALT/REF if no label) + +3. Mouseover: pValue, effect size, #studies + +4. Filters: pValue (default=5), #studies, + +###################### +# Make with new .as (input from Juha), and hg38 tracks +# (2020-09-04 kate) + +# Analysis B2: hospitalized covid vs. population +# cases: 3199 +# studies: 8 + +# Analysis C2: covid vs. 
population +# cases: 6696 +# studies: 18 + + +# download hg19 files +wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_B2_V2_20200701.b37.txt.gz +wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_C2_V2_20200701.b37.txt.gz + +# download hg38 files +wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_B2_V2_20200701.txt.gz +wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_C2_V2_20200701.txt.gz +gunzip *.gz + +wc -l covidHgiGwas*.hg38.txt + 15392647 covidHgiGwas.B2.hg38.txt + 24600933 covidHgiGwas.C2.hg38.txt + + +# rename +ln -s COVID19_HGI_ANA_B2_V2_20200701.txt covidHgiGwas.B2.hg38.txt +ln -s COVID19_HGI_ANA_C2_V2_20200701.txt covidHgiGwas.C2.hg38.txt +ln -s COVID19_HGI_ANA_B2_V2_20200701.b37.txt covidHgiGwas.B2.hg19.txt +ln -s COVID19_HGI_ANA_C2_V2_20200701.b37.txt covidHgiGwas.C2.hg19.txt + +wc -l covidHgiGwas* + 15392647 covidHgiGwas.B2.hg38.txt + 24600933 covidHgiGwas.C2.hg38.txt + +mkdir -p /gbdb/hg19/covidHgiGwas /gbdb/hg38/covidHgiGwas + +cat > makeHgi.csh << 'EOF' +set bin = ~/kent/src/hg/makeDb/outside/covid +foreach d (B2.hg19 B2.hg38 C2.hg19 C2.hg38) + set db = $d:e + set a = $d:r + set sizes = /hive/data/genomes/$db/chrom.sizes + set f = covidHgiGwas.$a.$db + echo $f.txt + perl $bin/makeCovidHgiGwas.pl $db $f.txt > $f.bed + bedSort $f.bed $f.sorted.bed + bedToBigBed -type=bed9+10 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $f.bb + ln -s `pwd`/$f.bb /gbdb/$db/covidHgiGwas +end +'EOF' + +csh makeHgi.csh >&! makeHgi.out & +