b6acb5dd3f5fdafad0ae7cfda25f60047823d842 chmalee Tue Jan 14 15:28:20 2020 -0800 add _size field to hg38 dgv for filtering, refs #24749 diff --git src/hg/makeDb/doc/hg38/variation.txt src/hg/makeDb/doc/hg38/variation.txt index d1d5cfb..1e06434 100644 --- src/hg/makeDb/doc/hg38/variation.txt +++ src/hg/makeDb/doc/hg38/variation.txt @@ -1848,15 +1848,29 @@ cp /dev/null tgpPhase3.txt for c in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X; do file=ALL.chr$c.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz echo -e "/gbdb/hg38/1000Genomes/$file\tchr$c" >> tgpPhase3.txt done # hgBbiDbLink doesn't support the seq column so use hgLoadSqlTab: hgLoadSqlTab hg38 tgpPhase3 ~/kent/src/hg/lib/bbiChroms.sql tgpPhase3.txt # Make a chromosomes line for trackDb (no alts, no Y!): hgsql hg38 -NBe 'select seqName from tgpPhase3' | xargs echo | sed -e 's/ /,/g' #chr1,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr2,chr20,chr21,chr22,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chrX # I don't see counts of SNPs / indels documented anywhere, so extract: time (zcat ALL.chr*.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz | g -v ^# | cut -f 8 | sed -re 's/.*VT=//; s/;.*//' | sort | uniq -c | head -100) ############################################################################## +# DGV with filters - DONE Jan 01 2020 + zcat dgvSupporting.bed.gz | tawk '{print $0, $3-$2}' | sort -k1,1 -k2,2n > dgvSupportingWithSize.bed + zcat dgvMerged.bed.gz | tawk '{print $0, $3-$2}' | sort -k1,1 -k2,2n > dgvMergedWithSize.bed + bedToBigBed -tab -as=/hive/data/genomes/hg19/bed/dgv/160810/dgvPlusSize.as -type=bed9+14 dgvMergedWithSize.bed /hive/data/genomes/hg38/chrom.sizes dgvMerged.bb + # pass1 - making usageList (41 chroms): 333 millis + # pass2 - checking and writing primary data (389106 records, 23 fields): 9803 millis + bedToBigBed -tab -as=/hive/data/genomes/hg19/bed/dgv/160810/dgvPlusSize.as -type=bed9+14 dgvSupportingWithSize.bed /hive/data/genomes/hg38/chrom.sizes dgvSupporting.bb + # pass1 - making usageList (41 chroms): 2695 millis + # pass2 - checking and writing primary data (6584665 records, 23 fields): 27859 millis + mkdir -p /gbdb/hg38/dgv/ + cd /gbdb/hg38/dgv + ln -s /hive/data/genomes/hg38/bed/dgv/160906/dgvMerged.bb + ln -s /hive/data/genomes/hg38/bed/dgv/160906/dgvSupporting.bb +##############################################################################