b6acb5dd3f5fdafad0ae7cfda25f60047823d842
chmalee
  Tue Jan 14 15:28:20 2020 -0800
add _size field to hg38 dgv for filtering, refs #24749

diff --git src/hg/makeDb/doc/hg38/variation.txt src/hg/makeDb/doc/hg38/variation.txt
index d1d5cfb..1e06434 100644
--- src/hg/makeDb/doc/hg38/variation.txt
+++ src/hg/makeDb/doc/hg38/variation.txt
@@ -1848,15 +1848,29 @@
     cp /dev/null tgpPhase3.txt
     for c in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X; do
       file=ALL.chr$c.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
       echo -e "/gbdb/hg38/1000Genomes/$file\tchr$c" >> tgpPhase3.txt
     done
     # hgBbiDbLink doesn't support the seq column so use hgLoadSqlTab:
     hgLoadSqlTab hg38 tgpPhase3 ~/kent/src/hg/lib/bbiChroms.sql tgpPhase3.txt
     # Make a chromosomes line for trackDb (no alts, no Y!):
     hgsql hg38 -NBe 'select seqName from tgpPhase3' | xargs echo | sed -e 's/ /,/g'
 #chr1,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr2,chr20,chr21,chr22,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chrX
     # I don't see counts of SNPs / indels documented anywhere, so extract:
     time (zcat ALL.chr*.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz | g -v ^# | cut -f 8 | sed -re 's/.*VT=//; s/;.*//' | sort | uniq -c | head -100)
 
 
 ##############################################################################
+# DGV with filters: append a _size column (chromEnd - chromStart) so the tracks can be filtered by size, refs #24749 - DONE Jan 01 2020
+    zcat dgvSupporting.bed.gz | tawk '{print $0, $3-$2}' | sort -k1,1 -k2,2n > dgvSupportingWithSize.bed
+    zcat dgvMerged.bed.gz | tawk '{print $0, $3-$2}' | sort -k1,1 -k2,2n > dgvMergedWithSize.bed
+    bedToBigBed -tab -as=/hive/data/genomes/hg19/bed/dgv/160810/dgvPlusSize.as -type=bed9+14 dgvMergedWithSize.bed  /hive/data/genomes/hg38/chrom.sizes dgvMerged.bb
+    # pass1 - making usageList (41 chroms): 333 millis
+    # pass2 - checking and writing primary data (389106 records, 23 fields): 9803 millis
+    bedToBigBed -tab -as=/hive/data/genomes/hg19/bed/dgv/160810/dgvPlusSize.as -type=bed9+14 dgvSupportingWithSize.bed  /hive/data/genomes/hg38/chrom.sizes dgvSupporting.bb
+    # pass1 - making usageList (41 chroms): 2695 millis
+    # pass2 - checking and writing primary data (6584665 records, 23 fields): 27859 millis
+    mkdir -p /gbdb/hg38/dgv/
+    cd /gbdb/hg38/dgv
+    ln -s /hive/data/genomes/hg38/bed/dgv/160906/dgvMerged.bb
+    ln -s /hive/data/genomes/hg38/bed/dgv/160906/dgvSupporting.bb
+##############################################################################