13d78f3b0149c4d9216fb78f28fbd9f91c088237 chmalee Tue Jun 13 15:50:10 2023 -0700 Remove sample level high repro region beds and replace with a single overlapped item bed, refs #31122 diff --git src/hg/makeDb/doc/hg38/problematic.txt src/hg/makeDb/doc/hg38/problematic.txt index 27dd8cf..7b22557 100644 --- src/hg/makeDb/doc/hg38/problematic.txt +++ src/hg/makeDb/doc/hg38/problematic.txt @@ -55,15 +55,21 @@ head *.bed ==> CQ-56.bed <== chr1 12857 12904 chr1 12932 13028 chr1 13129 13367 chr1 13520 13633 ... # so turn those into bigBeds and link everything to /gbdb for f in $(ls *.bed); do base=$(basename -s .bed $f); bedSort ${f} ${f}.sorted; bedToBigBed ${f}.sorted ../../../chrom.sizes ${base}.bb; done # for some reason tabix doesn't like to be run in a for loop from ls, needs find for f in $(find . -name "*.vcf.gz"); do tabix --verbosity 3 -p vcf "${f}"; done mkdir -p /gbdb/hg38/problematic/highRepro ln -s `pwd`/*.bb /gbdb/hg38/problematic/highRepro/ ln -s `pwd`/*.vcf.gz* /gbdb/hg38/problematic/highRepro/ + +# turns the beds into one single bed with the overlapped regions: +for f in $(ls *.bed); do echo $f; n=${f/.bed/}; tawk -v name=${n} '{print $0,name}' $f > ${n}.bed4; done +cat *.bed4 | sort -k1,1 -k2,2n > highRepro.allRegions +bedOverlapMerge highRepro.allRegions > highRepro.merged +bedToBigBed -type=bed3+ -tab -as=highRepro.as highRepro.merged ../../../chrom.sizes highRepro.bb