fffb37c69309b49959efe512c85b68424b27ffdf max Thu Apr 23 06:12:23 2020 -0700 adding makedoc, fixing field names, fixing track docs for problematic regions track, refs #24245 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 053d9bf..a4bff82 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -34274,15 +34274,61 @@ ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 34lift37 100 'March 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. [ONLY if it's going to be pushed] # edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV34lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## +# NCBI regions that are problematic for sequencing, Mon Nov 18 05:06:17 PST 2019, Max + +mkdir -p /hive/data/genomes/hg19/bed/specialRegions/orig +cd /hive/data/genomes/hg19/bed/specialRegions/orig +# download the Excel tables and convert them to tab-separated text (in2csv/csvformat: presumably from csvkit, TODO confirm) +wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx +wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx +wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx +wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx +in2csv Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S1_List1_NGS_Dead_Zone_exon_level.tsv +in2csv Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx | csvformat -T > Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.tsv +in2csv 
Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx | csvformat -T > Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.tsv +in2csv Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S4_List4_Sanger_Dead_Zone_exon_level.tsv + +cd .. && mkdir -p bed bb as # make sure the directories used by the steps below exist +# tabToBed is from https://github.com/maximilianh/maxtools +tabToBed orig/Table_S1_List1_NGS_Dead_Zone_exon_level.tsv bed/deadZone.bed as/deadZone.as -t bed3+ +bedToBigBed -tab bed/deadZone.bed /hive/data/genomes/hg19/chrom.sizes bb/deadZone.bb -type=bed3+ -as=as/deadZone.as + +tabToBed orig/Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.tsv bed/ngsProblemHigh.bed as/ngsProblemHigh.as -t bed3+ +bedToBigBed -tab bed/ngsProblemHigh.bed /hive/data/genomes/hg19/chrom.sizes bb/ngsProblemHigh.bb -type=bed3+ -as=as/ngsProblemHigh.as + +tabToBed orig/Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.tsv bed/ngsProblemLow.bed as/ngsProblemLow.as -t bed3+ +bedToBigBed -tab bed/ngsProblemLow.bed /hive/data/genomes/hg19/chrom.sizes bb/ngsProblemLow.bb -type=bed3+ -as=as/ngsProblemLow.as + +tabToBed orig/Table_S4_List4_Sanger_Dead_Zone_exon_level.tsv bed/sangerDeadZone.bed as/sangerDeadZone.as -t bed3+ +bedToBigBed -tab bed/sangerDeadZone.bed /hive/data/genomes/hg19/chrom.sizes bb/sangerDeadZone.bb -type=bed3+ -as=as/sangerDeadZone.as + +# the GIAB BED filter files: download, run through chromToUcsc, convert to bigBed, link into /gbdb +cd orig +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterABQD.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterAlign.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterConflicting.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterCov.bed.gz +wget 
ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterHapNoVar.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterMap.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterSSE.bed.gz +wget ftp://ftp-trace.ncbi.nih.gov/giab/ftp/data/NA12878/analysis/NIST_union_callsets_06172013/VQSRv2.18_filterlt2Datasets.bed.gz +gunzip *.gz +cd .. +for i in orig/*.bed; do out=$(basename "$i" .bed); out=${out#VQSRv2.18_}; echo "$out"; chromToUcsc -a hg19.chromAlias.tsv -i "$i" -o "bed/$out.bed"; done +for i in bed/filter*.bed; do echo "$i"; bedSort "$i" "$i"; bedToBigBed "$i" /hive/data/genomes/hg19/chrom.sizes "bb/$(basename "$i" .bed).bb" -type=bed3; done +cd /gbdb/hg19/bbi/special; +for i in /hive/data/genomes/hg19/bed/specialRegions/bb/filter*.bb; do ln -s "$i"; done; cd /hive/data/genomes/hg19/bed/specialRegions # back to the work directory: the paths below are relative to it +bedSort orig/hg19-blacklist.v2.bed orig/hg19-blacklist.v2.bed # NOTE(review): the download of hg19-blacklist.v2.bed is not recorded in this section +bedToBigBed orig/hg19-blacklist.v2.bed /hive/data/genomes/hg19/chrom.sizes bb/encBlacklist.bb -tab