54a8de5f8364d2890ef8d414ac51a507050cc3f6 markd Thu Apr 28 11:32:49 2022 -0700 added T2T generated repeat masker tracks diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt index e35d235..d9d8a81 100644 --- src/hg/makeDb/doc/chm13v2.0userData/build.txt +++ src/hg/makeDb/doc/chm13v2.0userData/build.txt @@ -131,38 +131,37 @@ chm13v2-hg19_chrMT.chain hg19_chrM-chm13v2.chain hg19_chrMT-chm13v2.chain cd trackData/hgLiftOver # rename to match UCSC conventions mv chm13v2-grch38.chain chm13v2-hg38.over.no-id.chain mv grch38-chm13v2.chain hg38-chm13v2.over.no-id.chain mv chm13v2-hg19_chrM.chain chm13v2-hg19_chrM.over.no-id.chain mv chm13v2-hg19_chrMT.chain chm13v2-hg19_chrMT.over.no-id.chain mv hg19_chrM-chm13v2.chain hg19_chrM-chm13v2.over.no-id.chain mv hg19_chrMT-chm13v2.chain hg19_chrMT-chm13v2.over.no-id.chain # add chain ids and score - chainMergeSort chm13v2-hg19_chrM.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrM.over.chain - chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrMT.over.chain - chainMergeSort chm13v2-hg38.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg38/hg38.2bit chm13v2-hg38.over.chain + chainMergeSort chm13v2-hg19_chrM.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrM.over.chain & + chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrMT.over.chain & + chainMergeSort chm13v2-hg38.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg38/hg38.2bit chm13v2-hg38.over.chain & - chainMergeSort hg19_chrM-chm13v2.over.no-id.chain | chainScore stdin 
/hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrM-chm13v2.over.chain - chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrMT-chm13v2.over.chain - - chainMergeSort hg38-chm13v2.over.no-id.chain > hg38-chm13v2.over.chain + chainMergeSort hg19_chrM-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrM-chm13v2.over.chain & + chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrMT-chm13v2.over.chain & + chainMergeSort hg38-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg38/hg38.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg38-chm13v2.over.chain & + # wait for all background chainScore jobs to finish before using their output + wait # create hg19 chains that combine chrM and chrMT for use in browser. chainFilter -q=chrMT chm13v2-hg19_chrMT.over.chain | chainMergeSort stdin chm13v2-hg19_chrM.over.chain > chm13v2-hg19.over.chain chainFilter -t=chrMT hg19_chrMT-chm13v2.over.chain | chainMergeSort stdin hg19_chrM-chm13v2.over.chain > hg19-chm13v2.over.chain pigz *.chain # build tracks hgLoadChain -noBin -test none bigChain chm13v2-hg38.over.chain.gz sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.chain.bb tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.link.bb @@ -315,35 +314,73 @@ Mitchell R. 
Vollger, William Harvey https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/hub.txt https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/trackDb.t2t-chm13-v2.0.txt https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/bigbed/description.html download the 348 bigBeds in trackDb from https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/bigbed/ ================================================================ * encode (2022-04-26 markd) ---------------------------------------------------------------- Michael Sauria in hub https://bx.bio.jhu.edu/track-hubs/T2T/hub.txt pull from https://bx.bio.jhu.edu/track-hubs/T2T/chm13v2.0/encode/ +================================================================ +* t2tRepeatMasker (2022-04-25 markd) +---------------------------------------------------------------- +Savannah Hoyt, Jessica Storer, Robert Hubley +http://www.repeatmasker.org/~rhubley/forMark.tar.gz + + chm13v2.0_RMSK_ALIGN.bb + chm13v2.0_RMSK.bb + combo.align.gz + combo.out.gz + notebook + +Original version was missing chrY in bigBed (fine in out and align), got new one from: + +http://www.repeatmasker.org/~rhubley/forMark2.tar.gz + +rename these + mv chm13v2.0_RMSK_ALIGN.bb chm13v2.0_rmsk.align.bb + mv chm13v2.0_RMSK.bb chm13v2.0_rmsk.bb + mv combo.align.gz chm13v2.0_rmsk.align.gz + mv combo.out.gz chm13v2.0_rmsk.out.gz + +Track documentation was received from Savannah and updated from DFAM public +hub documentation. Download images from DFAM hub, base64 encode them and +insert in html/t2tRepeatMasker.html with src="data:image/png;base64,...". +This makes the page independent of the location where it is installed. 
+ + +# notes from Robert on how tracks were created: + # Build trackHub tsv files from the combo* files: + /home/rhubley/projects/RepeatMasker/util/rmToTrackHub2.pl \ + -out combo.out \ + -align combo.align + + # Sort tsv files + sort -k1,1 -k2,2n combo.join.tsv > combo.join.tsv.sorted + sort -k1,1 -k2,2n combo.align.tsv > combo.align.tsv.sorted + + # Convert to bigRmskBed and bigRmskAlignBed files + /usr/local/ucscTools/bedToBigBed -tab -as=bigRmskAlignBed.as -type=bed3+14 combo.align.tsv.sorted chrom.sizes chm13v2.0_RMSK_ALIGN.bb + /usr/local/ucscTools/bedToBigBed -tab -as=bigRmskBed.as -type=bed9+5 combo.join.tsv.sorted chrom.sizes chm13v2.0_RMSK.bb - ENCODE ENCODE pileups Ready: See hub.txt Michael Sauria https://bx.bio.jhu.edu/track-hubs/T2T/hub.txt H - ENCODE macs2 peaks Michael Sauria H - ENCODE macs2 LO peaks Michael Sauria H ================================================================ pending: - ensembl: http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/ contains all Y1 assemblies; http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/GCA_009914755.4/ is CHM13v2 - isoseq BAMs http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/out-t2t-chrY-augPB/assemblyHub/CHM13/ @PG ID:minimap2 PN:minimap2 VN:2.22-r1105-dirty CL:minimap2 -ax splice -f 1000 --sam-hit-only --secondary=no --eqx -K 100M -t 8 --cap-sw-mem=3g chm13v2.0.chrY.fasta HG002-NA24385-LCL-polished_isoforms_hq.fasta globus /HG002-IsoSeq - isoseq Fritz Sedlazeck 1 minute ago @@ -355,30 +392,39 @@ SAMPLE: GM26105 (SAMN20741797) EXPERIMENT: PCD_NISTRM.NA26105-1_1sA-40 (SRX14226558) RUN: m64139_220131_122551 (SRR18074969) STUDY: PRJNA200694 SAMPLE: NIST HG002 NA24385 (SAMN03283347) EXPERIMENT: PCD_NISTRM.NA24385-1_1sA-40 (SRX14226557) RUN: m64139_220127_180020 (SRR18074968) * unique kmers Min unique k-mer (+) Present in v1.0 and v2.0 Michael Sauria /team-epigenetics/032522_chm13v2.0_kmers/mu/chm13v2.0.mul.bw H min_unique_kmer.html Min unique k-mer (-) Present in v1.0 and v2.0 Michael Sauria 
/team-epigenetics/032522_chm13v2.0_kmers/mu/chm13v2.0.mur.bw H * RepeatMasker Savannah Hoyt/Jessica Storer https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_RepeatMasker_4.1.2p1.out H + Robert Hubley + I have generated trackhub files for the T2T TE track based on the + results of Jessica's and Savannah's latest RepeatMasker runs ( 4/14/22 with + v5 of the TE library ). See the notebook file for details of the + construction. Please let me know if you have any questions. + + http://www.repeatmasker.org/~rhubley/forMark.tar.gz + + * ENCODE ENCODE pileups Present in v1.0 and v2.0 Michael Sauria /team-epigenetics/032522_chm13v2.0_encode/coverage/*.bw H ENCODE macs2 peaks Present in v1.0 and v2.0 Michael Sauria /team-epigenetics/032522_chm13v2.0_encode/peaks/*.bb H ENCODE macs2 LO peaks Present in v1.0 Michael Sauria H * GRCh38 Unresolved in GRCh GRCh38 TBD Sergey Koren browser/tracks/chm13v2.0_unmapped_byHG38.bed H chm13_uncovered_byGRCh38.html GRCh37 Sergey Koren browser/tracks/chm13v2.0_unmapped_byHG19.bed H * GRCh38 variants TBD Nancy Hansen team-liftover/chain_variants/vcffiles/v1_nflo/chm13v2-grch38.sort.vcf.gz L grch_allele_differences.html GRCh37 variants TBD Nancy Hansen team-liftover/chain_variants/vcffiles/v1_nflo/chm13v2-hg19.sort.vcf.gz L