54a8de5f8364d2890ef8d414ac51a507050cc3f6
markd
  Thu Apr 28 11:32:49 2022 -0700
added T2T generated repeat masker tracks

diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt
index e35d235..d9d8a81 100644
--- src/hg/makeDb/doc/chm13v2.0userData/build.txt
+++ src/hg/makeDb/doc/chm13v2.0userData/build.txt
@@ -131,38 +131,37 @@
     chm13v2-hg19_chrMT.chain
     hg19_chrM-chm13v2.chain
     hg19_chrMT-chm13v2.chain
 
    cd trackData/hgLiftOver
 
 # rename to match UCSC conventions
     mv chm13v2-grch38.chain chm13v2-hg38.over.no-id.chain
     mv grch38-chm13v2.chain hg38-chm13v2.over.no-id.chain
     mv chm13v2-hg19_chrM.chain chm13v2-hg19_chrM.over.no-id.chain
     mv chm13v2-hg19_chrMT.chain chm13v2-hg19_chrMT.over.no-id.chain
     mv hg19_chrM-chm13v2.chain hg19_chrM-chm13v2.over.no-id.chain
     mv hg19_chrMT-chm13v2.chain  hg19_chrMT-chm13v2.over.no-id.chain
 
 # add chain ids and score
-    chainMergeSort chm13v2-hg19_chrM.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrM.over.chain     
-    chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrMT.over.chain    
-    chainMergeSort chm13v2-hg38.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg38/hg38.2bit chm13v2-hg38.over.chain
+    chainMergeSort chm13v2-hg19_chrM.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrM.over.chain &
+    chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrMT.over.chain &
+    chainMergeSort chm13v2-hg38.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg38/hg38.2bit chm13v2-hg38.over.chain &
 
-    chainMergeSort hg19_chrM-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrM-chm13v2.over.chain     
-    chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain  | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit  hg19_chrMT-chm13v2.over.chain    
-
-    chainMergeSort hg38-chm13v2.over.no-id.chain         > hg38-chm13v2.over.chain          
+    chainMergeSort hg19_chrM-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrM-chm13v2.over.chain &
+    chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit  hg19_chrMT-chm13v2.over.chain &
+    chainMergeSort hg38-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg38/hg38.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg38-chm13v2.over.chain &
 
 
 # create hg19 chains that combine chrM and chrMT for use in browser.
    chainFilter -q=chrMT chm13v2-hg19_chrMT.over.chain | chainMergeSort stdin chm13v2-hg19_chrM.over.chain > chm13v2-hg19.over.chain
    chainFilter -t=chrMT hg19_chrMT-chm13v2.over.chain | chainMergeSort stdin  hg19_chrM-chm13v2.over.chain > hg19-chm13v2.over.chain
 
    pigz *.chain
 
 # build tracks
     hgLoadChain -noBin -test none bigChain chm13v2-hg38.over.chain.gz 
     sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab
     bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.chain.bb
     tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab
     bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.link.bb
 
@@ -315,35 +314,73 @@
 Mitchell R. Vollger, William Harvey
 
 https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/hub.txt
 https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/trackDb.t2t-chm13-v2.0.txt
 https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/bigbed/description.html
 
 download the 348 bigBeds in trackDb from 
   https://eichlerlab.gs.washington.edu/help/mvollger/share/tracks/t2t-chm13-v2.0/SGDP_CN/bigbed/
 
 ================================================================
 * encode (2022-04-26 markd)
 ----------------------------------------------------------------
 Michael Sauria
 in hub https://bx.bio.jhu.edu/track-hubs/T2T/hub.txt
 pull from https://bx.bio.jhu.edu/track-hubs/T2T/chm13v2.0/encode/
+================================================================
+* t2tRepeatMasker (2022-04-25 markd)
+----------------------------------------------------------------
+Savannah Hoyt, Jessica Storer, Robert Hubley
+http://www.repeatmasker.org/~rhubley/forMark.tar.gz
+
+    chm13v2.0_RMSK_ALIGN.bb
+    chm13v2.0_RMSK.bb
+    combo.align.gz
+    combo.out.gz
+    notebook
+
+Original version was missing chrY in the bigBeds (chrY was fine in the out and align files); got a new one from:
+
+http://www.repeatmasker.org/~rhubley/forMark2.tar.gz
+
+rename these
+    mv chm13v2.0_RMSK_ALIGN.bb  chm13v2.0_rmsk.align.bb
+    mv chm13v2.0_RMSK.bb        chm13v2.0_rmsk.bb
+    mv combo.align.gz           chm13v2.0_rmsk.align.gz
+    mv combo.out.gz             chm13v2.0_rmsk.out.gz
+
+Track documentation was received from Savannah and updated from the DFAM public
+hub documentation.  Download the images from the DFAM hub, base64 encode them, and
+insert them in html/t2tRepeatMasker.html with src="data:image/png;base64,...".
+This makes the page independent of the location where it is installed.
+
+
+# notes from Robert on how tracks were created:
+    # Build trackHub tsv files from the combo* files:
+    /home/rhubley/projects/RepeatMasker/util/rmToTrackHub2.pl \
+      -out combo.out \
+      -align combo.align
+
+    # Sort tsv files
+    sort -k1,1 -k2,2n  combo.join.tsv >  combo.join.tsv.sorted
+    sort -k1,1 -k2,2n  combo.align.tsv >  combo.align.tsv.sorted
+
+    # Convert to bigRmskBed and bigRmskAlignBed files
+    /usr/local/ucscTools/bedToBigBed -tab -as=bigRmskAlignBed.as -type=bed3+14  combo.align.tsv.sorted chrom.sizes chm13v2.0_RMSK_ALIGN.bb
+    /usr/local/ucscTools/bedToBigBed -tab -as=bigRmskBed.as -type=bed9+5 combo.join.tsv.sorted chrom.sizes chm13v2.0_RMSK.bb
 
 
-	ENCODE	ENCODE pileups	Ready: See hub.txt	Michael Sauria			https://bx.bio.jhu.edu/track-hubs/T2T/hub.txt	H
-		ENCODE macs2 peaks		Michael Sauria				H
-		ENCODE macs2 LO peaks		Michael Sauria				H
 
 ================================================================
 pending:
 
 - ensembl:
   http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/ contains all Y1 assemblies;
   http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/GCA_009914755.4/ is CHM13v2
 
 - isoseq BAMs
   http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/out-t2t-chrY-augPB/assemblyHub/CHM13/
   @PG   ID:minimap2   PN:minimap2   VN:2.22-r1105-dirty   CL:minimap2 -ax splice -f 1000 --sam-hit-only --secondary=no --eqx -K 100M -t 8 --cap-sw-mem=3g chm13v2.0.chrY.fasta HG002-NA24385-LCL-polished_isoforms_hq.fasta
   globus /HG002-IsoSeq
 
 - isoseq
     Fritz Sedlazeck  1 minute ago
@@ -355,30 +392,39 @@
      SAMPLE: GM26105 (SAMN20741797)
       EXPERIMENT: PCD_NISTRM.NA26105-1_1sA-40 (SRX14226558)
         RUN: m64139_220131_122551 (SRR18074969)
     STUDY: PRJNA200694
      SAMPLE: NIST HG002 NA24385 (SAMN03283347)
       EXPERIMENT: PCD_NISTRM.NA24385-1_1sA-40 (SRX14226557)
         RUN: m64139_220127_180020 (SRR18074968)
 
 * unique kmers
   Min unique k-mer (+)	Present in v1.0 and v2.0	Michael Sauria	/team-epigenetics/032522_chm13v2.0_kmers/mu/chm13v2.0.mul.bw	H	min_unique_kmer.html
   Min unique k-mer (-)	Present in v1.0 and v2.0	Michael Sauria	/team-epigenetics/032522_chm13v2.0_kmers/mu/chm13v2.0.mur.bw	H
 
 * RepeatMasker
   Savannah Hoyt/Jessica Storer	https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_RepeatMasker_4.1.2p1.out	H
 
+  Robert Hubley
+  I have generated trackhub files for the T2T TE track based on the
+  results of Jessica's and Savannah's latest RepeatMasker runs ( 4/14/22 with
+  v5 of the TE library ).  See the notebook file for details of the
+  construction.  Please let me know if you have any questions.
+
+  http://www.repeatmasker.org/~rhubley/forMark.tar.gz
+
+
 * ENCODE
   ENCODE pileups	Present in v1.0 and v2.0	Michael Sauria	/team-epigenetics/032522_chm13v2.0_encode/coverage/*.bw	H
   ENCODE macs2 peaks	Present in v1.0 and v2.0	Michael Sauria	/team-epigenetics/032522_chm13v2.0_encode/peaks/*.bb	H
   ENCoDE macs2 LO peaks	Present in v1.0	Michael Sauria		H
 
 * GRCh38
   Unresolved in GRCh	GRCh38	TBD	Sergey Koren	browser/tracks/chm13v2.0_unmapped_byHG38.bed	H	chm13_uncovered_byGRCh38.html
   GRCh37		Sergey Koren	browser/tracks/chm13v2.0_unmapped_byHG19.bed	H
 
 
 
 * GRCh38 variants
   TBD	Nancy Hansen	team-liftover/chain_variants/vcffiles/v1_nflo/chm13v2-grch38.sort.vcf.gz	L	grch_allele_differences.html
   GRCh37 variants	TBD	Nancy Hansen	team-liftover/chain_variants/vcffiles/v1_nflo/chm13v2-hg19.sort.vcf.gz	L