890098d958d7df3a643b6a0b3589b1bfe4b7f6a0 markd Sun Apr 17 22:37:36 2022 -0700 added microsatellites diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt index 912718c..fe44bcc 100644 --- src/hg/makeDb/doc/chm13v2.0userData/build.txt +++ src/hg/makeDb/doc/chm13v2.0userData/build.txt @@ -231,68 +231,85 @@ ---------------------------------------------------------------- from Nick Altemose via Slack: t2t_censat_CHM13v2.0_trackv2.0.10col.bed t2t_censat_CHM13v2.0_trackv2.0_description.html cd censat/ # drop track header tawk 'NR>1' t2t_censat_CHM13v2.0_trackv2.0.10col.bed | csort -k1,1 -k2,2n >tmp.bed bedToBigBed -type=bed9+1 -as=${HOME}/compbio/t2t/projs/chm13-v2.0/makeDir/schema/cenSat.as -tab tmp.bed ../chromAlias/ucsc.sizes.txt censat.bb ================================================================ * dbSNP155 (2022-03-29 markd) ---------------------------------------------------------------- -# dbSNP Variants Lifted+Recovered TBD Dylan Taylor +# dbSNP Variants Lifted+Recovered Dylan Taylor https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/liftover/chm13v2.0_dbSNPv155.vcf.gz dbSNP_lifted-recovered.html # need to use NCBI names until supported by chromAlias zcat chm13v2.0_dbSNPv155.vcf.gz | chromToUcsc --chromAlias=../chromAlias/GCA_009914755.4_T2T-CHM13v2.0.chromAlias.txt /dev/stdin | bgzip -c >chm13v2.0_dbSNPv155.ncbi-names.vcf.gz tabix -p vcf chm13v2.0_dbSNPv155.vcf.gz & tabix -p vcf chm13v2.0_dbSNPv155.ncbi-names.vcf.gz & ================================================================ * clinVar20220313 (2022-03-29 markd) ---------------------------------------------------------------- -ClinVar Lifted+Recovered TBD Dylan Taylor +ClinVar Lifted+Recovered Dylan Taylor https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/liftover/chm13v2.0_ClinVar20220313.vcf.gz zcat chm13v2.0_ClinVar20220313.vcf.gz | chromToUcsc 
--chromAlias=../chromAlias/GCA_009914755.4_T2T-CHM13v2.0.chromAlias.txt /dev/stdin | bgzip -c >chm13v2.0_ClinVar20220313.ncbi-names.vcf.gz tabix -p vcf chm13v2.0_ClinVar20220313.vcf.gz & tabix -p vcf chm13v2.0_ClinVar20220313.ncbi-names.vcf.gz & ================================================================ -* gwasSNPs2022-03-08 (2022-03-29 markd +* gwasSNPs2022-03-08 (2022-03-29 markd) ---------------------------------------------------------------- GWAS SNPs Lifted+Recovered TBD Dylan Taylor https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/liftover/chm13v2.0_GWASv1.0rsids_e100_r2022-03-08.vcf.gz gwas_catalog_lifted-recovered.html # need to use NCBI names until supported by chromAlias zcat chm13v2.0_GWASv1.0rsids_e100_r2022-03-08.vcf.gz | chromToUcsc --chromAlias=../chromAlias/GCA_009914755.4_T2T-CHM13v2.0.chromAlias.txt /dev/stdin | bgzip -c >chm13v2.0_GWASv1.0rsids_e100_r2022-03-08.ncbi-names.vcf.gz tabix -p vcf chm13v2.0_GWASv1.0rsids_e100_r2022-03-08.ncbi-names.vcf.gz& tabix -p vcf chm13v2.0_GWASv1.0rsids_e100_r2022-03-08.vcf.gz& + +================================================================ +* microsatellites (2022-04-17 markd) +---------------------------------------------------------------- +Arang Rhie + +doc https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/pattern/microsatellite.html + +GA https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/pattern/chm13v2.0.microsatellite.GA.128.wig +TC https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/pattern/chm13v2.0.microsatellite.TC.128.wig +GC https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/pattern/chm13v2.0.microsatellite.GC.128.wig +AT https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/pattern/chm13v2.0.microsatellite.AT.128.wig + +# convert to bigWig +for f in *.wig ; do wigToBigWig -clip $f 
../ucscChromNames/t2t-chm13-v2.0.sizes $(basename $f .wig).bw ; done +pigz *.wig + ================================================================ pending: - ensembl: http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/ contains all Y1 assemblies; http://ftp.ebi.ac.uk/pub/databases/ensembl/hprc/y1_freeze/GCA_009914755.4/ is CHM13v2 - isoseq BAMs http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/out-t2t-chrY-augPB/assemblyHub/CHM13/ @PG ID:minimap2 PN:minimap2 VN:2.22-r1105-dirty CL:minimap2 -ax splice -f 1000 --sam-hit-only --secondary=no --eqx -K 100M -t 8 --cap-sw-mem=3g chm13v2.0.chrY.fasta HG002-NA24385-LCL-polished_isoforms_hq.fasta globus /HG002-IsoSeq - isoseq Fritz Sedlazeck 1 minute ago STUDY: PRJNA754107