5148aa7d55ecbad1137ae3cd4bb0c85b67e8fed3 markd Thu Apr 14 12:13:05 2022 -0700 added cat/liftoff rna and protein fastas diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt index 349b2da..912718c 100644 --- src/hg/makeDb/doc/chm13v2.0userData/build.txt +++ src/hg/makeDb/doc/chm13v2.0userData/build.txt @@ -93,30 +93,40 @@ catLiftOffGenesV1 (2022-03-15 markd) ---------------------------------------------------------------- from Marina Haukness http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.bb http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.gff3 rename to catLiftOffGenesV1.bb catLiftOffGenesV1.gff3.gz # create GTF zcat catLiftOffGenesV1.gff3.gz | gffread /dev/stdin -T -o catLiftOffGenesV1.gtf pigz catLiftOffGenesV1.gtf + +# obtain sequence fastas + http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.fasta + http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.protein.fasta + + mv CHM13.v2.0.fasta catLiftOffGenesV1.rna.fa + mv CHM13.v2.0.protein.fasta catLiftOffGenesV1.protein.fa + pigz *.fa + + ================================================================ * hgLiftOver (2022-03-26 markd) ---------------------------------------------------------------- GRCh38 & GRCh37 Nae-Chyun Chen # 2022-04-09 it was noted that chrM was left out of above alignments, so obtain them and repeat globus: /team-liftover/v1_nflo/with_chrM/ chm13v2-grch38.chain grch38-chm13v2.chain chm13v2-hg19_chrM.chain chm13v2-hg19_chrMT.chain hg19_chrM-chm13v2.chain hg19_chrMT-chm13v2.chain @@ -173,33 +183,36 @@ http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/t2tChm13.v2.0.hal # rename genomes to match browser, in renameFile.tab put GRCh38 hg38 CHM13 GCA_009914755.4 halRenameGenomes t2tChm13.v2.0.hal renameFile.tab # NOTE: disabled due to Snakes not using chromAlias ================================================================ * hgUnique (2022-03-30 
markd) ---------------------------------------------------------------- regions not in hg38: +original version in: globus: /team-liftover/v1_nflo/T2T-CHM13v2.0_new_and_non_syntenic_regions.bed chm13v2-unique_to_hg19.bed chm13v2-unique_to_hg38.bed +however, with the addition of chrM to the hgLiftOver chains (presumably; see the 2022-04-09 note under hgLiftOver -- TODO confirm), the unique-region beds are regenerated from the over.chain files: + # chainToPslBasic ../hgLiftOver/chm13v2-hg38.over.chain.gz stdout \ | pslToBed stdin stdout \ | bedtools sort -i - -g ../ucscChromNames/t2t-chm13-v2.0.sizes \ | bedtools merge \ | bedtools complement -i - -g ../ucscChromNames/t2t-chm13-v2.0.sizes \ | bedtools merge \ | sort -k1,1 -k2,2n \ > chm13v2-unique_to_hg38.bed chainToPslBasic ../hgLiftOver/chm13v2-hg19.over.chain.gz stdout \ | pslToBed stdin stdout \ | bedtools sort -i - -g ../ucscChromNames/t2t-chm13-v2.0.sizes \ | bedtools merge \