d053fa86c4f6a51cd351b468eaad73f6c894f25d markd Sat Apr 9 14:42:55 2022 -0700 update CHM13 liftover due to missing chrM diff --git src/hg/makeDb/doc/hg38/chm13LiftOver.txt src/hg/makeDb/doc/hg38/chm13LiftOver.txt index e6d9bab..758ad95 100644 --- src/hg/makeDb/doc/hg38/chm13LiftOver.txt +++ src/hg/makeDb/doc/hg38/chm13LiftOver.txt @@ -1,34 +1,35 @@ ############################################################################# # chm13 liftover alignments (2022-03-29 markd) # preliminary CHM13 <-> hg38 liftOver chains until NCBI produces consensus alignments # provided by Nae-Chyun Chen <naechyun.chen@gmail.com> mkdir -p /hive/data/genomes/hg38/bed/chm13LiftOver cd /hive/data/genomes/hg38/bed/chm13LiftOver -# Obtain GRCh38 from T2T Globus: team-liftover/v1_nflo/grch38-chm13v2.chain +# originally obtained GRCh38 from T2T Globus: team-liftover/v1_nflo/grch38-chm13v2.chain -# rename to better match UCSC convetions and compress - mv grch38-chm13v2.chain hg38-chm13v2.over.chain - pigz hg38-chm13v2.over.chain +# 2022-04-09 updated due to discovery that chrM was missing +# obtain updated chains from the asmHubs genbankBuild hgLiftOver directory: + + cp /hive/data/genomes/asmHubs/genbankBuild/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/trackData/hgLiftOver/hg38-chm13v2.over.chain.gz . 
# make NCBI query names as well, since chromAlias doesn't work yet for click-through # note: the chromToUcsc doc is wrong; the PSL query name is column 10 (hence -k 10) - chainToPslBasic hg38-chm13v2.over.chain.gz stdout | chromToUcsc -k 10 -a /hive/data/genomes/asmHubs/genbankBuild/GCA/009/914/755/GCA_009914755.4_CHM13_T2T_v2.0/GCA_009914755.4_CHM13_T2T_v2.0.chromAlias.txt | pslToChain stdin stdout | pigz -c > hg38-chm13v2.ncbi-qnames.over.chain.gz + chainToPslBasic hg38-chm13v2.over.chain.gz stdout | chromToUcsc -k 10 -a /hive/data/genomes/asmHubs/genbankBuild/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/GCA_009914755.4_T2T-CHM13v2.0.chromAlias.txt | pslToChain stdin stdout | pigz -c > hg38-chm13v2.ncbi-qnames.over.chain.gz # build bigChain files: hgLoadChain -noBin -test none bigChain hg38-chm13v2.over.chain.gz sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../../chrom.sizes hg38-chm13v2.over.chain.bb tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../../chrom.sizes hg38-chm13v2.over.link.bb # build bigChain files with NCBI names: hgLoadChain -noBin -test none bigChain hg38-chm13v2.ncbi-qnames.over.chain.gz sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../../chrom.sizes hg38-chm13v2.ncbi-qnames.over.chain.bb tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../../chrom.sizes hg38-chm13v2.ncbi-qnames.over.link.bb