3442f799e4ddde42dfc0257e514ea1831a514f97 markd Sun Apr 24 21:03:28 2022 -0700 add score to T2T supplied liftOver chains diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt index 57fe414..179dce6 100644 --- src/hg/makeDb/doc/chm13v2.0userData/build.txt +++ src/hg/makeDb/doc/chm13v2.0userData/build.txt @@ -130,64 +130,64 @@ chm13v2-hg19_chrM.chain chm13v2-hg19_chrMT.chain hg19_chrM-chm13v2.chain hg19_chrMT-chm13v2.chain cd trackData/hgLiftOver # rename to match UCSC conventions mv chm13v2-grch38.chain chm13v2-hg38.over.no-id.chain mv grch38-chm13v2.chain hg38-chm13v2.over.no-id.chain mv chm13v2-hg19_chrM.chain chm13v2-hg19_chrM.over.no-id.chain mv chm13v2-hg19_chrMT.chain chm13v2-hg19_chrMT.over.no-id.chain mv hg19_chrM-chm13v2.chain hg19_chrM-chm13v2.over.no-id.chain mv hg19_chrMT-chm13v2.chain hg19_chrMT-chm13v2.over.no-id.chain -# add chain ids - chainMergeSort chm13v2-hg19_chrM.over.no-id.chain > chm13v2-hg19_chrM.over.chain - chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain > chm13v2-hg19_chrMT.over.chain - chainMergeSort chm13v2-hg19.over.no-id.chain > chm13v2-hg19.over.chain - chainMergeSort chm13v2-hg38.over.no-id.chain > chm13v2-hg38.over.chain - chainMergeSort hg19-chm13v2.over.no-id.chain > hg19-chm13v2.over.chain - chainMergeSort hg19_chrM-chm13v2.over.no-id.chain > hg19_chrM-chm13v2.over.chain - chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain > hg19_chrMT-chm13v2.over.chain +# add chain ids and score + chainMergeSort chm13v2-hg19_chrM.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrM.over.chain + chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg19/hg19.2bit chm13v2-hg19_chrMT.over.chain + chainMergeSort chm13v2-hg38.over.no-id.chain | chainScore stdin ../ucscChromNames/t2t-chm13-v2.0.2bit /hive/data/genomes/hg38/hg38.2bit chm13v2-hg38.over.chain 
+ + chainMergeSort hg19_chrM-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrM-chm13v2.over.chain + chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain | chainScore stdin /hive/data/genomes/hg19/hg19.2bit ../ucscChromNames/t2t-chm13-v2.0.2bit hg19_chrMT-chm13v2.over.chain + chainMergeSort hg38-chm13v2.over.no-id.chain > hg38-chm13v2.over.chain # create hg19 chains that combine chrM and chrMT for use in browser. chainFilter -q=chrMT chm13v2-hg19_chrMT.over.chain | chainMergeSort stdin chm13v2-hg19_chrM.over.chain > chm13v2-hg19.over.chain chainFilter -t=chrMT hg19_chrMT-chm13v2.over.chain | chainMergeSort stdin hg19_chrM-chm13v2.over.chain > hg19-chm13v2.over.chain pigz *.chain # build tracks hgLoadChain -noBin -test none bigChain chm13v2-hg38.over.chain.gz sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.chain.bb tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.link.bb hgLoadChain -noBin -test none bigChain chm13v2-hg19.over.chain.gz sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg19.over.chain.bb tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg19.over.link.bb rm *.tab # make available in liftOver directory as well - ln -f *.chain.gz 
../../liftOver/ + ln -f *.over.chain.gz ../../liftOver/ # GRCh38 mask used in liftover. This is based on: # https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh38/GCA_000001405.15_GRCh38_GRC_exclusions_T2Tv2.bed # plus UCSC hg38 centromeres track GRCh38: /team-liftover/grch38_masked_fasta/grch38-centromere_and_falsedup.bed (edited) rename to hg38.liftover-mask.bed ln -f hg38.liftover-mask.bed ../../liftOver/ ================================================================ * hgCactus (2022-03-28 markd) ---------------------------------------------------------------- # HAL from Marina Haukness