41f199e88fc710d1f86b96b606d15a5b975dda71
markd
  Tue Apr 19 22:05:14 2022 -0700
fixed chain ids in hgLiftOver

diff --git src/hg/makeDb/doc/chm13v2.0userData/build.txt src/hg/makeDb/doc/chm13v2.0userData/build.txt
index fe44bcc..57fe414 100644
--- src/hg/makeDb/doc/chm13v2.0userData/build.txt
+++ src/hg/makeDb/doc/chm13v2.0userData/build.txt
@@ -109,73 +109,83 @@
 # obtain sequence fastas
    http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.fasta
    http://courtyard.gi.ucsc.edu/~mhauknes/T2T/t2t_Y/annotation_set/CHM13.v2.0.protein.fasta
 
    mv CHM13.v2.0.fasta catLiftOffGenesV1.rna.fa
    mv CHM13.v2.0.protein.fasta  catLiftOffGenesV1.protein.fa
    pigz *.fa
    
 
 ================================================================
 * hgLiftOver (2022-03-26 markd)
 ----------------------------------------------------------------
 GRCh38 & GRCh37 Nae-Chyun Chen <naechyun.chen@gmail.com>
 
 # 2022-04-09 it was noted that chrM was left out of above alignments, so obtain them and repeat
+# 2022-04-19 it was discovered that chains render oddly due to the lack of chain ids.  Use chainMergeSort
+# to fix this.
 
 globus: /team-liftover/v1_nflo/with_chrM/
     chm13v2-grch38.chain
     grch38-chm13v2.chain
     chm13v2-hg19_chrM.chain
     chm13v2-hg19_chrMT.chain
     hg19_chrM-chm13v2.chain
     hg19_chrMT-chm13v2.chain
 
    cd trackData/hgLiftOver
 
 # rename to match UCSC conventions
-    mv chm13v2-grch38.chain chm13v2-hg38.over.chain      
-    mv grch38-chm13v2.chain hg38-chm13v2.over.chain
-    mv chm13v2-hg19_chrM.chain chm13v2-hg19_chrM.over.chain
-    mv chm13v2-hg19_chrMT.chain chm13v2-hg19_chrMT.over.chain
-    mv hg19_chrM-chm13v2.chain hg19_chrM-chm13v2.over.chain
-    mv hg19_chrMT-chm13v2.chain  hg19_chrMT-chm13v2.over.chain
+    mv chm13v2-grch38.chain chm13v2-hg38.over.no-id.chain
+    mv grch38-chm13v2.chain hg38-chm13v2.over.no-id.chain
+    mv chm13v2-hg19_chrM.chain chm13v2-hg19_chrM.over.no-id.chain
+    mv chm13v2-hg19_chrMT.chain chm13v2-hg19_chrMT.over.no-id.chain
+    mv hg19_chrM-chm13v2.chain hg19_chrM-chm13v2.over.no-id.chain
+    mv hg19_chrMT-chm13v2.chain  hg19_chrMT-chm13v2.over.no-id.chain
+
+# add chain ids
+    chainMergeSort chm13v2-hg19_chrM.over.no-id.chain   > chm13v2-hg19_chrM.over.chain     
+    chainMergeSort chm13v2-hg19_chrMT.over.no-id.chain  > chm13v2-hg19_chrMT.over.chain    
+    chainMergeSort chm13v2-hg19.over.no-id.chain        > chm13v2-hg19.over.chain          
+    chainMergeSort chm13v2-hg38.over.no-id.chain        > chm13v2-hg38.over.chain          
+    chainMergeSort hg19-chm13v2.over.no-id.chain        > hg19-chm13v2.over.chain          
+    chainMergeSort hg19_chrM-chm13v2.over.no-id.chain   > hg19_chrM-chm13v2.over.chain     
+    chainMergeSort hg19_chrMT-chm13v2.over.no-id.chain  > hg19_chrMT-chm13v2.over.chain    
+    chainMergeSort hg38-chm13v2.over.no-id.chain        > hg38-chm13v2.over.chain          
+
 
 # create hg19 chains that combine chrM and chrMT for use in browser.
-   cp chm13v2-hg19_chrM.over.chain chm13v2-hg19.over.chain
-   chainFilter -q=chrMT chm13v2-hg19_chrMT.over.chain >>chm13v2-hg19.over.chain
-   cp hg19_chrM-chm13v2.over.chain hg19-chm13v2.over.chain
-   chainFilter -t=chrMT  hg19_chrMT-chm13v2.over.chain >>hg19-chm13v2.over.chain
+   chainFilter -q=chrMT chm13v2-hg19_chrMT.over.chain | chainMergeSort stdin chm13v2-hg19_chrM.over.chain > chm13v2-hg19.over.chain
+   chainFilter -t=chrMT hg19_chrMT-chm13v2.over.chain | chainMergeSort stdin  hg19_chrM-chm13v2.over.chain > hg19-chm13v2.over.chain
 
    pigz *.chain
 
 # build tracks
     hgLoadChain -noBin -test none bigChain chm13v2-hg38.over.chain.gz 
     sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab
     bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.chain.bb
     tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab
     bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg38.over.link.bb
 
     hgLoadChain -noBin -test none bigChain chm13v2-hg19.over.chain.gz 
     sed 's/\.000000//' chain.tab | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' > bigChainIn.tab
     bedToBigBed -type=bed6+6 -as=${HOME}/kent/src/hg/lib/bigChain.as -tab bigChainIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg19.over.chain.bb
     tawk '{print $1, $2, $3, $5, $4}' link.tab | csort -k1,1 -k2,2n --parallel=64 > bigLinkIn.tab
     bedToBigBed -type=bed4+1 -as=${HOME}/kent/src/hg/lib/bigLink.as -tab bigLinkIn.tab ../chromAlias/ucsc.sizes.txt chm13v2-hg19.over.link.bb
 
     rm *.tab
 
-   pigz *.chain
    # make available in liftOver directory as well
    ln -f *.chain.gz ../../liftOver/
 
 # GRCh38 mask used in liftover. This is based on:
 #  https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh38/GCA_000001405.15_GRCh38_GRC_exclusions_T2Tv2.bed
 #  plus UCSC hg38 centromeres track
 
    GRCh38: /team-liftover/grch38_masked_fasta/grch38-centromere_and_falsedup.bed (edited)
    rename to hg38.liftover-mask.bed
    ln -f hg38.liftover-mask.bed ../../liftOver/
 
 
 ================================================================
 * hgCactus (2022-03-28 markd)
 ----------------------------------------------------------------