568883e90c09ae0acc3a53010b2fc2f098cffa68
hiram
  Tue Aug 22 10:11:29 2023 -0700
some experiments with same species liftover to HPRC assemblies does not work refs #31561

diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt
index 01c97b6..b563a9f 100644
--- src/hg/makeDb/doc/hg38/hg38.txt
+++ src/hg/makeDb/doc/hg38/hg38.txt
@@ -7030,15 +7030,187 @@
 #Tiny bit of python to identify the broken lines in the file where chromStart > chromEnd
 
 #for line in myFile:
 #    newLine = line.split("\t")
 #    if int(newLine[1]) > int(newLine[2]):
 #        print(line)
 #        n+=1
 #print(n)
 
 #remove those broken records from the file
 cat cosmic.bed | grep -vf badRecords.bed > cosmic.fixed.bed
 bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixed.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab
 
 #make symlink
 ln -s /gbdb/hg38/cosmic/cosmic.bb /hive/data/outside/cosmic/hg38/v98/cosmic.bb
+
+##############################################################################
+# LIFTOVER TO GCA_018873775.2_hg01243.v3.0 (DONE - 2023-08-13 - Hiram)
+    ssh hgwdev
+    # going to need an ooc for hg38.p14.2bit
+    cd /hive/data/genomes/hg38
+    time blat hg38.p14.2bit /dev/null /dev/null -tileSize=11 \
+      -makeOoc=hg38.p14.ooc -repMatch=1024
+    # Wrote 36808 overused 11-mers to hg38.p14.ooc
+    # real    0m50.753s
+
+    # and ooc for this GenArk hub
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0
+  time blat GCA_018873775.2_hg01243.v3.0.2bit /dev/null /dev/null -tileSize=11 \
+      -makeOoc=GCA_018873775.2_hg01243.v3.0.ooc -repMatch=1024
+# Wrote 39087 overused 11-mers to GCA_018873775.2_hg01243.v3.0.ooc
+# real    0m49.426s
+
+  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
+    cd /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
+
+    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018873775.2
+
+    # trying -ram=6g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018873775.2) > doLiftOverToGCA_018873775.2.log 2>&1
+    # real    12654m58.134s
+
+    # failed after the alignment was done: parasol went into an endless loop
+    # with this error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4 TB log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+
+    # see if the liftOver menus function in the browser from hg38
+    #    to GCA_018873775.2
+
+##############################################################################
+# LIFTOVER GCA_018873775.2_hg01243.v3.0 to hg38 (DONE - 2023-08-13 - Hiram)
+    ssh hgwdev
+
+    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
+
+    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
+         GCA_018873775.2 hg38
+
+    # trying -ram=6g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
+         GCA_018873775.2 hg38) > doLiftOverToHg38.log 2>&1
+
+    # failed after the alignment was done: parasol went into an endless loop
+    # with this error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4 TB log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+    # real    193m24.137s
+
+    # see if the liftOver menus function in the browser from GCA_018873775.2
+    #    to hg38
+
+##############################################################################
+# LIFTOVER TO GCA_018503275.1_NA19240.pri.mat.f1_v2 (TBD - 2023-08-14 - Hiram)
+    ssh hgwdev
+
+    # ooc for this GenArk hub
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2
+  time blat GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit /dev/null /dev/null \
+      -tileSize=11 -repMatch=1024 \
+      -makeOoc=GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
+  # Wrote 35866 overused 11-mers to GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
+    # real    0m32.298s
+
+  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
+  cd /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
+
+    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018503275.1
+
+    # trying -ram=4g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018503275.1) > doLiftOverToGCA_018503275.1.log 2>&1
+    # real    11370m18.026s
+
+    # failed after the alignment was done: parasol went into an endless loop
+    # with this error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4 TB log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+    # -rw-rw-r-- 1 4363949695640 Aug 22 09:16 doLiftOverToGCA_018503275.1.log
+
+    # see if the liftOver menus function in the browser from hg38
+    #    to GCA_018503275.1
+
+##############################################################################
+# LIFTOVER GCA_018503275.1_NA19240.pri.mat.f1_v2 to hg38 (DONE - 2023-08-14 - Hiram)
+    ssh hgwdev
+
+    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
+
+    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
+         GCA_018503275.1 hg38
+
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
+         GCA_018503275.1 hg38) > liftOverToHg38.log 2>&1
+    # real    5082m17.500s
+
+    # interestingly, this alignment completed and actually has good
+    # coverage:
+    cat fb.GCA_018503275.1.chain.Hg38Link.txt
+    # 2928654519 bases of 3032066086 (96.589%) in intersection
+
+    # see if the liftOver menus function in the browser from GCA_018503275.1
+    #    to hg38
+
+##############################################################################