c139f35504866c1dcf0eec5e3e07a83da8866fe5
mspeir
  Tue Jan 16 19:49:03 2024 -0800
adding makedocs for various lastz runs for liftOver files in MLQ request, refs #32804

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 79b8557..ce3c7a1 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -35149,15 +35149,177 @@
 vi trackDb.ra
 #############################################################################
 # Polygenic risk scores, Zia Truong (committed by Max)
 # Mon Oct  2 04:44:39 PDT 2023
 cd /hive/data/genomes/hg19/bed/prsEmerge/
 # data is now available from https://github.com/broadinstitute/eMERGE-implemented-PRS-models-Lennon-et-al
 # originally received by email from Niall Lennon
 sh ~/kent/src/hg/makeDb/scripts/prsEmerge/prs2bigBed.sh 
 #############################################################################
 # JASPAR 2024 bigBed update 11/13/23
 
 cd /hive/data/genomes/hg19/bed/jaspar
 wget https://frigg.uio.no/JASPAR/JASPAR_genome_browser_tracks/current/hg19/JASPAR2024_hg19.bb
 mv JASPAR2024_hg19.bb JASPAR2024.bb
 ln -s JASPAR2024.bb /gbdb/hg19/jaspar/JASPAR2024.bb
+
+##############################################################################
+# LASTZ Human Hg19 vs. water buffalo GCF_019923935.1
+#    (DONE - 2024-01-04 - mspeir)
+
+    mkdir /hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+    cd /hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+
+    printf '# water buffalo GCF_019923935.1 vs. Human Hg19
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Human  hg19
+SEQ1_DIR=/hive/data/genomes/hg19/hg19.2bit
+SEQ1_LEN=/hive/data/genomes/hg19/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: water buffalo 2021-09-10 GCF_019923935.1_NDDB_SH_1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCF_019923935.1_NDDB_SH_1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	691m37.365s
+
+    sed -e 's/^/    # /;' fb.hg19.chainGCF_019923935.1Link.txt
+    # 1409064227 bases of 3234851260 (43.559%) in intersection
+    sed -e 's/^/    # /;' fb.hg19.chainSynGCF_019923935.1Link.txt
+    # 1352165314 bases of 3234851260 (41.800%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
+        hg19 GCF_019923935.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	281m37.539s
+
+    sed -e 's/^/    # /;' fb.hg19.chainRBest.GCF_019923935.1.txt
+    # 1288041955 bases of 3234851260 (39.818%) in intersection
+
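+    ### and for the swap (only the runSwap.sh trace output was recorded in this copy; the swap commands and results are in the repeated section below)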
+# swap into: /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.hg19.swap
+# running /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.hg19.swap/runSwap.sh
++ cd /hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
++ export defaultName=GCF_019923935.1_NDDB_SH_1
++ defaultName=GCF_019923935.1_NDDB_SH_1
++ export asmId=GCF_019923935.1_NDDB_SH_1
++ asmId=GCF_019923935.1_NDDB_SH_1
++ export buildDir=/hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
++ buildDir=/hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
++ rm -f GCF_019923935.1_NDDB_SH_1.chromAlias.txt
++ ln -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.txt .
++ '[' -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.bb ']'
++ rm -f GCF_019923935.1_NDDB_SH_1.chromAlias.bb
++ ln -s trackData/chromAlias/GCF_019923935.1_NDDB_SH_1.chromAlias.bb .
++ /cluster/home/mspeir/kent/src/hg/utils/automation/asmHubTrackDb.sh GCF_019923935.1_NDDB_SH_1 GCF_019923935.1_NDDB_SH_1 /hive/data/genomes/asmHubs/refseqBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1
+# no ensGene found
+composite chainNet
+constructing synNet.bb links GCF_019923935.1_NDDB_SH_1 hg19
+constructing rbestNet.bb links GCF_019923935.1_NDDB_SH_1 hg19
+constructing synNet.bb links GCF_019923935.1_NDDB_SH_1 hg38
+constructing rbestNet.bb links GCF_019923935.1_NDDB_SH_1 hg38
+constructing synNet.bb links GCF_019923935.1_NDDB_SH_1 mm10
+constructing rbestNet.bb links GCF_019923935.1_NDDB_SH_1 mm10
+##############################################################################
+# LASTZ Human Hg19 vs. water buffalo GCF_019923935.1
+#    (DONE - 2024-01-04 - mspeir)
+
+    mkdir /hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+    cd /hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+
+    printf '# water buffalo GCF_019923935.1 vs. Human Hg19
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Human  hg19
+SEQ1_DIR=/hive/data/genomes/hg19/hg19.2bit
+SEQ1_LEN=/hive/data/genomes/hg19/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: water buffalo 2021-09-10 GCF_019923935.1_NDDB_SH_1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCF_019923935.1_NDDB_SH_1 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	691m37.365s
+
+    sed -e 's/^/    # /;' fb.hg19.chainGCF_019923935.1Link.txt
+    # 1409064227 bases of 3234851260 (43.559%) in intersection
+    sed -e 's/^/    # /;' fb.hg19.chainSynGCF_019923935.1Link.txt
+    # 1352165314 bases of 3234851260 (41.800%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
+        hg19 GCF_019923935.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	281m37.539s
+
+    sed -e 's/^/    # /;' fb.hg19.chainRBest.GCF_019923935.1.txt
+    # 1288041955 bases of 3234851260 (39.818%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCF/019/923/935/GCF_019923935.1_NDDB_SH_1/trackData/blastz.hg19.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCF_019923935.1_NDDB_SH_1 /hive/data/genomes/hg19/bed/lastzGCF_019923935.1.2024-01-04/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	178m31.300s
+
+    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainHg19Link.txt
+    # 1333549561 bases of 2622460639 (50.851%) in intersection
+    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainSynHg19Link.txt
+    # 1297278611 bases of 2622460639 (49.468%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCF/019/923/935/GCF_019923935.1/GCF_019923935.1.chrom.sizes.txt" \
+   GCF_019923935.1 hg19) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	313m32.842s
+
+    sed -e 's/^/    # /;' fb.GCF_019923935.1.chainRBest.Hg19.txt
+    # 1289005387 bases of 2622460639 (49.153%) in intersection
+
+    # real	1465m27.987s
+    # user	0m3.016s
+    # sys	0m2.691s
+
+##############################################################################