d82bb72f23414b783c5f3a7158a36002f27270d8 hiram Mon Sep 15 15:24:56 2025 -0700 adding quickLift file creation refs #35575 diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl index aae0611b5cf..27298514d92 100755 --- src/hg/utils/automation/doBlastzChainNet.pl +++ src/hg/utils/automation/doBlastzChainNet.pl @@ -987,43 +987,57 @@ } my $whatItDoes = "It generates nets (without repeat/gap stats -- those are added later on $dbHost) from chains, and generates axt, maf and .over.chain from the nets."; my $bossScript = new HgRemoteScript("$runDir/netChains.csh", $workhorse, $runDir, $whatItDoes, $DEF); $bossScript->add(<<_EOF_ # Make nets ("noClass", i.e. without rmsk/class stats which are added later): chainPreNet $inclHap $chain $defVars{SEQ1_LEN} $defVars{SEQ2_LEN} stdout \\ | chainNet $inclHap stdin -minSpace=1 $defVars{SEQ1_LEN} $defVars{SEQ2_LEN} stdout /dev/null \\ | netSyntenic stdin noClass.net # Make liftOver chains: netChainSubset -verbose=0 noClass.net $chain stdout \\ | chainStitchId stdin stdout | gzip -c > $tDb.$qDb.over.chain.gz +# make quickLift chain: +chainSwap $tDb.$qDb.over.chain.gz stdout \\ + | chainToBigChain stdin $tDb.$qDb.quick.chain.txt \\ + $tDb.$qDb.quick.link.txt hgLoadChain -test -noBin -tIndex $tDb chainLiftOver$QDb $tDb.$qDb.over.chain.gz wget --no-check-certificate -O bigChain.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigChain.as' wget --no-check-certificate -O bigLink.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigLink.as' sed 's/.000000//' chain.tab | awk 'BEGIN {OFS="\\t"} {print \$2, \$4, \$5, \$11, 1000, \$8, \$3, \$6, \$7, \$9, \$10, \$1}' > chainLiftOver${QDb}.tab bedToBigBed -type=bed6+6 -as=bigChain.as -tab chainLiftOver${QDb}.tab $defVars{SEQ1_LEN} chainLiftOver${QDb}.bb awk 'BEGIN {OFS="\\t"} {print \$1, \$2, \$3, \$5, \$4}' link.tab | sort -k1,1 -k2,2n > chainLiftOver${QDb}Link.tab bedToBigBed -type=bed4+1 -as=bigLink.as -tab chainLiftOver${QDb}Link.tab $defVars{SEQ1_LEN} chainLiftOver${QDb}Link.bb + +bedToBigBed -type=bed6+6 -as=bigChain.as -tab $tDb.$qDb.quick.chain.txt $defVars{SEQ2_LEN} $tDb.$qDb.quick.bb +bedToBigBed -type=bed4+1 -as=bigLink.as -tab $tDb.$qDb.quick.link.txt $defVars{SEQ2_LEN} $tDb.$qDb.quickLink.bb + set totalBases = `ave -col=2 $defVars{SEQ1_LEN} | grep "^total" | awk '{printf "%d", \$2}'` set basesCovered = `bedSingleCover.pl chainLiftOver${QDb}Link.tab | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'` set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > ../fb.$tDb.chainLiftOver${QDb}Link.txt -rm -f link.tab chain.tab bigChain.as bigLink.as chainLiftOver${QDb}Link.tab chainLiftOver${QDb}.tab + +set totalBases = `ave -col=2 $defVars{SEQ2_LEN} | grep "^total" | awk '{printf "%d", \$2}'` +set basesCovered = `bedSingleCover.pl $tDb.$qDb.quick.link.txt | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'` +set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` +printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > ../fb.$tDb.quick${QDb}Link.txt + +rm -f link.tab chain.tab bigChain.as bigLink.as chainLiftOver${QDb}Link.tab chainLiftOver${QDb}.tab $tDb.$qDb.quick.chain.txt $tDb.$qDb.quick.link.txt _EOF_ ); my $seq1Dir = $defVars{'SEQ1_DIR'}; my $seq2Dir = $defVars{'SEQ2_DIR'}; if ($splitRef) { $bossScript->add(<<_EOF_ # Make axtNet for download: one .axt per $tDb seq. netSplit noClass.net net cd .. mkdir -p axtNet foreach f (axtChain/net/*.net) netToAxt \$f axtChain/chain/\$f:t:r.chain \\ $seq1Dir $seq2Dir stdout \\ | axtSort stdin stdout \\ @@ -1609,30 +1623,31 @@ die "installDownloads: looks like previous stage was not successful " . "(can't find $successFile).\n"; } my $goldenPath = $HgAutomate::goldenPath; if ($tDb =~ m/^GC/) { $goldenPath = &HgAutomate::asmHubDownloadDir($tDb); } &dumpDownloadReadme("$runDir/README.txt"); my $over = $tDb . "To$QDb.over.chain.gz"; my $liftOverDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/liftOver"; if ($tDb =~ m/^GC/) { $liftOverDir = &HgAutomate::asmHubBuildDir($tAsmId) . "/liftOver"; } my $gpLiftOverDir = "$goldenPath/$tDb/liftOver"; my $gbdbLiftOverDir = "$HgAutomate::gbdb/$tDb/liftOver"; + my $gbdbQuickLiftDir = "$HgAutomate::gbdb/$tDb/quickLift"; my $andNets = $isSelf ? "." : ", nets and axtNet,\n" . "# and copies the liftOver chains to the liftOver download dir."; my $whatItDoes = "It creates the download directory for chains$andNets"; my $bossScript = new HgRemoteScript("$runDir/installDownloads.csh", $dbHost, $runDir, $whatItDoes, $DEF); $bossScript->add(<<_EOF_ mkdir -p $goldenPath/$tDb rm -rf $goldenPath/$tDb/vs$QDb mkdir -p $goldenPath/$tDb/vs$QDb cd $goldenPath/$tDb/vs$QDb ln -s $runDir/$tDb.$qDb.all.chain.gz . ln -s $runDir/README.txt . ln -s $runDir/md5sum.txt . @@ -1647,31 +1662,31 @@ ln -s $runDir/$tDb.$qDb.net.gz . _EOF_ ); } $bossScript->add(<<_EOF_ $axt mkdir -p $gpLiftOverDir rm -f $gpLiftOverDir/$over ln -s $liftOverDir/$over $gpLiftOverDir/$over _EOF_ ); if ($tDb !~ m/^GC/) { $bossScript->add(<<_EOF_ -mkdir -p $gbdbLiftOverDir +mkdir -p $gbdbLiftOverDir $gbdbQuickLiftDir rm -f $gbdbLiftOverDir/$over ln -s $liftOverDir/$over $gbdbLiftOverDir/$over hgAddLiftOverChain -minMatch=0.1 -multiple -path=$gbdbLiftOverDir/$over \\ $tDb $qDb _EOF_ ); } $bossScript->add(<<_EOF_ unsetenv TMPDIR if ( -d "/data/tmp" ) then setenv TMPDIR "/data/tmp" else if ( -d "/scratch/tmp" ) then setenv TMPDIR "/scratch/tmp" else