c71e68a92e5d8d8393e7883f8dee5d9695ae7373 hiram Mon Oct 27 12:45:05 2025 -0700 ram size only needs to be 3Gb and create the quick.lift files still needs the addQuickLift.py addition refs #35575 diff --git src/hg/utils/automation/doSameSpeciesLiftOver.pl src/hg/utils/automation/doSameSpeciesLiftOver.pl index fa89b9260c5..3a57dad3b6b 100755 --- src/hg/utils/automation/doSameSpeciesLiftOver.pl +++ src/hg/utils/automation/doSameSpeciesLiftOver.pl @@ -31,31 +31,31 @@ # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'align', func => \&doAlign }, { name => 'chain', func => \&doChain }, { name => 'net', func => \&doNet }, { name => 'load', func => \&doLoad }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: my $dbHost = 'hgwdev'; my $ramG = '4g'; my $cpu = 1; -my $blatRam = '4g'; # -ram=Ng argument +my $blatRam = '3g'; # -ram=Ng argument my $blatCpu = 1; # -cpu=N argument my $chainRam = '16g'; # -chainRam=Ng argument my $chainCpu = 1; # -chainCpu=N argument # This could be made into an option: # BLAT -fastMap will not work with query chunks greater than 5000 my $splitSize = '5000'; my $splitOverlap = '500'; my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; @@ -544,30 +544,34 @@ foreach f (\$tmpDir/chainSplit/*.chain) set split = \$f:t:r chainNet \$f \\ $tSizes $qSizes \\ \$tmpDir/netSplit/\$split.net /dev/null netChainSubset \$tmpDir/netSplit/\$split.net \$f stdout \\ | chainStitchId stdin \$tmpDir/overSplit/\$split.chain end endsInLf \$tmpDir/netSplit/*.net endsInLf \$tmpDir/overSplit/*.chain cat \$tmpDir/chainSplit/*.chain | gzip -c > $tDb.$qDb.all.chain.gz cat \$tmpDir/netSplit/*.net | gzip -c > $tDb.$qDb.noClass.net.gz cat \$tmpDir/overSplit/*.chain | gzip -c > $buildDir/$liftOverChainFile +# make quickLift chain: +chainSwap $buildDir/$liftOverChainFile stdout \\ + | chainToBigChain stdin $buildDir/$tDb.$qDb.quick.chain.txt \\ + $buildDir/$tDb.$qDb.quick.link.txt rm -rf \$tmpDir/ _EOF_ ); $bossScript->execute(); } # doNet ######################################################################### # * step: load [dbHost] sub doLoad { my $runDir = "$buildDir"; &HgAutomate::checkExistsUnlessDebug('net', 'load', "$buildDir/$liftOverChainFile"); @@ -601,35 +605,45 @@ # Add an entry to liftOverChain table in central database (specified in # ~/.hg.conf) so that hgLiftOver will know that this is available: hgAddLiftOverChain $tDb $qDb _EOF_ ); } else { $bossScript->add(<<_EOF_ hgLoadChain -test -noBin -tIndex $tDb chain$QDb $buildDir/$liftOverChainFile wget --no-check-certificate -O bigChain.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigChain.as' wget --no-check-certificate -O bigLink.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigLink.as' sed 's/.000000//' chain.tab | awk 'BEGIN {OFS="\\t"} {print \$2, \$4, \$5, \$11, 1000, \$8, \$3, \$6, \$7, \$9, \$10, \$1}' > chain${QDb}.tab bedToBigBed -type=bed6+6 -as=bigChain.as -tab chain${QDb}.tab $tSizes chain${QDb}.bb awk 'BEGIN {OFS="\\t"} {print \$1, \$2, \$3, \$5, \$4}' link.tab | sort -k1,1 -k2,2n > chain${QDb}Link.tab bedToBigBed -type=bed4+1 -as=bigLink.as -tab chain${QDb}Link.tab $tSizes chain${QDb}Link.bb + +bedToBigBed -type=bed6+6 -as=bigChain.as -tab $tDb.$qDb.quick.chain.txt $qSizes $tDb.$qDb.quick.bb +bedToBigBed -type=bed4+1 -as=bigLink.as -tab $tDb.$qDb.quick.link.txt $qSizes $tDb.$qDb.quickLink.bb + set totalBases = `ave -col=2 $tSizes | grep "^total" | awk '{printf "%d", \$2}'` -set basesCovered = `bedSingleCover.pl chain${QDb}Link.tab | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'` +set basesCovered = `bigBedInfo chain${QDb}Link.bb | grep "basesCovered" | cut -d' ' -f2 | tr -d ','` set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > fb.$tDb.chain.${QDb}Link.txt -rm -f link.tab chain.tab bigChain.as bigLink.as chain${QDb}.tab chain${QDb}Link.tab + +set qBases = `ave -col=2 $qSizes | grep "^total" | awk '{printf "%d", \$2}'` +set qCovered = `bigBedInfo $tDb.$qDb.quickLink.bb | grep "basesCovered" | cut -d' ' -f2 | tr -d ','` +set qPerCent = `echo \$qCovered \$qBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` +printf "%d bases of %d (%s%%) in intersection\\n" "\$qCovered" "\$qBases" "\$qPerCent" > fb.$tDb.quick${QDb}Link.txt +rm -f link.tab chain.tab bigChain.as bigLink.as chain${QDb}.tab chain${QDb}Link.tab $tDb.$qDb.quick.chain.txt $tDb.$qDb.quick.link.txt + _EOF_ ); } $bossScript->execute(); } # doLoad ######################################################################### # * step: cleanup [fileServer] sub doCleanup { my $runDir = "$buildDir"; my $whatItDoes = "It cleans up or compresses intermediate files."; $fileServer = &HgAutomate::chooseFileServer($runDir); my $bossScript = new HgRemoteScript("$runDir/doCleanup.csh", $fileServer, $runDir, $whatItDoes);