06d7be056190c14b85e71bc12523f18ea6815b5e markd Mon Dec 7 00:50:29 2020 -0800 BLAT mmap index support merge with master diff --git src/hg/utils/automation/doIdKeys.pl src/hg/utils/automation/doIdKeys.pl index b65c416..ec4770f 100755 --- src/hg/utils/automation/doIdKeys.pl +++ src/hg/utils/automation/doIdKeys.pl @@ -115,34 +115,35 @@ my $runDir = "$buildDir"; # First, make sure we're starting clean. if ( ! $opt_debug && ( -s "$runDir/doSetup.bash" ) ) { die "doSetup: looks like this was run successfully already " . "(directory db/bed/idKeys exists). Either run with -continue clusterRun or some later " . "stage, or move aside/remove $runDir and run again.\n"; } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "Establish working directory and scripts to run the job."; my $bossScript = newBash HgRemoteScript("$runDir/doSetup.bash", $workhorse, $runDir, $whatItDoes); + # improved twoBitDup 2020-12-04 can now do billions in one go $bossScript->add(<<_EOF_ twoBitInfo $twoBit stdout | sort -k2nr | cut -f1 > part.list export partCount=`cat part.list | wc -l` -if [ "\${partCount}" -lt 5000 ]; then +if [ "\${partCount}" -lt 10000000000 ]; then time ( twoBitDup -keyList=stdout $twoBit | grep -v "are identical" | sort > $db.idKeys.txt) > twoBitDup.log 2>&1 else mkdir -p splitList split -a 3 -d -l 5000 part.list splitList/part for F in splitList/part* do export B=`basename \$F` cat \$F | while read P do printf "runOne %s {check out exists+ result/%s/%s.txt}\n" \\ "\${P}" "\${B}" "\${P}" done done > jobList printf '#!/bin/bash