06d7be056190c14b85e71bc12523f18ea6815b5e
markd
  Mon Dec 7 00:50:29 2020 -0800
BLAT mmap index support merge with master

diff --git src/hg/utils/automation/doIdKeys.pl src/hg/utils/automation/doIdKeys.pl
index b65c416..ec4770f 100755
--- src/hg/utils/automation/doIdKeys.pl
+++ src/hg/utils/automation/doIdKeys.pl
@@ -115,34 +115,35 @@
   my $runDir = "$buildDir";
 
   # First, make sure we're starting clean.
   if ( ! $opt_debug && ( -s "$runDir/doSetup.bash" ) ) {
     die "doSetup: looks like this was run successfully already " .
       "(directory db/bed/idKeys exists).  Either run with -continue clusterRun or some later " .
         "stage, or move aside/remove $runDir and run again.\n";
   }
 
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "Establish working directory and scripts to run the job.";
   my $bossScript = newBash HgRemoteScript("$runDir/doSetup.bash", $workhorse,
 				      $runDir, $whatItDoes);
 
+  # twoBitDup was improved 2020-12-04 and can now handle billions of sequences in a single run
   $bossScript->add(<<_EOF_
 twoBitInfo $twoBit stdout | sort -k2nr | cut -f1 > part.list
 export partCount=`cat part.list | wc -l`
-if [ "\${partCount}" -lt 5000 ]; then
+if [ "\${partCount}" -lt 10000000000 ]; then
   time ( twoBitDup -keyList=stdout $twoBit | grep -v "are identical" | sort > $db.idKeys.txt) > twoBitDup.log 2>&1
 else
   mkdir -p splitList
   split -a 3 -d -l 5000 part.list splitList/part
   for F in splitList/part*
   do
     export B=`basename \$F`
     cat \$F | while read P
     do
       printf "runOne %s {check out exists+ result/%s/%s.txt}\n" \\
              "\${P}" "\${B}" "\${P}"
     done
   done > jobList
 
   printf '#!/bin/bash