4e32052e618df259f0c0d77434641db7793e41d1 hiram Sun Nov 29 23:24:26 2020 -0800 correctly remove duplicate sequences from NCBI rm.out refs #24396 diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 6a1ed3d..14a9717 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -1126,30 +1126,34 @@ "\nERROR: step repeatmasker may be running\n"); exit 255; } } &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct repeatMasker track data"; my $bossScript = newBash HgRemoteScript("$runDir/doRepeatMasker.bash", $workhorse, $runDir, $whatItDoes); my $rmskOpts = ""; if ($ncbiRmsk) { if ( -s "$buildDir/download/${asmId}_rm.out.gz" ) { $rmskOpts = " \\ -ncbiRmsk=\"$buildDir/download/${asmId}_rm.out.gz\" "; + if ( -s "${buildDir}/download/${asmId}.remove.dups.list" ) { + $rmskOpts .= " \\ + -dupList=\"${buildDir}/download/${asmId}.remove.dups.list\" "; + } if ($ucscNames) { $rmskOpts .= " \\ -liftSpec=\"$buildDir/sequence/$asmId.ncbiToUcsc.lift\""; } } } $bossScript->add(<<_EOF_ export asmId=$asmId if [ $buildDir/\$asmId.2bit -nt faSize.rmsk.txt ]; then export species=`echo $rmskSpecies | sed -e 's/_/ /g;'` rm -f versionInfo.txt