da068f4b13595bfb45af25849abab4d89f816b5b angie Tue Jun 3 08:40:49 2025 -0700 Reduce threads to avoid lockup in usher-sampled and matOptimize. Update hgdownload2 --> hgdownload3 and tolerate hgdownload rsync failure. diff --git src/hg/utils/otto/dengue/buildTree.sh src/hg/utils/otto/dengue/buildTree.sh index aba6c7c0474..9afe9f3b2c1 100755 --- src/hg/utils/otto/dengue/buildTree.sh +++ src/hg/utils/otto/dengue/buildTree.sh @@ -130,44 +130,44 @@ --include-reference \ --retry-reverse-complement true \ --output-fasta aligned.$subtype.fa.xz \ --output-columns-selection seqName,clade,totalSubstitutions,totalDeletions,totalInsertions,totalMissing,totalNonACGTNs,alignmentStart,alignmentEnd,substitutions,deletions,insertions,aaSubstitutions,aaDeletions,aaInsertions,missing,unknownAaRanges,nonACGTNs \ --output-tsv nextclade.denv$subtype.tsv \ $dengueDir/ncbi/ncbi.$today/genbank.fa.xz \ >& nextclade.denv$subtype.log # If it becomes necessary, add -excludeFile=$dengueScriptDir/exclude.ids time faToVcf -verbose=2 -includeRef -includeNoAltN \ <(xzcat aligned.$subtype.fa.xz) stdout \ | vcfRenameAndPrune stdin renaming.tsv stdout \ | pigz -p 8 \ > all.$subtype.vcf.gz - time $usherSampled -T 64 -A -e 5 \ + time $usherSampled -T 16 -A -e 5 \ -t emptyTree.nwk \ -v all.$subtype.vcf.gz \ -o denv$subtype.$today.preFilter.pb\ --optimization_radius 0 --batch_size_per_process 10 \ > usher.addNew.$subtype.log 2>usher-sampled.$subtype.stderr # Filter out branches that are so long they must lead to some other subtype $matUtils extract -i denv$subtype.$today.preFilter.pb \ --max-branch-length 1000 \ -O -o denv$subtype.$today.preOpt.pb >& tmp.log # Optimize: - time $matOptimize -T 64 -r 20 -M 2 -S move_log.$subtype \ + time $matOptimize -T 16 -r 20 -M 2 -S move_log.$subtype \ -i denv$subtype.$today.preOpt.pb \ -o denv$subtype.$today.pb \ >& matOptimize.$subtype.log rm -f *.pbintermediate*.pb chmod 664 denv$subtype.$today.pb # Make metadata that uses same names as tree echo -e "strain\tgenbank_accession\tdate\tcountry\tlocation\tlength\thost\tbioproject_accession\tbiosample_accession\tsra_accession\tauthors\tpublications\tNextclade_lineage" \ > denv$subtype.$today.metadata.tsv sort $dengueNcbiDir/metadata.tsv \ | perl -F'/\t/' -walne '$F[3] =~ s/(: ?|$)/\t/; print join("\t", @F);' \ | join -t$'\t' -o 1.1,1.1,1.6,1.4,1.5,1.8,1.9,1.10,1.11,1.12,1.14,1.15,2.2 \ - <(cut -f 1,2 nextclade.denv$subtype.tsv | sort) \ | join -t$'\t' -o 1.2,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12,2.13 \ <(sort renaming.tsv) \ @@ -216,23 +216,29 @@ gzip -c denv$subtype.$today.pb > $archive/denv$subtype.$today.pb.gz ln -f $(pwd)/hgPhyloPlace.description.$subtype.txt $archive/denv$subtype.$today.version.txt # Update 'latest' in $archiveRoot for f in $archive/denv$subtype.$today.*; do latestF=$(echo $(basename $f) | sed -re 's/'$today'/latest/') ln -f $f $archiveRoot/$latestF done # Update hgdownload-test link for archive asmDir=$(echo $asmAcc \ | sed -re 's@^(GC[AF])_([0-9]{3})([0-9]{3})([0-9]{3})\.([0-9]+)@\1/\2/\3/\4/\1_\2\3\4.\5@') mkdir -p /data/apache/htdocs-hgdownload/hubs/$asmDir/UShER_DENV-$subtype/$y/$m ln -sf $archive /data/apache/htdocs-hgdownload/hubs/$asmDir/UShER_DENV-$subtype/$y/$m # rsync to hgdownload hubs dir - for h in hgdownload1 hgdownload2; do - rsync -a -L --delete /data/apache/htdocs-hgdownload/hubs/$asmDir/UShER_DENV-$subtype/* \ - qateam@$h:/mirrordata/hubs/$asmDir/UShER_DENV-$subtype/ + for h in hgdownload1 hgdownload3; do + if rsync -a -L --delete /data/apache/htdocs-hgdownload/hubs/$asmDir/UShER_DENV-$subtype/* \ + qateam@$h:/mirrordata/hubs/$asmDir/UShER_DENV-$subtype/; then + true + else + echo "" + echo "*** rsync to $h failed; disk full? ***" + echo "" + fi done done rm -f mutation-paths.txt *.pre*.pb final-tree.nh nice gzip -f *.log *.tsv move_log* *.stderr samples.*