31cfa0823734a570e9aabdbfd730dbd01f0b0a85 hiram Tue Apr 6 13:51:13 2021 -0700 eliminate the trailing ^M on the taxId and testing blat-backup refs #26658 diff --git src/hg/makeDb/doc/asmHubs/mkGenomes.pl src/hg/makeDb/doc/asmHubs/mkGenomes.pl index 7fa3282..056d735 100755 --- src/hg/makeDb/doc/asmHubs/mkGenomes.pl +++ src/hg/makeDb/doc/asmHubs/mkGenomes.pl @@ -1,39 +1,39 @@ #!/usr/bin/env perl use strict; use warnings; use File::Basename; my $argc = scalar(@ARGV); if ($argc != 3) { printf STDERR "mkGenomes.pl blatHost blatPort [two column name list] > .../hub/genomes.txt\n"; - printf STDERR "e.g.: mkGenomes.pl localhost 4040 vgp.primary.assemblies.tsv > .../vgp/genomes.txt\n"; + printf STDERR "e.g.: mkGenomes.pl blat-backup 4040 vgp.primary.assemblies.tsv > .../vgp/genomes.txt\n"; printf STDERR "e.g.: mkGenomes.pl hgwdev 4040 vgp.primary.assemblies.tsv > .../vgp/download.genomes.txt\n"; printf STDERR "the name list is found in \$HOME/kent/src/hg/makeDb/doc/asmHubs/\n"; printf STDERR "\nthe two columns are 1: asmId (accessionId_assemblyName)\n"; printf STDERR "column 2: common name for species, columns separated by tab\n"; printf STDERR "result will write a local asmId.genomes.txt file for each hub\n"; printf STDERR "and a local asmId.hub.txt file for each hub\n"; printf STDERR "and a local asmId.groups.txt file for each hub\n"; printf STDERR "and the output to stdout will be the overall genomes.txt\n"; printf STDERR "index file for all genomes in the given list\n"; exit 255; } my $downloadHost = "hgwdev"; -my @blatHosts = qw( localhost hgwdev ); +my @blatHosts = qw( blat-backup hgwdev ); my @blatPorts = qw( 4040 4040 ); ################### writing out hub.txt file, twice ########################## sub singleFileHub($$$$$$$$$$) { my ($fh1, $fh2, $accessionId, $orgName, $descr, $asmId, $defPos, $taxId, $trackDb, $accessionDir) = @_; my @fhN; push @fhN, $fh1; push @fhN, $fh2; my $fileCount = 0; my @tdbLines; open (TD, "<$trackDb") or die "can not read trackDb: $trackDb"; while (my $tdbLine = <TD>) { chomp $tdbLine; push @tdbLines, $tdbLine; @@ -135,31 +135,31 @@ next; } my $asmReport="$buildDir/download/${asmId}_assembly_report.txt"; my $trackDb = "$buildDir/$asmId.trackDb.txt"; if ( ! -s "${trackDb}" ) { printf STDERR "# %03d not built yet: %s\n", $orderKey, $asmId; printf STDERR "# '%s'\n", $trackDb; next; } if ( ! -s "${asmReport}" ) { printf STDERR "# %03d missing assembly_report: %s\n", $orderKey, $asmId; next; } ++$buildDone; printf STDERR "# %03d genomes.txt %s/%s\n", $buildDone, $accessionDir, $accessionId; - my $taxId=`grep -i "taxid:" $asmReport | head -1 | awk '{printf \$(NF)}'`; + my $taxId=`grep -i "taxid:" $asmReport | head -1 | awk '{printf \$(NF)}' | tr -d \$'\\r'`; chomp $taxId; my $descr=`grep -i "organism name:" $asmReport | head -1 | sed -e 's#.*organism name: *##i; s# (.*\$##;'`; chomp $descr; my $orgName=`grep -i "organism name:" $asmReport | head -1 | sed -e 's#.* name: .* (##; s#).*##;'`; chomp $orgName; if (defined($commonName{$asmId})) { $orgName = $commonName{$asmId}; } printf "genome %s\n", $accessionId; printf "taxId %s\n", $taxId if (length($taxId) > 1); printf "trackDb ../%s/%s/trackDb.txt\n", $accessionDir, $accessionId; printf "groups groups.txt\n"; printf "description %s\n", $orgName; printf "twoBitPath ../%s/%s/%s.2bit\n", $accessionDir, $accessionId, $accessionId;