7f0ad33bd004bc5616a9214534d74371f9f51940 angie Thu Sep 2 17:48:56 2021 -0700 Greatly increase tolerance for missing BioSample records. I reported this to NCBI; sounds like that's just the way it is with importing delays. diff --git src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl index 8e089c7..e2c7b35 100755 --- src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl +++ src/hg/utils/otto/sarscov2phylo/gbMetadataAddBioSample.pl @@ -69,27 +69,27 @@ if (! $gbName) { $gbName = $bName; } elsif (($gbName eq '1' || $gbName eq 'NA') && length($bName) > length($gbName)) { $gbName = $bName; } elsif ($gbName eq 'nasopharyngeal' && $bName =~ m/\d/) { $gbName = $bName; } if (! $gbGeo) { $gbGeo = $bCountry; } print join("\t", $gbAcc, $bAcc, $gbDate, $gbGeo, $host, $gbName, $completeness, $len); } else { # BioSample file doesn't have info for this BioSample accession print STDERR "Missing BioSample info for $bAcc\n"; $missingCount++; - if ($missingCount >= 1000) { + if ($missingCount >= 100000) { die "Too many missing BioSamples, quitting.\n"; } # Pass through as-is print; } } else { # No associated BioSample, just pass through as-is print; } }