#!/usr/bin/env perl
#
# jsonConsumer.pl - command line client used to exercise the hubApi JSON
# endpoints on the server named in $server below.  Results are printed as
# JSON on STDOUT, diagnostics and progress messages go to STDERR.

use strict;
use warnings;
use HTTP::Tiny;
use Time::HiRes;
use JSON;
use Getopt::Long;

# forward declaration
sub performRestAction($$$);

my $http = HTTP::Tiny->new();
# my $server = 'https://api.genome.ucsc.edu';
# my $server = 'https://apibeta.soe.ucsc.edu';
my $server = 'https://api-test.gi.ucsc.edu';
# my $server = 'https://hgwdev-api.gi.ucsc.edu';
my $globalHeaders = { 'Content-Type' => 'application/json' };
my $lastRequestTime = Time::HiRes::time();
my $processStartTime = Time::HiRes::time();
my $requestCount = 0;

##############################################################################
# command line options
my $endpoint = "";
my $hubUrl = "";
my $genome = "";
my $track = "";
my $chrom = "";
my $start = "";
my $end = "";
my $test0 = 0;
my $trackDump = 0;
my $debug = 0;
my $trackLeavesOnly = 0;
my $measureTiming = 0;
my $jsonOutputArrays = 0;
my $maxItemsOutput = "";
##############################################################################

# print the usage message to STDERR, the caller decides whether to exit
sub usage() {
  print STDERR "usage: ./jsonConsumer.pl [arguments]\n";
  print STDERR <<'_EOF_';
arguments:
-test0 - perform test of /list/publicHubs and /list/ucscGenomes endpoints
-trackDump - obtain all data for a single track from: track, genome (hubUrl)
           - proof of concept, will not work for all cases
-hubUrl=<URL> - use the URL to access the track or assembly hub
-genome=<name> - name for UCSC database genome or assembly/track hub genome
-track=<trackName> - specify a single track in a hub or database
-chrom=<chromName> - restrict the operation to a single chromosome
-start=<coordinate> - restrict the operation to a range, use both start and end
-end=<coordinate> - restrict the operation to a range, use both start and end
-maxItemsOutput=<N> - limit output to this number of items.  Default 1,000
                      maximum allowed 1,000,000
-trackLeavesOnly - for list tracks function, no containers listed
-measureTiming - turn on timing measurement
-jsonOutputArrays - add jsonOutputArrays=1 to every request
-debug - turn on debugging business
-endpoint=<function> - where <function> is one of the following:
   /list/publicHubs - provide a listing of all available public hubs
   /list/ucscGenomes - provide a listing of all available UCSC genomes
   /list/hubGenomes - list genomes from a specified hub (with hubUrl=...)
   /list/tracks - list data tracks available in specified hub or database genome
   /list/chromosomes - list chromosomes from specified data track
   /list/schema - show schema from specified data track in hubUrl or database
   /getData/sequence - return sequence from specified hub or database genome
   /getData/track - return data from specified track in hub or database genome
_EOF_
}

#########################################################################
# generic output of a hash pointer to STDERR, one "key - value" line per
# key in sorted key order.  Array and hash values are shown as the
# placeholders <array> and <hash> instead of their reference address.
sub hashOutput($) {
  my ($hashRef) = @_;
  foreach my $key (sort keys %$hashRef) {
    my $value = $hashRef->{$key};
    $value = "<array>" if (ref($value) eq "ARRAY");
    $value = "<hash>" if (ref($value) eq "HASH");
    # print the placeholder, not the raw reference
    printf STDERR "%s - %s\n", $key, $value;
  }
}

# generic output of an array pointer to STDERR: the index and ref() type of
# each element, followed by the hashOutput() listing for hash elements
sub arrayOutput($) {
  my ($ary) = @_;
  my $i = 0;
  foreach my $element (@$ary) {
    printf STDERR "# %d\t%s\n", $i++, ref($element);
    if (ref($element) eq "HASH") {
      hashOutput($element);
    }
  }
}
#########################################################################

##############################################################################
###
### these functions were copied from Ensembl HTTP::Tiny example code:
###  https://github.com/Ensembl/ensembl-rest/wiki/Example-Perl-Client
###
##############################################################################

##############################################################################
# Request $endpoint with the given parameter hash (or "" for none) and
# return the decoded JSON.  Returns an empty hash reference when there is
# no content.  When the content can not be decoded as JSON (for example the
# text body of a failed request), a message goes to STDERR and a hash
# reference with an 'error' key is returned so that callers testing
# ->{'error'} see the failure instead of the script dying here.
sub performJsonAction($$) {
  my ($endpoint, $parameters) = @_;
  my $headers = $globalHeaders;
  my $content = performRestAction($endpoint, $parameters, $headers);
  return {} unless $content;
  my $json;
  my $decoded = eval { $json = decode_json($content); 1 };
  if (! $decoded) {
    my $why = $@ || "unknown error";
    $why =~ s/\s+$//;
    printf STDERR "ERROR: can not decode JSON from endpoint '%s': %s\n", $endpoint, $why;
    return { 'error' => "can not decode JSON response from endpoint '$endpoint'" };
  }
  return $json;
}

##############################################################################
# Perform one HTTP GET to $server/$endpoint and return the response content,
# or nothing when the response content is empty.
#   $endpoint   - endpoint path, a leading '/' is removed
#   $parameters - hash reference of key=value request parameters, may be ""
#   $headers    - hash reference of request headers, Content-Type defaults
#                 to application/json
# The global options debug, measureTiming, jsonOutputArrays and
# maxItemsOutput are added to the request when set.  On a 429 response
# with a retry-after header the request is repeated after sleeping that
# many seconds.  For any other failure the status is reported on STDERR and
# the response content is returned as is.
sub performRestAction($$$) {
  my ($endpoint, $parameters, $headers) = @_;
  $parameters ||= {};
  $headers ||= {};
  $headers->{'Content-Type'} = 'application/json' unless exists $headers->{'Content-Type'};
  if ($requestCount == 15) { # check every 15
    my $currentTime = Time::HiRes::time();
    my $diff = $currentTime - $lastRequestTime;
    # if less than a second then sleep for the remainder of the second
    if ($diff < 1) {
      Time::HiRes::sleep(1-$diff);
    }
    # reset
    $lastRequestTime = Time::HiRes::time();
    $requestCount = 0;
  }

  $endpoint =~ s#^/##;
  # Collect every query argument first so the '?' is always present when
  # there is at least one argument, including the case of no parameters
  # with only the global options set.  Keys are sorted so the URL shown on
  # STDERR is the same from run to run.
  my @queryArgs;
  foreach my $key (sort keys %{$parameters}) {
    push @queryArgs, $key . "=" . $parameters->{$key};
  }
  push @queryArgs, "debug=1" if ($debug);
  push @queryArgs, "measureTiming=1" if ($measureTiming);
  push @queryArgs, "jsonOutputArrays=1" if ($jsonOutputArrays);
  push @queryArgs, "maxItemsOutput=$maxItemsOutput" if (length($maxItemsOutput));
  my $url = "$server/$endpoint";
  $url .= '?' . join(';', @queryArgs) if (scalar(@queryArgs));
  printf STDERR "### '%s'\n", $url;

  my $response = $http->get($url, {headers => $headers});
  if (!$response->{success}) {
    my ($status, $reason) = ($response->{status}, $response->{reason});
    # Quickly check for rate limit exceeded & Retry-After (lowercase due to our client)
    if ($status == 429 && exists $response->{headers}->{'retry-after'}) {
      my $retry = $response->{headers}->{'retry-after'};
      printf STDERR "Failed for %s! Status code: %s. Reason: %s, retry-after: %s seconds\n", $endpoint, $status, $reason, $retry;
#      hashOutput($response->{headers});
      Time::HiRes::sleep($retry);
      # After sleeping see that we re-request
      return performRestAction($endpoint, $parameters, $headers);
    }
#   die "Failed for $endpoint! Status code: ${status}. Reason: ${reason}\n";
    printf STDERR "Failed for %s! Status code: %s. Reason: %s\n", $endpoint, $status, $reason;
#   hashOutput($response->{headers});
#   hashOutput($response->{content});
#   printf STDERR "'%s'\n", $response->{content};
#   printf STDERR "'%s'\n", $response->{headers};
    return $response->{content};
  }
  $requestCount++;
  if (length $response->{content}) {
    return $response->{content};
  }
  return;
}

#############################################################################
# print the numbered (from 1) list of column names from an array reference
sub columnNames($) {
  my ($nameArray) = @_;
  if (ref($nameArray) ne "ARRAY") {
    printf "ERROR: do not have an array reference in columnNames\n";
  } else {
    printf "### Column names in table return:\n";
    my $i = 0;
    foreach my $name (@$nameArray) {
      printf "%d\t\"%s\"\n", ++$i, $name;
    }
  }
}

# print the "key":"value" pairs of the top level hash in sorted key order,
# array and hash values are shown as <array> and <hash>
sub topLevelKeys($) {
  my ($topHash) = @_;
  # do not print out the time and delay related items since that
  # would make it difficult to have a consistent test output.
  my %skipKey = map { $_ => 1 }
     qw(downloadTime downloadTimeStamp botDelay dataTime dataTimeStamp);
  printf "### keys in top level hash:\n";
  foreach my $topKey ( sort keys %$topHash) {
    next if (exists($skipKey{$topKey}));
    my $value = $topHash->{$topKey};
    $value = "<array>" if (ref($value) eq "ARRAY");
    $value = "<hash>" if (ref($value) eq "HASH");
    printf "\"%s\":\"%s\"\n", $topKey,$value;
  }
}

#############################################################################
# Request $endpoint without parameters and verify the answer carries an
# 'error' item equal to "$expect '$endpoint'".  The returned JSON is
# printed via the given JSON object in every case.
sub checkError($$$) {
  my ($json, $endpoint, $expect) = @_;
  my $jsonReturn = performJsonAction($endpoint, "");
  # printf "%s", $json->pretty->encode( $jsonReturn );
  if (! defined($jsonReturn->{'error'}) ) {
    printf "ERROR: no error received from endpoint: '%s', received:\n", $endpoint;
    printf "%s", $json->pretty->encode( $jsonReturn );
  } else {
    if ($jsonReturn->{'error'} ne "$expect '$endpoint'") {
      printf "incorrect error received from endpoint '%s':\n\t'%s'\n", $endpoint, $jsonReturn->{'error'};
    }
    printf "%s", $json->pretty->encode( $jsonReturn );
  }
}

#############################################################################
sub verifyCommandProcessing()
{
  my $json = JSON->new;
  # verify command processing can detect bad input
  my $endpoint = "/list/noSubCommand";
  my $expect = "do not recognize endpoint function:";
  checkError($json, $endpoint,$expect);
}	#	sub verifyCommandProcessing()

#############################################################################
# Find the highest chromStart in the returned data to obtain a continuation
# point.
# The item 'chromStart' is not necessarily always named as such,
#   depending upon track type, it could be: tStart or genoStart or txStart
# Returns -1 when there are no items for the track.  Dies when an item has
# none of the recognized start fields.
sub findHighestChromStart($$) {
  my ($hashPtr, $track) = @_;
  my $highStart = -1;
  # checked in this order, the first one defined in an item is used
  my @startFields = ('tStart', 'genoStart', 'txStart', 'chromStart');
  my $trackData = $hashPtr->{$track};
  foreach my $item (@$trackData) {
    my $startField;
    foreach my $fieldName (@startFields) {
      if (defined($item->{$fieldName})) {
        $startField = $fieldName;
        last;
      }
    }
    # die does not interpret format strings, build the message with sprintf
    die sprintf("ERROR: do not recognize table type for track '%s', can not find chrom start.\n", $track)
       if (! defined($startField));
    $highStart = $item->{$startField} if ($item->{$startField} > $highStart);
  }
  return $highStart;
}

#############################################################################
# walk through all the chromosomes for a track to extract all data
#  XXX - NOT ADDRESSED - this produces duplicate items at the breaks when
#  maxItemsLimit is used
# The two arguments are accepted but not used, the request parameters are
# rebuilt here from the global hubUrl, genome and track options.
# Returns 1 when the /list/chromosomes request reports an error, 0 otherwise.
sub trackDump($$) {
  my ($endpoint, $parameters) = @_;
  my $errReturn = 0;
  my %localParameters;
  if (length($hubUrl)) {
    $localParameters{"hubUrl"} = "$hubUrl";
  }
  if (length($genome)) {
    $localParameters{"genome"} = "$genome";
  }
  if (length($track)) {
    $localParameters{"track"} = "$track";
  }
  my $endPoint = "/list/chromosomes";
  my $jsonChromosomes = performJsonAction($endPoint, \%localParameters);
  $errReturn = 1 if (defined ($jsonChromosomes->{'error'}));
  my %chromInfo;	# key is chrom name, value is size
  if (0 == $errReturn) {
    my $chromHash = $jsonChromosomes->{'chromosomes'};
    foreach my $chr (keys %$chromHash) {
      $chromInfo{$chr} = $chromHash->{$chr};
    }
    # for each chromosome, in order by size, smallest first
    $endPoint = "/getData/track";
    $maxItemsOutput = 14000;
    foreach my $chr (sort {$chromInfo{$a} <=> $chromInfo{$b}} keys %chromInfo) {
      $localParameters{"chrom"} = "$chr";
      delete $localParameters{'start'};
      delete $localParameters{'end'};
      printf STDERR "# working\t%s\t%d\n", $chr, $chromInfo{$chr};
      my $oneChrom = performJsonAction($endPoint, \%localParameters);
      my $itemsReturned = $oneChrom->{'itemsReturned'};
      my $reachedMaxItems = 0;
      $reachedMaxItems = 1 if (defined($oneChrom->{'maxItemsLimit'}));
      if ($reachedMaxItems) {
        my $previousStart = -1;
        while ($reachedMaxItems) {
          my $highestChromStart = findHighestChromStart($oneChrom, $track);
          printf STDERR "# chrom: %s\t%d items -> max item limit last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
          # when the continuation point does not move forward the same
          # request would be repeated forever, give up on this chromosome
          if ($highestChromStart <= $previousStart) {
            printf STDERR "# chrom: %s\tcan not advance past chromStart %d, incomplete\n", $chr, $highestChromStart;
            last;
          }
          $previousStart = $highestChromStart;
          $localParameters{'start'} = "$highestChromStart";
          $localParameters{'end'} = "$chromInfo{$chr}";
          $reachedMaxItems = 0;
          $oneChrom = performJsonAction($endPoint, \%localParameters);
          $itemsReturned = $oneChrom->{'itemsReturned'};
          $reachedMaxItems = 1 if (defined($oneChrom->{'maxItemsLimit'}));
          if (0 == $reachedMaxItems) {
            $highestChromStart = findHighestChromStart($oneChrom, $track);
            printf STDERR "# chrom: %s\t%d items completed at last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
          }
        }
      } else {
        printf STDERR "# chrom: %s\t%d items - completed\n", $chr, $itemsReturned;
      }
    }	#	foreach chrom in chromInfo
  }	# if (0 == $errReturn)  chromInfo was successful
  return $errReturn;
}	#	sub trackDump($$)

#############################################################################
# Run the endpoint given on the command line with the parameters from the
# command line options.  Without an endpoint: error message, usage and
# exit 255.  Returns 1 when the request reports an error, 0 otherwise.
sub processEndPoint() {
  my $errReturn = 0;
  if (length($endpoint)) {
    my $json = JSON->new;
    my $jsonReturn = {};
    my %parameters;
    if (length($hubUrl)) {
      $parameters{"hubUrl"} = "$hubUrl";
    }
    if (length($genome)) {
      $parameters{"genome"} = "$genome";
    }
    if (length($chrom)) {
      $parameters{"chrom"} = "$chrom";
    }
    if ($trackLeavesOnly) {
      $parameters{"trackLeavesOnly"} = "1";
    }
    if (length($track)) {
      $parameters{"track"} = "$track";
    }
    if (length($start)) {
      $parameters{"start"} = "$start";
    }
    if (length($end)) {
      $parameters{"end"} = "$end";
    }
    # Pass along any bogus request just to test the error handling.
    if ($trackDump) {
      $errReturn = trackDump($endpoint, \%parameters);
    } else {
      $jsonReturn = performJsonAction($endpoint, \%parameters);
      $errReturn = 1 if (defined ($jsonReturn->{'error'}));
      printf "%s", $json->pretty->encode( $jsonReturn );
    }
  } else {
    printf STDERR "ERROR: no endpoint given ?\n";
    usage();
    exit 255;
  }
  return $errReturn;
}	# sub processEndPoint()

###########################################################################
### test /list/publicHubs and /list/ucscGenomes
sub test0() {

my $json = JSON->new;
my $jsonReturn = {};

verifyCommandProcessing();	# check 'command' and 'subCommand'

$jsonReturn = performJsonAction("/list/publicHubs", "");

# this prints everything out indented nicely:
# printf "%s", $json->pretty->encode( $jsonReturn );
# exit 255;
# __END__
#    "dataTimeStamp" : 1552320994,
#    "downloadTime" : "2019:03:26T21:40:10Z",
#    "botDelay" : 2,
#    "downloadTimeStamp" : 1553636410,
#    "dataTime" : "2019-03-11T09:16:34"

# look for the specific public hub named "Synonymous Constraint" to print
# out for a verify test case
#
if (ref($jsonReturn) eq "HASH") {
  topLevelKeys($jsonReturn);

  if (defined($jsonReturn->{"publicHubs"})) {
     my $arrayData = $jsonReturn->{"publicHubs"};
     foreach my $data (@$arrayData) {
       # hubs without a shortLabel can not be the one looked for
       next if (! defined($data->{'shortLabel'}));
       if ($data->{'shortLabel'} eq "Synonymous Constraint") {
         printf "### Synonymous Constraint public hub data\n";
         foreach my $key (sort keys %$data) {
           next if ($key eq "registrationTime");
           printf "'%s'\t'%s'\n", $key, $data->{$key};
         }
       }
     }
  }
} elsif (ref($jsonReturn) eq "ARRAY") {
  printf "ERROR: top level returns ARRAY of size: %d\n", scalar(@$jsonReturn);
  printf "should have been a HASH to the publicHub data\n";
}

$jsonReturn = performJsonAction("/list/ucscGenomes", "");
# printf "%s", $json->pretty->encode( $jsonReturn );

if (ref($jsonReturn) eq "HASH") {
  topLevelKeys($jsonReturn);
  if (defined($jsonReturn->{"ucscGenomes"})) {
     my $ucscGenomes = $jsonReturn->{"ucscGenomes"};
     if (exists($ucscGenomes->{'hg38'})) {
       my $hg38 = $ucscGenomes->{'hg38'};
       printf "### hg38/Human information\n";
       foreach my $key (sort keys %$hg38) {
         printf "\"%s\"\t\"%s\"\n", $key, $hg38->{$key};
       }
     }
  }
} elsif (ref($jsonReturn) eq "ARRAY") {
  printf "ERROR: top level returns ARRAY of size: %d\n", scalar(@$jsonReturn);
  printf "should have been a HASH to the ucscGenomes\n";
}

}	#	sub test0()

# when -measureTiming is on, report the time since script start on STDERR
sub elapsedTime() {
  if ($measureTiming) {
    my $endTime = Time::HiRes::time();
    my $et = $endTime - $processStartTime;
    printf STDERR "# processing time: %.3fs\n", $et;
  }
}

#############################################################################
### main()
#############################################################################

# counted before GetOptions removes the recognized options from @ARGV
my $argc = scalar(@ARGV);

GetOptions ("hubUrl=s" => \$hubUrl,
    "endpoint=s"  => \$endpoint,
    "genome=s"  => \$genome,
    "track=s"  => \$track,
    "chrom=s"  => \$chrom,
    "start=s"  => \$start,
    "end=s"    => \$end,
    "test0"    => \$test0,
    "trackDump"    => \$trackDump,
    "debug"    => \$debug,
    "trackLeavesOnly"    => \$trackLeavesOnly,
    "measureTiming"    => \$measureTiming,
    "jsonOutputArrays"    => \$jsonOutputArrays,
    "maxItemsOutput=s"   => \$maxItemsOutput)
    or die "Error in command line arguments\n";

if ($test0) {
   test0();
   elapsedTime();
   exit 0;
}

if ($argc > 0) {
   if (processEndPoint()) {
	elapsedTime();
	exit 255;
   } else {
	elapsedTime();
	exit 0;
   }
}

usage();