29781ccf5a802932426ff9e78b5f191b4ebef5c0 hiram Tue May 21 17:02:06 2019 -0700 add trackDump option for proof of concept refs #18869 diff --git src/hg/hubApi/tests/jsonConsumer.pl src/hg/hubApi/tests/jsonConsumer.pl index db81b96c0..1993abe 100755 --- src/hg/hubApi/tests/jsonConsumer.pl +++ src/hg/hubApi/tests/jsonConsumer.pl @@ -15,41 +15,44 @@ my $globalHeaders = { 'Content-Type' => 'application/json' }; my $lastRequestTime = Time::HiRes::time(); my $processStartTime = Time::HiRes::time(); my $requestCount = 0; ############################################################################## # command line options my $endpoint = ""; my $hubUrl = ""; my $genome = ""; my $track = ""; my $chrom = ""; my $start = ""; my $end = ""; my $test0 = 0; +my $trackDump = 0; my $debug = 0; my $trackLeavesOnly = 0; my $measureTiming = 0; my $jsonOutputArrays = 0; my $maxItemsOutput = ""; ############################################################################## sub usage() { printf STDERR "usage: ./jsonConsumer.pl [arguments]\n"; printf STDERR "arguments: -test0 - perform test of /list/publicHubs and /list/ucscGenomes endpoints +-trackDump - obtain all data for a single track from: track, genome (hubUrl) + - proof of concept, will not work for all cases -hubUrl= - use the URL to access the track or assembly hub -genome= - name for UCSC database genome or assembly/track hub genome -track= - specify a single track in a hub or database -chrom= - restrict the operation to a single chromosome -start= - restrict the operation to a range, use both start and end -end= - restrict the operation to a range, use both start and end -maxItemsOutput= - limit output to this number of items. 
Default 1,000 maximum allowed 1,000,000 -trackLeavesOnly - for list tracks function, no containers listed -measureTimeing - turn on timing measurement -debug - turn on debugging business -endpoint= - where is one of the following: /list/publicHubs - provide a listing of all available public hubs /list/ucscGenomes - provide a listing of all available UCSC genomes /list/hubGenomes - list genomes from a specified hub (with hubUrl=...) @@ -80,41 +83,41 @@ if (ref($element) eq "HASH") { hashOutput($element); } } } ######################################################################### ############################################################################## ### ### these functions were copied from Ensembl HTTP::Tiny example code: ### https://github.com/Ensembl/ensembl-rest/wiki/Example-Perl-Client ### ############################################################################## ############################################################################## -sub performJsonAction { +sub performJsonAction($$) { my ($endpoint, $parameters) = @_; my $headers = $globalHeaders; my $content = performRestAction($endpoint, $parameters, $headers); return {} unless $content; my $json = decode_json($content); return $json; } ############################################################################## -sub performRestAction { +sub performRestAction($$$) { my ($endpoint, $parameters, $headers) = @_; $parameters ||= {}; $headers ||= {}; $headers->{'Content-Type'} = 'application/json' unless exists $headers->{'Content-Type'}; if($requestCount == 15) { # check every 15 my $currentTime = Time::HiRes::time(); my $diff = $currentTime - $lastRequestTime; # if less than a second then sleep for the remainder of the second if($diff < 1) { Time::HiRes::sleep(1-$diff); } # reset $lastRequestTime = Time::HiRes::time(); $requestCount = 0; } @@ -213,63 +216,154 @@ } printf "%s", $json->pretty->encode( $jsonReturn ); } } 
#############################################################################
# Hand a deliberately unknown endpoint to checkError() together with the
# error text that is expected to come back for it.
sub verifyCommandProcessing() {
  my $json = JSON->new;
  # verify command processing can detect bad input
  my $endpoint = "/list/noSubCommand";
  my $expect = "do not recognize endpoint function:";
  checkError($json, $endpoint,$expect);
}	#	sub verifyCommandProcessing()

#############################################################################
# Find the highest chromStart in the returned data to obtain a continuation
# point.
# The item 'chromStart' is not necessarily always named as such,
# depending upon track type, it could be: tStart or genoStart or txStart
#
# Arguments:
#   $hashPtr - reference to the decoded JSON response hash
#   $track   - name of the track, used as the key into $hashPtr to find the
#              list of items
# Returns:
#   the largest start coordinate found in the items, or -1 when there is
#   no item list stored under the $track key
# Dies:
#   when an item carries none of the recognized start field names
sub findHighestChromStart($$) {
  my ($hashPtr, $track) = @_;
  my $highStart = -1;
  my $trackData = $hashPtr->{$track};
  # no list of items under this track name -> nothing to scan
  return $highStart if (ref($trackData) ne "ARRAY");
  # candidate field names, checked in this order, first one defined wins
  my @startFields = ('tStart', 'genoStart', 'txStart', 'chromStart');
  foreach my $item (@$trackData) {
    my $itemStart;
    foreach my $field (@startFields) {
      if (defined($item->{$field})) {
        $itemStart = $item->{$field};
        last;
      }
    }
    if (!defined($itemStart)) {
      # die() does not interpret printf formats, the message must be
      # built with sprintf so the track name lands inside the text
      die sprintf("ERROR: do not recognize table type for track '%s', can not find chrom start.\n", $track);
    }
    $highStart = $itemStart if ($itemStart > $highStart);
  }
  return $highStart;
}

#############################################################################
# walk through all the chromosomes for a track to extract all data
# XXX - NOT ADDRESSED - this produces duplicate items at the breaks when
#       maxItemsLimit is used (the continuation request starts again at the
#       highest start already seen, so items at that start are repeated)
#
# The request parameters are built from the file-level $hubUrl, $genome and
# $track option values.  The two arguments are accepted to keep the calling
# convention, they are not used here.
# Progress is reported on STDERR, the data returned by the requests is not
# printed.
# Returns:
#   0 for success, 1 when the chromosome list request reports an error
#   or when a continuation request can make no further progress
sub trackDump($$) {
  my ($endpoint, $parameters) = @_;
  my $errReturn = 0;
  my %localParameters;
  $localParameters{"hubUrl"} = "$hubUrl" if (length($hubUrl));
  $localParameters{"genome"} = "$genome" if (length($genome));
  $localParameters{"track"} = "$track" if (length($track));

  my $endPoint = "/list/chromosomes";
  my $jsonChromosomes = performJsonAction($endPoint, \%localParameters);
  return 1 if (defined($jsonChromosomes->{'error'}));

  # key is chrom name, value is size
  my $chromInfo = $jsonChromosomes->{'chromosomes'};
  $chromInfo = {} if (ref($chromInfo) ne "HASH");

  $endPoint = "/getData/track";
  # NOTE(review): this overwrites the file-level $maxItemsOutput, a value
  # given with -maxItemsOutput on the command line is lost.  Presumably the
  # request code outside this function forwards it to the server - confirm.
  $maxItemsOutput = 14000;

  # for each chromosome, in order by size, smallest first
  foreach my $chr (sort {$chromInfo->{$a} <=> $chromInfo->{$b}} keys %$chromInfo) {
    $localParameters{"chrom"} = "$chr";
    # begin each chromosome without a range restriction
    delete $localParameters{'start'};
    delete $localParameters{'end'};
    printf STDERR "# working\t%s\t%d\n", $chr, $chromInfo->{$chr};
    my $oneChrom = performJsonAction($endPoint, \%localParameters);
    # itemsReturned can be missing (error response), count that as zero
    my $itemsReturned = defined($oneChrom->{'itemsReturned'}) ? $oneChrom->{'itemsReturned'} : 0;
    if (!defined($oneChrom->{'maxItemsLimit'})) {
      printf STDERR "# chrom: %s\t%d items - completed\n", $chr, $itemsReturned;
      next;
    }
    # the limit was reached, keep asking for the remainder of the
    # chromosome beginning at the highest start seen so far
    my $previousStart = -1;
    while (defined($oneChrom->{'maxItemsLimit'})) {
      my $highestChromStart = findHighestChromStart($oneChrom, $track);
      if ($highestChromStart <= $previousStart) {
        # the same request would be repeated forever: either no start
        # coordinate was found or more items share one start position
        # than a single request can return
        printf STDERR "ERROR: chrom: %s\tno progress beyond chromStart %d, abandoning this chromosome\n", $chr, $highestChromStart;
        $errReturn = 1;
        last;
      }
      printf STDERR "# chrom: %s\t%d items -> max item limit last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
      $previousStart = $highestChromStart;
      $localParameters{'start'} = "$highestChromStart";
      $localParameters{'end'} = "$chromInfo->{$chr}";
      $oneChrom = performJsonAction($endPoint, \%localParameters);
      $itemsReturned = defined($oneChrom->{'itemsReturned'}) ? $oneChrom->{'itemsReturned'} : 0;
      if (!defined($oneChrom->{'maxItemsLimit'})) {
        $highestChromStart = findHighestChromStart($oneChrom, $track);
        printf STDERR "# chrom: %s\t%d items completed at last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
      }
    }
  }	#	foreach chrom in chromInfo

  return $errReturn;
}	#	sub trackDump($$)
############################################################################# sub processEndPoint() { my $errReturn = 0; if (length($endpoint)) { my $json = JSON->new; my $jsonReturn = {}; my %parameters; if (length($hubUrl)) { $parameters{"hubUrl"} = "$hubUrl"; } if (length($genome)) { $parameters{"genome"} = "$genome"; } if (length($chrom)) { $parameters{"chrom"} = "$chrom"; } if ($trackLeavesOnly) { $parameters{"trackLeavesOnly"} = "1"; } if (length($track)) { $parameters{"track"} = "$track"; } if (length($start)) { $parameters{"start"} = "$start"; } if (length($end)) { $parameters{"end"} = "$end"; } # Pass along any bogus request just to test the error handling. + if ($trackDump) { + $errReturn = trackDump($endpoint, \%parameters); + } else { $jsonReturn = performJsonAction($endpoint, \%parameters); $errReturn = 1 if (defined ($jsonReturn->{'error'})); printf "%s", $json->pretty->encode( $jsonReturn ); + } } else { printf STDERR "ERROR: no endpoint given ?\n"; usage(); exit 255; } return $errReturn; } # sub processEndPoint() ########################################################################### ### test /list/publicHubs and /list/ucscGenomes sub test0() { my $json = JSON->new; my $jsonReturn = {}; @@ -345,30 +439,31 @@ ############################################################################# ### main() ############################################################################# my $argc = scalar(@ARGV); GetOptions ("hubUrl=s" => \$hubUrl, "endpoint=s" => \$endpoint, "genome=s" => \$genome, "track=s" => \$track, "chrom=s" => \$chrom, "start=s" => \$start, "end=s" => \$end, "test0" => \$test0, + "trackDump" => \$trackDump, "debug" => \$debug, "trackLeavesOnly" => \$trackLeavesOnly, "measureTiming" => \$measureTiming, "jsonOutputArrays" => \$jsonOutputArrays, "maxItemsOutput=s" => \$maxItemsOutput) or die "Error in command line arguments\n"; if ($test0) { test0; elapsedTime(); exit 0; } if ($argc > 0) { if (processEndPoint()) {