29781ccf5a802932426ff9e78b5f191b4ebef5c0
hiram
  Tue May 21 17:02:06 2019 -0700
add trackDump option for proof of concept refs #18869

diff --git src/hg/hubApi/tests/jsonConsumer.pl src/hg/hubApi/tests/jsonConsumer.pl
index db81b96c0..1993abe 100755
--- src/hg/hubApi/tests/jsonConsumer.pl
+++ src/hg/hubApi/tests/jsonConsumer.pl
@@ -15,41 +15,44 @@
 my $globalHeaders = { 'Content-Type' => 'application/json' };
 my $lastRequestTime = Time::HiRes::time();
 my $processStartTime = Time::HiRes::time();
 my $requestCount = 0;
 
 ##############################################################################
 # command line options
 my $endpoint = "";
 my $hubUrl = "";
 my $genome = "";
 my $track = "";
 my $chrom = "";
 my $start = "";
 my $end = "";
 my $test0 = 0;
+my $trackDump = 0;
 my $debug = 0;
 my $trackLeavesOnly = 0;
 my $measureTiming = 0;
 my $jsonOutputArrays = 0;
 my $maxItemsOutput = "";
 ##############################################################################
 
 sub usage() {
 printf STDERR "usage: ./jsonConsumer.pl [arguments]\n";
 printf STDERR "arguments:
 -test0 - perform test of /list/publicHubs and /list/ucscGenomes endpoints
+-trackDump - obtain all data for a single track from: track, genome (hubUrl)
+           - proof of concept, will not work for all cases
 -hubUrl=<URL> - use the URL to access the track or assembly hub
 -genome=<name> - name for UCSC database genome or assembly/track hub genome
 -track=<trackName> - specify a single track in a hub or database
 -chrom=<chromName> - restrict the operation to a single chromosome
 -start=<coordinate> - restrict the operation to a range, use both start and end
 -end=<coordinate> - restrict the operation to a range, use both start and end
 -maxItemsOutput=<N> - limit output to this number of items.  Default 1,000
                       maximum allowed 1,000,000
 -trackLeavesOnly - for list tracks function, no containers listed
 -measureTimeing - turn on timing measurement
 -debug - turn on debugging business
 -endpoint=<function> - where <function> is one of the following:
    /list/publicHubs - provide a listing of all available public hubs
    /list/ucscGenomes - provide a listing of all available UCSC genomes
    /list/hubGenomes - list genomes from a specified hub (with hubUrl=...)
@@ -80,41 +83,41 @@
      if (ref($element) eq "HASH") {
        hashOutput($element);
      }
   }
 }
 #########################################################################
 
 ##############################################################################
 ###
 ### these functions were copied from Ensembl HTTP::Tiny example code:
 ###  https://github.com/Ensembl/ensembl-rest/wiki/Example-Perl-Client
 ###
 ##############################################################################
 
 ##############################################################################
-sub performJsonAction {
+sub performJsonAction($$) {
   my ($endpoint, $parameters) = @_;
   my $headers = $globalHeaders;
   my $content = performRestAction($endpoint, $parameters, $headers);
   return {} unless $content;
   my $json = decode_json($content);
   return $json;
 }
 
 ##############################################################################
-sub performRestAction {
+sub performRestAction($$$) {
   my ($endpoint, $parameters, $headers) = @_;
   $parameters ||= {};
   $headers ||= {};
   $headers->{'Content-Type'} = 'application/json' unless exists $headers->{'Content-Type'};
   if($requestCount == 15) { # check every 15
     my $currentTime = Time::HiRes::time();
     my $diff = $currentTime - $lastRequestTime;
     # if less than a second then sleep for the remainder of the second
     if($diff < 1) {
       Time::HiRes::sleep(1-$diff);
     }
     # reset
     $lastRequestTime = Time::HiRes::time();
     $requestCount = 0;
   }
@@ -213,63 +216,154 @@
      }
      printf "%s", $json->pretty->encode( $jsonReturn );
   }
 }
 
 #############################################################################
 sub verifyCommandProcessing()
 {
     my $json = JSON->new;
     # verify command processing can detected bad input
     my $endpoint = "/list/noSubCommand";
     my $expect = "do not recognize endpoint function:";
     checkError($json, $endpoint,$expect);
 }	#	sub verifyCommandProcessing()
 
+#############################################################################
+#  Find the highest chromStart in the returned data to obtain a continuation
+#  point.
+#  The item 'chromStart' is not necessarily always named as such,
+#    depending upon track type, it could be: tStart or genoStart or txStart
+#  Arguments: reference to the decoded JSON hash from a getData request,
+#    and the track name, which is the key of the item array in that hash.
+#  Returns the largest start coordinate found, -1 when there are no items.
+#  Dies when an item has none of the recognized start coordinate names.
+sub findHighestChromStart($$) {
+  my ($hashPtr, $track) = @_;
+  my $highStart = -1;
+  my $trackData = $hashPtr->{$track};
+  foreach my $item (@$trackData) {
+    # first name with a defined value wins, checked in this order
+    my ($startName) = grep { defined($item->{$_}) }
+                        qw(tStart genoStart txStart chromStart);
+    # die does not expand printf formats, the message must go through sprintf
+    die sprintf("ERROR: do not recognize table type for track '%s', can not find chrom start.\n", $track)
+        if (! defined($startName));
+    $highStart = $item->{$startName} if ($item->{$startName} > $highStart);
+  }
+  return $highStart;
+}
+
+#############################################################################
+# walk through all the chromosomes for a track to extract all data
+# XXX - NOT ADDRESSED - this produces duplicate items at the breaks when
+#       maxItemsLimit is used
+# The arguments are not used, the requests are built from the command line
+#   globals hubUrl, genome and track.  Progress messages go to STDERR.
+# Returns 0 for success, 1 on a chromosome list error or an incomplete chrom.
+sub trackDump($$) {
+  my ($endpoint, $parameters) = @_;
+  my $errReturn = 0;
+  my %localParameters;
+  $localParameters{"hubUrl"} = "$hubUrl" if (length($hubUrl));
+  $localParameters{"genome"} = "$genome" if (length($genome));
+  $localParameters{"track"} = "$track" if (length($track));
+  my $endPoint = "/list/chromosomes";
+  my $jsonChromosomes = performJsonAction($endPoint, \%localParameters);
+  return 1 if (defined ($jsonChromosomes->{'error'}));
+  my %chromInfo;	# key is chrom name, value is size
+  my $chromHash = $jsonChromosomes->{'chromosomes'};
+  foreach my $chr (keys %$chromHash) {
+    $chromInfo{$chr} = $chromHash->{$chr};
+  }
+  # for each chromosome, in order by size, smallest first
+  $endPoint = "/getData/track";
+  # NOTE(review): file-level global, presumably read when the request is built
+  $maxItemsOutput = 14000;
+  foreach my $chr (sort {$chromInfo{$a} <=> $chromInfo{$b}} keys %chromInfo) {
+    $localParameters{"chrom"} = "$chr";
+    delete $localParameters{'start'};
+    delete $localParameters{'end'};
+    printf STDERR "# working\t%s\t%d\n", $chr, $chromInfo{$chr};
+    my $oneChrom = performJsonAction($endPoint, \%localParameters);
+    my $itemsReturned = $oneChrom->{'itemsReturned'};
+    my $requestStart = -1;	# start used for the request now in $oneChrom
+    my $continued = 0;	# true after a continuation request was made
+    my $stalled = 0;	# true when a continuation would repeat itself
+    while (defined($oneChrom->{'maxItemsLimit'})) {
+      my $highestChromStart = findHighestChromStart($oneChrom, $track);
+      printf STDERR "# chrom: %s\t%d items -> max item limit last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
+      # the same start would fetch the same full page again, forever
+      if ($highestChromStart <= $requestStart) {
+        printf STDERR "# chrom: %s\tno progress past chromStart %d, chromosome is incomplete\n", $chr, $highestChromStart;
+        $errReturn = 1;
+        $stalled = 1;
+        last;
+      }
+      $requestStart = $highestChromStart;
+      $localParameters{'start'} = "$highestChromStart";
+      $localParameters{'end'} = "$chromInfo{$chr}";
+      $oneChrom = performJsonAction($endPoint, \%localParameters);
+      $itemsReturned = $oneChrom->{'itemsReturned'};
+      $continued = 1;
+    }
+    if ($continued && ! $stalled) {
+      my $highestChromStart = findHighestChromStart($oneChrom, $track);
+      printf STDERR "# chrom: %s\t%d items completed at last chromStart %d\n", $chr, $itemsReturned, $highestChromStart;
+    } elsif (! $stalled) {
+      printf STDERR "# chrom: %s\t%d items - completed\n", $chr, $itemsReturned;
+    }
+  }	# foreach chrom in chromInfo
+  return $errReturn;
+}	#	sub trackDump($$)
 
 #############################################################################
 sub processEndPoint() {
   my $errReturn = 0;
   if (length($endpoint)) {
      my $json = JSON->new;
      my $jsonReturn = {};
      my %parameters;
      if (length($hubUrl)) {
 	$parameters{"hubUrl"} = "$hubUrl";
      }
      if (length($genome)) {
 	$parameters{"genome"} = "$genome";
         }
      if (length($chrom)) {
 	$parameters{"chrom"} = "$chrom";
      }
      if ($trackLeavesOnly) {
 	$parameters{"trackLeavesOnly"} = "1";
      }
      if (length($track)) {
 	$parameters{"track"} = "$track";
      }
      if (length($start)) {
 	$parameters{"start"} = "$start";
      }
      if (length($end)) {
 	$parameters{"end"} = "$end";
      }
      #	Pass along any bogus request just to test the error handling.
+     if ($trackDump) {
+        $errReturn = trackDump($endpoint, \%parameters);
+     } else {
         $jsonReturn = performJsonAction($endpoint, \%parameters);
         $errReturn = 1 if (defined ($jsonReturn->{'error'}));
         printf "%s", $json->pretty->encode( $jsonReturn );
+     }
   } else {
     printf STDERR "ERROR: no endpoint given ?\n";
     usage();
     exit 255;
   }
   return $errReturn;
 }	# sub processEndPoint()
 
 ###########################################################################
 ### test /list/publicHubs and /list/ucscGenomes
 sub test0() {
 
 my $json = JSON->new;
 my $jsonReturn = {};
 
@@ -345,30 +439,31 @@
 
 #############################################################################
 ### main()
 #############################################################################
 
 my $argc = scalar(@ARGV);
 
 GetOptions ("hubUrl=s" => \$hubUrl,
     "endpoint=s"  => \$endpoint,
     "genome=s"  => \$genome,
     "track=s"  => \$track,
     "chrom=s"  => \$chrom,
     "start=s"  => \$start,
     "end=s"    => \$end,
     "test0"    => \$test0,
+    "trackDump"    => \$trackDump,
     "debug"    => \$debug,
     "trackLeavesOnly"    => \$trackLeavesOnly,
     "measureTiming"    => \$measureTiming,
     "jsonOutputArrays"    => \$jsonOutputArrays,
     "maxItemsOutput=s"   => \$maxItemsOutput)
     or die "Error in command line arguments\n";
 
 if ($test0) {
    test0;
    elapsedTime();
    exit 0;
 }
 
 if ($argc > 0) {
    if (processEndPoint()) {