Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pagination and base_url to JSON query_runner #6499

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 108 additions & 17 deletions redash/query_runner/json_ds.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
from urllib.parse import urljoin

import yaml
from funcy import compact, project
Expand Down Expand Up @@ -61,7 +62,7 @@
columns.append({"name": column_name, "friendly_name": column_name, "type": column_type})


def _apply_path_search(response, path):
def _apply_path_search(response, path, default=None):
if path is None:
return response

Expand All @@ -71,13 +72,17 @@
current_path = path_parts.pop()
if current_path in response:
response = response[current_path]
elif default is not None:
return default
guidopetri marked this conversation as resolved.
Show resolved Hide resolved
else:
raise Exception("Couldn't find path {} in response.".format(path))

return response


def _normalize_json(data, path):
if not data:
return None

Check warning on line 85 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L85

Added line #L85 was not covered by tests
data = _apply_path_search(data, path)

if isinstance(data, dict):
Expand All @@ -94,9 +99,7 @@


# TODO: merge the logic here with the one in MongoDB's queyr runner
def parse_json(data, path, fields):
data = _normalize_json(data, path)

def parse_json(data, fields):
rows = []
columns = []

Expand Down Expand Up @@ -130,17 +133,19 @@

class JSON(BaseHTTPQueryRunner):
requires_url = False
base_url_title = "Base URL"

@classmethod
def configuration_schema(cls):
return {
"type": "object",
"properties": {
"base_url": {"type": "string", "title": cls.base_url_title},
"username": {"type": "string", "title": cls.username_title},
"password": {"type": "string", "title": cls.password_title},
},
"secret": ["password"],
"order": ["username", "password"],
"order": ["base_url", "username", "password"],
}

def __init__(self, configuration):
Expand All @@ -153,6 +158,16 @@
def run_query(self, query, user):
query = parse_query(query)

results, error = self._run_json_query(query)

Check warning on line 161 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L161

Added line #L161 was not covered by tests
if error is not None:
return None, error

Check warning on line 163 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L163

Added line #L163 was not covered by tests

data = json_dumps(results)

Check warning on line 165 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L165

Added line #L165 was not covered by tests
if data:
return data, None
return None, "Got empty response from '{}'.".format(query["url"])

Check warning on line 168 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L167-L168

Added lines #L167 - L168 were not covered by tests

def _run_json_query(self, query):
if not isinstance(query, dict):
raise QueryParseError("Query should be a YAML object describing the URL to query.")

Expand All @@ -165,31 +180,107 @@
fields = query.get("fields")
path = query.get("path")

if "pagination" in query:
pagination = RequestPagination.from_config(self.configuration, query["pagination"])
else:
pagination = None

if isinstance(request_options.get("auth", None), list):
request_options["auth"] = tuple(request_options["auth"])
elif self.configuration.get("username") or self.configuration.get("password"):
request_options["auth"] = (
self.configuration.get("username"),
self.configuration.get("password"),
)
request_options["auth"] = (self.configuration.get("username"), self.configuration.get("password"))

Check warning on line 191 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L191

Added line #L191 was not covered by tests

if method not in ("get", "post"):
raise QueryParseError("Only GET or POST methods are allowed.")

if fields and not isinstance(fields, list):
raise QueryParseError("'fields' needs to be a list.")

response, error = self.get_response(query["url"], http_method=method, **request_options)
results, error = self._get_all_results(query["url"], method, path, pagination, **request_options)
return parse_json(results, fields), error

if error is not None:
return None, error
def _get_all_results(self, url, method, result_path, pagination, **request_options):
"""Get all results from a paginated endpoint."""
base_url = self.configuration.get("base_url")
url = urljoin(base_url, url)

data = json_dumps(parse_json(response.json(), path, fields))
results = []
has_more = True
while has_more:
response, error = self._get_json_response(url, method, **request_options)
has_more = False

if data:
return data, None
else:
return None, "Got empty response from '{}'.".format(query["url"])
result = _normalize_json(response, result_path)
if result:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having this many nested if statements is kind of a code smell. Maybe we should split this up into multiple functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but there are kind of a lot of variables to pass around to do so. I personally would just pass query around instead of breaking it up on line 168 and then passing that around everywhere. I was just trying to change as little as possible. Should I do that switch?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion, yes - definitely worth it if it makes the code easier to follow.

results.extend(result)
if pagination:
has_more, url, request_options = pagination.next(url, request_options, response)

return results, error

def _get_json_response(self, url, method, **request_options):
response, error = self.get_response(url, http_method=method, **request_options)
result = response.json() if error is None else {}
return result, error

Check warning on line 224 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L222-L224

Added lines #L222 - L224 were not covered by tests


class RequestPagination:
def next(self, url, request_options, response):
"""Checks the response for another page.

Returns:
has_more, next_url, next_request_options
"""
return False, None, request_options

Check warning on line 234 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L234

Added line #L234 was not covered by tests

@staticmethod
def from_config(configuration, pagination):
if not isinstance(pagination, dict) or not isinstance(pagination.get("type"), str):
raise QueryParseError("'pagination' should be an object with a `type` property")

Check warning on line 239 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L239

Added line #L239 was not covered by tests

if pagination["type"] == "url":
return UrlPagination(pagination)
elif pagination["type"] == "token":
return TokenPagination(pagination)

raise QueryParseError("Unknown 'pagination.type' {}".format(pagination["type"]))

Check warning on line 246 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L246

Added line #L246 was not covered by tests


class UrlPagination(RequestPagination):
def __init__(self, pagination):
self.path = pagination.get("path", "_links.next.href")
if not isinstance(self.path, str):
raise QueryParseError("'pagination.path' should be a string")

Check warning on line 253 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L253

Added line #L253 was not covered by tests

def next(self, url, request_options, response):
next_url = _apply_path_search(response, self.path, "")
if not next_url:
return False, None, request_options

next_url = urljoin(url, next_url)
return True, next_url, request_options


class TokenPagination(RequestPagination):
def __init__(self, pagination):
self.fields = pagination.get("fields", ["next_page_token", "page_token"])
if not isinstance(self.fields, list) or len(self.fields) != 2:
raise QueryParseError("'pagination.fields' should be a list of 2 field names")

Check warning on line 268 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L268

Added line #L268 was not covered by tests

def next(self, url, request_options, response):
next_token = _apply_path_search(response, self.fields[0], "")
if not next_token:
return False, None, request_options

params = request_options.get("params", {})

# prevent infinite loop that can happen if self.fields[1] is wrong
if next_token == params.get(self.fields[1]):
raise Exception("{} did not change; possible misconfiguration".format(self.fields[0]))

Check warning on line 279 in redash/query_runner/json_ds.py

View check run for this annotation

Codecov / codecov/patch

redash/query_runner/json_ds.py#L279

Added line #L279 was not covered by tests

params[self.fields[1]] = next_token
request_options["params"] = params
return True, url, request_options


register(JSON)
88 changes: 88 additions & 0 deletions tests/query_runner/test_json_ds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Some test cases for JSON api runner
"""
from unittest import TestCase
from urllib.parse import urlencode, urljoin

from redash.query_runner.json_ds import JSON


def mock_api(url, method, **request_options):
if "params" in request_options:
qs = urlencode(request_options["params"])
url = urljoin(url, "?{}".format(qs))

data, error = None, None

if url == "http://localhost/basics":
data = [{"id": 1}, {"id": 2}]
elif url == "http://localhost/token-test":
data = {"next_page_token": "2", "records": [{"id": 1}, {"id": 2}]}
elif url == "http://localhost/token-test?page_token=2":
data = {"next_page_token": "3", "records": [{"id": 3}, {"id": 4}]}
elif url == "http://localhost/token-test?page_token=3":
data = {"records": [{"id": 5}]}
elif url == "http://localhost/hateoas":
data = {
"_embedded": {"records": [{"id": 10}, {"id": 11}]},
"_links": {
"first": {"href": "http://localhost/hateoas"},
"self": {"href": "http://localhost/hateoas"},
"next": {"href": "http://localhost/hateoas?page=2"},
"last": {"href": "http://localhost/hateoas?page=2"},
},
"page": {"size": 2, "totalElements": 3, "totalPages": 2},
}
elif url == "http://localhost/hateoas?page=2":
data = {
"_embedded": {"records": [{"id": 12}]},
"_links": {
"first": {"href": "http://localhost/hateoas"},
"self": {"href": "http://localhost/hateoas?page=2"},
"prev": {"href": "http://localhost/hateoas"},
"last": {"href": "http://localhost/hateoas?page=2"},
},
"page": {"size": 2, "totalElements": 3, "totalPages": 2},
}
else:
error = "404: {} not found".format(url)

return data, error


class TestJSON(TestCase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, I should have specified. We've been aiming for pytest tests instead of unittest; if it's not too much trouble, would you mind converting to those? I won't hold it against you if you're not feeling up to it though. I appreciate the work you've put into this PR and already recognize you've gone above and beyond :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just used the other tests as an example. It could always be refactored later. It seems like some of that should be added to the contributing guide, like the testing, ruff, and rebasing process. Like are you looking for a single commit per pr, or just going for a linear history?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great point. I don't think we have a good handle on what our contribution guidelines are; I'll try and add some documentation for that to the wiki soon.

def setUp(self):
self.runner = JSON({"base_url": "http://localhost/"})
self.runner._get_json_response = mock_api

def test_basics(self):
q = {"url": "basics"}
results, error = self.runner._run_json_query(q)

expected = [{"id": 1}, {"id": 2}]
self.assertEqual(results["rows"], expected)

def test_token_pagination(self):
q = {
"url": "token-test",
"pagination": {"type": "token", "fields": ["next_page_token", "page_token"]},
"path": "records",
}
results, error = self.runner._run_json_query(q)
self.assertIsNone(error)

expected = [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]
self.assertEqual(results["rows"], expected)

def test_url_pagination(self):
q = {
"url": "hateoas",
"pagination": {"type": "url", "path": "_links.next.href"},
"path": "_embedded.records",
"fields": ["id"],
}
results, error = self.runner._run_json_query(q)
self.assertIsNone(error)

expected = [{"id": 10}, {"id": 11}, {"id": 12}]
self.assertEqual(results["rows"], expected)
Loading