From 5709441da28aafe7d55a84f50733f1127a5c70d7 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Thu, 24 Aug 2023 16:14:30 +0530 Subject: [PATCH 1/3] SK-907 Query Support in Python SDK - SK-973 Test Driven Development for Query Support in Python SDK --- skyflow/_utils.py | 3 + skyflow/errors/_skyflow_errors.py | 4 + skyflow/vault/_client.py | 29 +++++- skyflow/vault/_config.py | 4 + skyflow/vault/_query.py | 58 +++++++++++ tests/vault/test_query.py | 158 ++++++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 3 deletions(-) create mode 100644 skyflow/vault/_query.py create mode 100644 tests/vault/test_query.py diff --git a/skyflow/_utils.py b/skyflow/_utils.py index 301e013..83bf54a 100644 --- a/skyflow/_utils.py +++ b/skyflow/_utils.py @@ -60,11 +60,13 @@ class InfoMessages(Enum): INSERT_DATA_SUCCESS = "Data has been inserted successfully." DETOKENIZE_SUCCESS = "Data has been detokenized successfully." GET_BY_ID_SUCCESS = "Data fetched from ID successfully." + QUERY_SUCCESS = "Query executed successfully." BEARER_TOKEN_RECEIVED = "tokenProvider returned token successfully." INSERT_TRIGGERED = "Insert method triggered." DETOKENIZE_TRIGGERED = "Detokenize method triggered." GET_BY_ID_TRIGGERED = "Get by ID triggered." INVOKE_CONNECTION_TRIGGERED = "Invoke connection triggered." + QUERY_TRIGGERED = "Query method triggered." GENERATE_BEARER_TOKEN_TRIGGERED = "Generate bearer token triggered" GENERATE_BEARER_TOKEN_SUCCESS = "Generate bearer token returned successfully" IS_TOKEN_VALID_TRIGGERED = "isTokenValid() triggered" @@ -87,6 +89,7 @@ class InterfaceName(Enum): GET = "client.get" UPDATE = "client.update" INVOKE_CONNECTION = "client.invoke_connection" + QUERY = "client.query" GENERATE_BEARER_TOKEN = "service_account.generate_bearer_token" IS_TOKEN_VALID = "service_account.isTokenValid" diff --git a/skyflow/errors/_skyflow_errors.py b/skyflow/errors/_skyflow_errors.py index 60f6c74..cd25b6e 100644 --- a/skyflow/errors/_skyflow_errors.py +++ b/skyflow/errors/_skyflow_errors.py @@ -82,6 +82,10 @@ class SkyflowErrorMessages(Enum): INVALID_UPSERT_COLUMN_TYPE = "upsert object column key has value of type %s, expected string" EMPTY_UPSERT_OPTION_TABLE = "upsert object table value is empty string at index %s, expected non-empty string" EMPTY_UPSERT_OPTION_COLUMN = "upsert object column value is empty string at index %s, expected non-empty string" + QUERY_KEY_ERROR = "Query key is missing from payload" + INVALID_QUERY_TYPE = "Query key has value of type %s, expected string" + INVALID_QUERY_COMMAND = "only SELECT commands are supported, %s command was passed instead" + SERVER_ERROR = "Server returned errors, check SkyflowError.data for more" class SkyflowError(Exception): def __init__(self, code, message="An Error occured", data={}, interface: str = 'Unknown') -> None: diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 4e0699a..1daa3c7 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -8,8 +8,7 @@ from ._delete import deleteProcessResponse from ._insert import getInsertRequestBody, processResponse, convertResponse from ._update import sendUpdateRequests, createUpdateResponseBody -from ._config import Configuration, DeleteOptions -from ._config import InsertOptions, ConnectionConfig, UpdateOptions +from ._config import Configuration, DeleteOptions, InsertOptions, ConnectionConfig, UpdateOptions, QueryOptions from ._connection import createRequest from ._detokenize import sendDetokenizeRequests, createDetokenizeResponseBody from ._get_by_id import sendGetByIdRequests, createGetResponseBody @@ -18,7 +17,7 @@ from skyflow.errors._skyflow_errors import SkyflowError, SkyflowErrorCodes, SkyflowErrorMessages from skyflow._utils import log_info, InfoMessages, InterfaceName, getMetrics from ._token import tokenProviderWrapper - +from ._query import getQueryRequestBody, getQueryResponse class Client: def __init__(self, config: Configuration): @@ -141,6 +140,30 @@ def invoke_connection(self, config: ConnectionConfig): session.close() return processResponse(response, interface=interface) + def query(self, queryInput, options: QueryOptions = QueryOptions()): + interface = InterfaceName.QUERY.value + log_info(InfoMessages.QUERY_TRIGGERED.value, interface=interface) + + self._checkConfig(interface) + + jsonBody = getQueryRequestBody(queryInput, options) + requestURL = self._get_complete_vault_url() + "/query" + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Content-Type": "application/json", + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()) + } + + response = requests.post(requestURL, data=jsonBody, headers=headers) + # print(response.json()) + # print("\n\n\n") + result = getQueryResponse(response) + + log_info(InfoMessages.QUERY_SUCCESS.value, interface) + return result + def _checkConfig(self, interface): ''' Performs basic check on the given client config diff --git a/skyflow/vault/_config.py b/skyflow/vault/_config.py index 77abd39..36009a6 100644 --- a/skyflow/vault/_config.py +++ b/skyflow/vault/_config.py @@ -41,6 +41,10 @@ def __init__(self, tokens: bool=True): class DeleteOptions: def __init__(self, tokens: bool=False): self.tokens = tokens + +class QueryOptions: + def __init__(self): + pass class RequestMethod(Enum): GET = 'GET' diff --git a/skyflow/vault/_query.py b/skyflow/vault/_query.py new file mode 100644 index 0000000..93ee565 --- /dev/null +++ b/skyflow/vault/_query.py @@ -0,0 +1,58 @@ +''' + Copyright (c) 2022 Skyflow, Inc. +''' +import json + +import requests +from ._config import QueryOptions +from requests.models import HTTPError +from skyflow.errors._skyflow_errors import SkyflowError, SkyflowErrorCodes, SkyflowErrorMessages +from skyflow._utils import InterfaceName + +interface = InterfaceName.QUERY.value + + +def getQueryRequestBody(data, options): + try: + query = data["query"] + except KeyError: + raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, + SkyflowErrorMessages.QUERY_KEY_ERROR, interface=interface) + + if not isinstance(query, str): + queryType = str(type(query)) + raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.INVALID_QUERY_TYPE.value % queryType, interface=interface) + + requestBody = {"query": query} + try: + jsonBody = json.dumps(requestBody) + except Exception as e: + raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.INVALID_JSON.value % ( + 'query payload'), interface=interface) + + return jsonBody + +def getQueryResponse(response: requests.Response, interface=interface): + statusCode = response.status_code + content = response.content.decode('utf-8') + try: + response.raise_for_status() + try: + return json.loads(content) + except: + raise SkyflowError( + statusCode, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % content, interface=interface) + except HTTPError: + message = SkyflowErrorMessages.API_ERROR.value % statusCode + if response != None and response.content != None: + try: + errorResponse = json.loads(content) + if 'error' in errorResponse and type(errorResponse['error']) == type({}) and 'message' in errorResponse['error']: + message = errorResponse['error']['message'] + except: + message = SkyflowErrorMessages.RESPONSE_NOT_JSON.value % content + error = {"error": {}} + if 'x-request-id' in response.headers: + message += ' - request id: ' + response.headers['x-request-id'] + error['error'].update({"code": statusCode, "description": message}) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.SERVER_ERROR.value, error, interface=interface) diff --git a/tests/vault/test_query.py b/tests/vault/test_query.py new file mode 100644 index 0000000..08a6a1d --- /dev/null +++ b/tests/vault/test_query.py @@ -0,0 +1,158 @@ +''' + Copyright (c) 2022 Skyflow, Inc. +''' +import json +import unittest +import os +from requests.models import Response +from dotenv import dotenv_values +from skyflow.vault._query import getQueryRequestBody, getQueryResponse +from skyflow.errors._skyflow_errors import SkyflowError, SkyflowErrorCodes, SkyflowErrorMessages +from skyflow.service_account import generate_bearer_token +from skyflow.vault._client import Client +from skyflow.vault._config import Configuration, QueryOptions + + +class TestQuery(unittest.TestCase): + + def setUp(self) -> None: + self.dataPath = os.path.join(os.getcwd(), 'tests/vault/data/') + query = "SELECT * FROM pii_fields WHERE skyflow_id='3ea3861-x107-40w8-la98-106sp08ea83f'" + self.data = {"query": query} + self.mockRequest = {"records": [query]} + + self.mockResponse = { + "records": [ + { + "fields": { + "card_number": "XXXXXXXXXXXX1111", + "card_pin": "*REDACTED*", + "cvv": "", + "expiration_date": "*REDACTED*", + "expiration_month": "*REDACTED*", + "expiration_year": "*REDACTED*", + "name": "a***te", + "skyflow_id": "3ea3861-x107-40w8-la98-106sp08ea83f", + "ssn": "XXX-XX-6789", + "zip_code": None + }, + "tokens": None + } + ] + } + + self.mockFailResponse = { + "error": { + "code": 500, + "description": "ERROR (internal_error): Could not find Notebook Mapping Notebook Name was not found - request id: 5d5d7e21-c789-9fcc-ba31-2a279d3a28ef" + } + } + + self.queryOptions = QueryOptions() + + return super().setUp() + + def getDataPath(self, file): + return self.dataPath + file + '.json' + + def testGetQueryRequestBodyWithValidBody(self): + body = json.loads(getQueryRequestBody(self.data, self.queryOptions)) + expectedOutput = { + "query": "SELECT * FROM pii_fields WHERE skyflow_id='3ea3861-x107-40w8-la98-106sp08ea83f'", + } + self.assertEqual(body, expectedOutput) + + def testGetQueryRequestBodyNoQuery(self): + invalidData = {"invalidKey": self.data["query"]} + try: + getQueryRequestBody(invalidData, self.queryOptions) + self.fail('Should have thrown an error') + except SkyflowError as e: + self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) + self.assertEqual( + e.message, SkyflowErrorMessages.QUERY_KEY_ERROR.value) + + def testGetQueryRequestBodyInvalidType(self): + invalidData = {"query": ['SELECT * FROM table_name']} + try: + getQueryRequestBody(invalidData, self.queryOptions) + self.fail('Should have thrown an error') + except SkyflowError as e: + self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) + self.assertEqual( + e.message, SkyflowErrorMessages.INVALID_QUERY_TYPE.value % (str(type(invalidData["query"])))) + + def testQueryInvalidJson(self): + invalidjson = {query: json} + + try: + getQueryRequestBody(invalidjson, self.queryOptions) + self.fail('Should have thrown an error') + except SkyflowError as e: + self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) + self.assertEqual( + e.message, SkyflowErrorMessages.INVALID_JSON.value % ('query payload')) + + def testGetQueryValidResponse(self): + response = Response() + response.status_code = 200 + response._content = b'{"key": "value"}' + try: + responseDict = getQueryResponse(response) + self.assertDictEqual(responseDict, {'key': 'value'}) + except SkyflowError as e: + self.fail() + + def testClientInit(self): + config = Configuration( + 'vaultid', 'https://skyflow.com', lambda: 'test') + client = Client(config) + self.assertEqual(client.vaultURL, 'https://skyflow.com') + self.assertEqual(client.vaultID, 'vaultid') + self.assertEqual(client.tokenProvider(), 'test') + + def testGetQueryResponseSuccessInvalidJson(self): + invalid_response = Response() + invalid_response.status_code = 200 + invalid_response._content = b'invalid-json' + try: + getQueryResponse(invalid_response) + self.fail('not failing on invalid json') + except SkyflowError as se: + self.assertEqual(se.code, 200) + self.assertEqual( + se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'invalid-json') + + def testGetQueryResponseFailInvalidJson(self): + invalid_response = Response() + invalid_response.status_code = 404 + invalid_response._content = {error: "error"} + try: + getQueryResponse(invalid_response) + self.fail('Not failing on invalid error json') + except SkyflowError as se: + self.assertEqual(se.code, 404) + self.assertEqual( + se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'error') + + def testGetQueryResponseFail(self): + response = Response() + response.status_code = 500 + response._content = self.mockFailResponse + try: + getQueryResponse(response) + self.fail() + except SkyflowError as e: + self.assertEqual(e.code, 500) + self.assertEqual(e.message, self.mockFailResponse['error']['description']) + self.assertEqual(e.data, self.mockFailResponse) + + def testQueryInvalidToken(self): + config = Configuration('id', 'url', lambda: 'invalid-token') + try: + Client(config).query({'query': 'SELECT * FROM table_name'}) + self.fail() + except SkyflowError as e: + self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) + self.assertEqual( + e.message, SkyflowErrorMessages.TOKEN_PROVIDER_INVALID_TOKEN.value) \ No newline at end of file From 58f8abd9e4d38429d680edd346a713dbe11370f9 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Thu, 24 Aug 2023 19:12:40 +0530 Subject: [PATCH 2/3] SK-973 Query Support in Python SDK --- skyflow/errors/_skyflow_errors.py | 1 + skyflow/vault/_client.py | 2 -- skyflow/vault/_query.py | 3 +++ tests/vault/test_query.py | 37 +++++-------------------------- 4 files changed, 9 insertions(+), 34 deletions(-) diff --git a/skyflow/errors/_skyflow_errors.py b/skyflow/errors/_skyflow_errors.py index cd25b6e..ed1ee76 100644 --- a/skyflow/errors/_skyflow_errors.py +++ b/skyflow/errors/_skyflow_errors.py @@ -84,6 +84,7 @@ class SkyflowErrorMessages(Enum): EMPTY_UPSERT_OPTION_COLUMN = "upsert object column value is empty string at index %s, expected non-empty string" QUERY_KEY_ERROR = "Query key is missing from payload" INVALID_QUERY_TYPE = "Query key has value of type %s, expected string" + EMPTY_QUERY = "Query key cannot be empty" INVALID_QUERY_COMMAND = "only SELECT commands are supported, %s command was passed instead" SERVER_ERROR = "Server returned errors, check SkyflowError.data for more" diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 1daa3c7..bc3bebb 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -157,8 +157,6 @@ def query(self, queryInput, options: QueryOptions = QueryOptions()): } response = requests.post(requestURL, data=jsonBody, headers=headers) - # print(response.json()) - # print("\n\n\n") result = getQueryResponse(response) log_info(InfoMessages.QUERY_SUCCESS.value, interface) diff --git a/skyflow/vault/_query.py b/skyflow/vault/_query.py index 93ee565..504d86c 100644 --- a/skyflow/vault/_query.py +++ b/skyflow/vault/_query.py @@ -22,6 +22,9 @@ def getQueryRequestBody(data, options): if not isinstance(query, str): queryType = str(type(query)) raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.INVALID_QUERY_TYPE.value % queryType, interface=interface) + + if not query.strip(): + raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT,SkyflowErrorMessages.EMPTY_QUERY.value, interface=interface) requestBody = {"query": query} try: diff --git a/tests/vault/test_query.py b/tests/vault/test_query.py index 08a6a1d..2a67d36 100644 --- a/tests/vault/test_query.py +++ b/tests/vault/test_query.py @@ -5,10 +5,8 @@ import unittest import os from requests.models import Response -from dotenv import dotenv_values from skyflow.vault._query import getQueryRequestBody, getQueryResponse from skyflow.errors._skyflow_errors import SkyflowError, SkyflowErrorCodes, SkyflowErrorMessages -from skyflow.service_account import generate_bearer_token from skyflow.vault._client import Client from skyflow.vault._config import Configuration, QueryOptions @@ -81,17 +79,16 @@ def testGetQueryRequestBodyInvalidType(self): self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) self.assertEqual( e.message, SkyflowErrorMessages.INVALID_QUERY_TYPE.value % (str(type(invalidData["query"])))) - - def testQueryInvalidJson(self): - invalidjson = {query: json} - + + def testGetQueryRequestBodyEmptyBody(self): + invalidData = {"query": ''} try: - getQueryRequestBody(invalidjson, self.queryOptions) + getQueryRequestBody(invalidData, self.queryOptions) self.fail('Should have thrown an error') except SkyflowError as e: self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) self.assertEqual( - e.message, SkyflowErrorMessages.INVALID_JSON.value % ('query payload')) + e.message, SkyflowErrorMessages.EMPTY_QUERY.value) def testGetQueryValidResponse(self): response = Response() @@ -123,30 +120,6 @@ def testGetQueryResponseSuccessInvalidJson(self): self.assertEqual( se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'invalid-json') - def testGetQueryResponseFailInvalidJson(self): - invalid_response = Response() - invalid_response.status_code = 404 - invalid_response._content = {error: "error"} - try: - getQueryResponse(invalid_response) - self.fail('Not failing on invalid error json') - except SkyflowError as se: - self.assertEqual(se.code, 404) - self.assertEqual( - se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'error') - - def testGetQueryResponseFail(self): - response = Response() - response.status_code = 500 - response._content = self.mockFailResponse - try: - getQueryResponse(response) - self.fail() - except SkyflowError as e: - self.assertEqual(e.code, 500) - self.assertEqual(e.message, self.mockFailResponse['error']['description']) - self.assertEqual(e.data, self.mockFailResponse) - def testQueryInvalidToken(self): config = Configuration('id', 'url', lambda: 'invalid-token') try: From 140b5f137c90058ac4655f7dd08c5cdb50dd785f Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Thu, 24 Aug 2023 21:04:14 +0530 Subject: [PATCH 3/3] SK-973 Increase Coverage --- skyflow/vault/_query.py | 1 + tests/vault/test_query.py | 48 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/skyflow/vault/_query.py b/skyflow/vault/_query.py index 504d86c..373264f 100644 --- a/skyflow/vault/_query.py +++ b/skyflow/vault/_query.py @@ -54,6 +54,7 @@ def getQueryResponse(response: requests.Response, interface=interface): message = errorResponse['error']['message'] except: message = SkyflowErrorMessages.RESPONSE_NOT_JSON.value % content + raise SkyflowError(SkyflowErrorCodes.INVALID_INDEX, message, interface=interface) error = {"error": {}} if 'x-request-id' in response.headers: message += ' - request id: ' + response.headers['x-request-id'] diff --git a/tests/vault/test_query.py b/tests/vault/test_query.py index 2a67d36..63f9079 100644 --- a/tests/vault/test_query.py +++ b/tests/vault/test_query.py @@ -4,13 +4,14 @@ import json import unittest import os +from unittest import mock +import requests from requests.models import Response from skyflow.vault._query import getQueryRequestBody, getQueryResponse from skyflow.errors._skyflow_errors import SkyflowError, SkyflowErrorCodes, SkyflowErrorMessages from skyflow.vault._client import Client from skyflow.vault._config import Configuration, QueryOptions - class TestQuery(unittest.TestCase): def setUp(self) -> None: @@ -39,6 +40,18 @@ def setUp(self) -> None: ] } + self.requestId = '5d5d7e21-c789-9fcc-ba31-2a279d3a28ef' + + self.mockApiError = { + "error": { + "grpc_code": 13, + "http_code": 500, + "message": "ERROR (internal_error): Could not find Notebook Mapping Notebook Name was not found", + "http_status": "Internal Server Error", + "details": [] + } + } + self.mockFailResponse = { "error": { "code": 500, @@ -119,6 +132,37 @@ def testGetQueryResponseSuccessInvalidJson(self): self.assertEqual(se.code, 200) self.assertEqual( se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'invalid-json') + + def testGetQueryResponseFailInvalidJson(self): + invalid_response = mock.Mock( + spec=requests.Response, + status_code=404, + content=b'error' + ) + invalid_response.raise_for_status.side_effect = requests.exceptions.HTTPError("Not found") + try: + getQueryResponse(invalid_response) + self.fail('Not failing on invalid error json') + except SkyflowError as se: + self.assertEqual(se.code, 404) + self.assertEqual( + se.message, SkyflowErrorMessages.RESPONSE_NOT_JSON.value % 'error') + + def testGetQueryResponseFail(self): + response = mock.Mock( + spec=requests.Response, + status_code=500, + content=json.dumps(self.mockApiError).encode('utf-8') + ) + response.headers = {"x-request-id": self.requestId} + response.raise_for_status.side_effect = requests.exceptions.HTTPError("Server Error") + try: + getQueryResponse(response) + self.fail('not throwing exception when error code is 500') + except SkyflowError as e: + self.assertEqual(e.code, 500) + self.assertEqual(e.message, SkyflowErrorMessages.SERVER_ERROR.value) + self.assertDictEqual(e.data, self.mockFailResponse) def testQueryInvalidToken(self): config = Configuration('id', 'url', lambda: 'invalid-token') @@ -128,4 +172,4 @@ def testQueryInvalidToken(self): except SkyflowError as e: self.assertEqual(e.code, SkyflowErrorCodes.INVALID_INPUT.value) self.assertEqual( - e.message, SkyflowErrorMessages.TOKEN_PROVIDER_INVALID_TOKEN.value) \ No newline at end of file + e.message, SkyflowErrorMessages.TOKEN_PROVIDER_INVALID_TOKEN.value)