From 9f53aedf102037bf4d08a8a8482c3d93f98bc0df Mon Sep 17 00:00:00 2001 From: mibe Date: Fri, 15 Sep 2023 10:43:08 +0100 Subject: [PATCH 01/10] Split MockContext into Standalone and multi-group --- exasol_udf_mock_python/mock_context.py | 155 ++++++++++++++++++------- tests/test_mock_context.py | 59 ++++++++++ tests/test_mock_context_standalone.py | 104 +++++++++++++++++ 3 files changed, 277 insertions(+), 41 deletions(-) create mode 100644 tests/test_mock_context.py create mode 100644 tests/test_mock_context_standalone.py diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 7d8b8a2..9523e98 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Iterator +from typing import List, Tuple, Iterator, Any, Optional, Union import pandas as pd @@ -9,49 +9,122 @@ class MockContext(UDFContext): + """ + Implementation of generic UDF Mock Context interface for a SET UDF with groups. + This class allows iterating over groups. The functionality of the UDF Context are applicable + for the current input group. + + Call `_next_group` to iterate over groups. The `_output_groups` property provides the emit + output for all groups iterated so far including the output for the current group. + """ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): + """ + :param input_groups: Input groups. Each group object should contain input rows for the group. + + :param metadata: The mock metadata object. + """ + self._input_groups = input_groups - self._output_groups = [] - self._input_group = None # type: Group - self._output_group_list = None # type: List - self._output_group = None # type: Group - self._iter = None # type: Iterator[Tuple] - self._len = None # type: int self._metadata = metadata - self._name_position_map = \ - {column.name: position - for position, column - in enumerate(metadata.input_columns)} + """ Mock context for the current group """ + self._current = None # type: Optional[StandaloneMockContext] + """ Output for all groups """ + self._previous_groups = [] # type: List[Group] + + def _next_group(self) -> bool: + """ + Moves group iterator to the next group. + Returns False if the iterator gets beyond the last group. Returns True otherwise. + """ + + # Save output of the current group + if self._current is not None: + self._previous_groups.append(Group(self._current.output)) + self._current = None - def _next_group(self): + # Try get to the next input group try: - self._input_group = next(self._input_groups) + input_group = next(self._input_groups) except StopIteration as e: - self._data = None - self._output_group_list = None - self._output_group = None - self._input_group = None - self._iter = None - self._len = None return False - self._len = len(self._input_group) - if self._len == 0: - self._data = None - self._output_group_list = None - self._output_group = None - self._input_group = None - self._iter = None - self._len = None - raise RuntimeError("Empty input groups are not allowd") - self._output_group_list = [] - self._output_group = Group(self._output_group_list) - self._output_groups.append(self._output_group) - self._iter = iter(self._input_group) - self.next() + if len(input_group) == 0: + raise RuntimeError("Empty input groups are not allowed") + + # Create Mock Context for the new input group + self._current = StandaloneMockContext(input_group, self._metadata) return True - def _is_positive_integer(self, value): + @property + def _output_groups(self): + """ + Output of all groups including the current one. + """ + if self._current is None: + return self._previous_groups + else: + groups = list(self._previous_groups) + groups.append(Group(self._current.output)) + return groups + + def __getattr__(self, name): + return None if self._current is None else getattr(self._current, name) + + def get_dataframe(self, num_rows: Union[str, int], start_col: int = 0) -> Optional[pd.DataFrame]: + return None if self._current is None else self._current.get_dataframe(num_rows, start_col) + + def next(self, reset: bool = False) -> bool: + return False if self._current is None else self._current.next(reset) + + def size(self) -> int: + return 0 if self._current is None else self._current.size() + + def reset(self) -> None: + if self._current is not None: + self._current.reset() + + def emit(self, *args): + if self._current is not None: + self._current.emit(*args) + + +class StandaloneMockContext(UDFContext): + """ + Implementation of generic UDF Mock Context interface a SCALAR UDF or a SET UDF with no groups. + + For Emit UDFs the output in the form of the list of tuples can be + access by reading the `output` property. + """ + + def __init__(self, inp: Any, metadata: MockMetaData): + """ + :param inp: Input rows for a SET UDF or parameters for a SCALAR one. + In the former case the input object must be an iterable of rows. This, for example, + can be a Group object. It must implement the __len__ method. Each data row must be + an indexable container, e.g. a tuple. In the SCALAR case the input should also be + an indexable container. + + :param metadata: The mock metadata object. + """ + + self._input = inp if metadata.input_type.upper() == 'SET' else [inp] + self._metadata = metadata + self._data = None # type: Optional[Any] + self._iter = None # type: Optional[Iterator[Tuple[Any, ...]]] + self._name_position_map = \ + {column.name: position + for position, column + in enumerate(metadata.input_columns)} + self._output = [] + self.next(reset=True) + + @property + def output(self) -> List[Tuple[Any, ...]]: + """Emitted output so far""" + return self._output + + @staticmethod + def _is_positive_integer(value): return value is not None and isinstance(value, int) and value > 0 def get_dataframe(self, num_rows='all', start_col=0): @@ -80,10 +153,10 @@ def get_dataframe(self, num_rows='all', start_col=0): return df def __getattr__(self, name): - return self._data[self._name_position_map[name]] + return None if self._data is None else self._data[self._name_position_map[name]] def next(self, reset: bool = False): - if reset: + if self._iter is None or reset: self.reset() else: try: @@ -96,10 +169,10 @@ def next(self, reset: bool = False): return False def size(self): - return self._len + return len(self._input) def reset(self): - self._iter = iter(self._input_group) + self._iter = iter(self._input) self.next() def emit(self, *args): @@ -109,10 +182,10 @@ def emit(self, *args): tuples = [args] for row in tuples: self._validate_tuples(row, self._metadata.output_columns) - self._output_group_list.extend(tuples) - return + self._output.extend(tuples) - def _validate_tuples(self, row: Tuple, columns: List[Column]): + @staticmethod + def _validate_tuples(row: Tuple, columns: List[Column]): if len(row) != len(columns): raise Exception(f"row {row} has not the same number of values as columns are defined") for i, column in enumerate(columns): diff --git a/tests/test_mock_context.py b/tests/test_mock_context.py new file mode 100644 index 0000000..32e70de --- /dev/null +++ b/tests/test_mock_context.py @@ -0,0 +1,59 @@ +import pytest +import pandas as pd + +from exasol_udf_mock_python.group import Group +from exasol_udf_mock_python.mock_context import MockContext +from tests.test_mock_context_standalone import meta_set_emits + + +@pytest.fixture +def context_set_emits(meta_set_emits): + pets = Group([(1, 'cat'), (2, 'dog')]) + bugs = Group([(3, 'ant'), (4, 'bee'), (5, 'beetle')]) + groups = [pets, bugs] + return MockContext(iter(groups), meta_set_emits) + + +def test_scroll(context_set_emits): + assert context_set_emits._current is None + assert not context_set_emits._output_groups + assert context_set_emits._next_group() + assert context_set_emits.t2 == 'cat' + assert context_set_emits.next() + assert context_set_emits.t2 == 'dog' + assert not context_set_emits.next() + assert context_set_emits._next_group() + assert context_set_emits.t2 == 'ant' + assert context_set_emits.next() + assert context_set_emits.t2 == 'bee' + assert context_set_emits.next() + assert context_set_emits.t2 == 'beetle' + assert not context_set_emits.next() + assert not context_set_emits._next_group() + assert context_set_emits._current is None + + +def test_output_groups(context_set_emits): + context_set_emits._next_group() + context_set_emits.emit(1, 'cat') + context_set_emits.emit(2, 'dog') + context_set_emits._next_group() + context_set_emits.emit(3, 'ant') + context_set_emits.emit(4, 'bee') + context_set_emits.emit(5, 'beetle') + context_set_emits._next_group() + assert len(context_set_emits._output_groups) == 2 + assert context_set_emits._output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) + assert context_set_emits._output_groups[1] == Group([(3, 'ant'), (4, 'bee'), (5, 'beetle')]) + + +def test_output_groups_partial(context_set_emits): + context_set_emits._next_group() + context_set_emits.emit(1, 'cat') + context_set_emits.emit(2, 'dog') + context_set_emits._next_group() + context_set_emits.emit(3, 'ant') + context_set_emits.emit(4, 'bee') + assert len(context_set_emits._output_groups) == 2 + assert context_set_emits._output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) + assert context_set_emits._output_groups[1] == Group([(3, 'ant'), (4, 'bee')]) diff --git a/tests/test_mock_context_standalone.py b/tests/test_mock_context_standalone.py new file mode 100644 index 0000000..23eaef4 --- /dev/null +++ b/tests/test_mock_context_standalone.py @@ -0,0 +1,104 @@ +import pytest +import pandas as pd + +from exasol_udf_mock_python.column import Column +from exasol_udf_mock_python.mock_meta_data import MockMetaData +from exasol_udf_mock_python.mock_context import StandaloneMockContext + + +def udf_wrapper(): + pass + + +@pytest.fixture +def meta_scalar_return(): + return MockMetaData( + script_code_wrapper_function=udf_wrapper, + input_type='SCALAR', + input_columns=[Column('t', int, 'INTEGER')], + output_type='RETURNS', + output_columns=[Column('t', int, 'INTEGER')] + ) + + +@pytest.fixture +def meta_set_emits(): + return MockMetaData( + script_code_wrapper_function=udf_wrapper, + input_type='SET', + input_columns=[Column('t1', int, 'INTEGER'), Column('t2', str, 'VARCHAR(100)')], + output_type='EMITS', + output_columns=[Column('t1', int, 'INTEGER'), Column('t2', str, 'VARCHAR(100)')] + ) + + +@pytest.fixture +def context_scalar_return(meta_scalar_return): + return StandaloneMockContext((5,), meta_scalar_return) + + +@pytest.fixture +def context_set_emits(meta_set_emits): + return StandaloneMockContext([(5, 'abc'), (6, 'efgh')], meta_set_emits) + + +def test_get_dataframe(context_set_emits): + df = context_set_emits.get_dataframe() + expected_df = pd.DataFrame({'t1': [5, 6], 't2': ['abc', 'efgh']}) + pd.testing.assert_frame_equal(df, expected_df) + + +def test_get_dataframe_limited(context_set_emits): + df = context_set_emits.get_dataframe(1, 1) + expected_df = pd.DataFrame({'t2': ['abc']}) + pd.testing.assert_frame_equal(df, expected_df) + + +def test_attr_set(context_set_emits): + assert context_set_emits.t1 == 5 + assert context_set_emits.t2 == 'abc' + + +def test_attr_scalar(context_scalar_return): + assert context_scalar_return.t == 5 + + +def test_next(context_set_emits): + assert context_set_emits.next() + assert context_set_emits.t1 == 6 + assert context_set_emits.t2 == 'efgh' + + +def test_next_end(context_set_emits): + assert context_set_emits.next() + assert not context_set_emits.next() + + +def test_reset(context_set_emits): + assert context_set_emits.next() + context_set_emits.reset() + assert context_set_emits.t1 == 5 + assert context_set_emits.t2 == 'abc' + + +def test_size(context_set_emits): + assert context_set_emits.size() == 2 + + +def test_validate_tuples_good(meta_set_emits): + StandaloneMockContext._validate_tuples((10, 'fish'), meta_set_emits.output_columns) + + +def test_validate_tuples_bad(meta_set_emits): + with pytest.raises(Exception): + StandaloneMockContext._validate_tuples((10,), meta_set_emits.output_columns) + with pytest.raises(Exception): + StandaloneMockContext._validate_tuples((10, 'fish', 4.5), meta_set_emits.output_columns) + with pytest.raises(Exception): + StandaloneMockContext._validate_tuples((10., 'fish'), meta_set_emits.output_columns) + + +def test_emit_df(context_set_emits): + df = pd.DataFrame({'t1': [1, 2], 't2': ['cat', 'dog']}) + context_set_emits.emit(df) + assert context_set_emits.output == [(1, 'cat'), (2, 'dog')] From 8c2fc81b8067438109a80563cfd7811a0538d4e0 Mon Sep 17 00:00:00 2001 From: mibe Date: Fri, 15 Sep 2023 13:07:39 +0100 Subject: [PATCH 02/10] Fixed compatibility issue with the scalar parameters being wrapped in a container. --- exasol_udf_mock_python/mock_context.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 9523e98..ad631b0 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Iterator, Any, Optional, Union +from typing import List, Tuple, Iterator, Iterable, Any, Optional, Union import pandas as pd @@ -101,13 +101,27 @@ def __init__(self, inp: Any, metadata: MockMetaData): :param inp: Input rows for a SET UDF or parameters for a SCALAR one. In the former case the input object must be an iterable of rows. This, for example, can be a Group object. It must implement the __len__ method. Each data row must be - an indexable container, e.g. a tuple. In the SCALAR case the input should also be - an indexable container. + an indexable container, e.g. a tuple. + In the SCALAR case the input can be a scalar value, or tuple. This can also be wrapped + in an iterable container, similar to the SET case. :param metadata: The mock metadata object. """ - self._input = inp if metadata.input_type.upper() == 'SET' else [inp] + if metadata.input_type.upper() == 'SCALAR': + # Figure out if the SCALAR parameters are provided as a scalar value or a tuple + # and also if there is a wrapping container around. In any case, this should be + # converted to a form [(param1[, param2, ...)] + if isinstance(inp, Iterable) and not isinstance(inp, str): + row1 = next(iter(inp)) + if isinstance(row1, Iterable) and not isinstance(row1, str): + self._input = inp + else: + self._input = [inp] + else: + self._input = [(inp,)] + else: + self._input = inp self._metadata = metadata self._data = None # type: Optional[Any] self._iter = None # type: Optional[Iterator[Tuple[Any, ...]]] From 4f9e1888ebb050f380900e8dc65583dff67c9695 Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Thu, 12 Oct 2023 07:53:41 +0100 Subject: [PATCH 03/10] Update exasol_udf_mock_python/mock_context.py Co-authored-by: Torsten Kilias --- exasol_udf_mock_python/mock_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index ad631b0..615c9de 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -28,7 +28,7 @@ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): self._input_groups = input_groups self._metadata = metadata """ Mock context for the current group """ - self._current = None # type: Optional[StandaloneMockContext] + self._current:Optional[StandaloneMockContext] = None """ Output for all groups """ self._previous_groups = [] # type: List[Group] From da09702176c0df05cca027ad87abddf8e8324e05 Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Thu, 12 Oct 2023 07:53:48 +0100 Subject: [PATCH 04/10] Update exasol_udf_mock_python/mock_context.py Co-authored-by: Torsten Kilias --- exasol_udf_mock_python/mock_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 615c9de..9520319 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -30,7 +30,7 @@ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): """ Mock context for the current group """ self._current:Optional[StandaloneMockContext] = None """ Output for all groups """ - self._previous_groups = [] # type: List[Group] + self._previous_groups: List[Group] = [] def _next_group(self) -> bool: """ From 124e10c593217d2d577c08a49412a60b6e64ff1d Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Thu, 12 Oct 2023 07:55:42 +0100 Subject: [PATCH 05/10] Update exasol_udf_mock_python/mock_context.py Co-authored-by: Torsten Kilias --- exasol_udf_mock_python/mock_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 9520319..f5ba806 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -124,7 +124,7 @@ def __init__(self, inp: Any, metadata: MockMetaData): self._input = inp self._metadata = metadata self._data = None # type: Optional[Any] - self._iter = None # type: Optional[Iterator[Tuple[Any, ...]]] + self._iter: Optional[Iterator[Tuple[Any, ...]]] = None self._name_position_map = \ {column.name: position for position, column From 7c97b03b5dc2a1fbcafa4f0a88c936f9f2373d55 Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Thu, 12 Oct 2023 07:56:00 +0100 Subject: [PATCH 06/10] Update exasol_udf_mock_python/mock_context.py Co-authored-by: Torsten Kilias --- exasol_udf_mock_python/mock_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index f5ba806..0a0aedd 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -123,7 +123,7 @@ def __init__(self, inp: Any, metadata: MockMetaData): else: self._input = inp self._metadata = metadata - self._data = None # type: Optional[Any] + self._data: Optional[Any] = None self._iter: Optional[Iterator[Tuple[Any, ...]]] = None self._name_position_map = \ {column.name: position From aa3f2ed1f4646182b081ae52515457497cd4fa2f Mon Sep 17 00:00:00 2001 From: mibe Date: Wed, 18 Oct 2023 08:59:09 +0100 Subject: [PATCH 07/10] Refactoring following a review --- exasol_udf_mock_python/mock_context.py | 104 +++++++++++++------- exasol_udf_mock_python/udf_mock_executor.py | 4 +- tests/test_mock_context.py | 51 ++++++---- tests/test_mock_context_standalone.py | 16 +-- 4 files changed, 110 insertions(+), 65 deletions(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 0a0aedd..90e2acc 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -1,4 +1,5 @@ from typing import List, Tuple, Iterator, Iterable, Any, Optional, Union +from functools import wraps import pandas as pd @@ -14,8 +15,11 @@ class MockContext(UDFContext): This class allows iterating over groups. The functionality of the UDF Context are applicable for the current input group. - Call `_next_group` to iterate over groups. The `_output_groups` property provides the emit + Call `next_group` to iterate over groups. The `output_groups` property provides the emit output for all groups iterated so far including the output for the current group. + + Calling any function of the UDFContext interface when the group iterator has passed the end + or before the first call to the `next_group` is illegal and will cause a RuntimeException. """ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): @@ -28,20 +32,20 @@ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): self._input_groups = input_groups self._metadata = metadata """ Mock context for the current group """ - self._current:Optional[StandaloneMockContext] = None + self._current_context: Optional[StandaloneMockContext] = None """ Output for all groups """ - self._previous_groups: List[Group] = [] + self._previous_output: List[Group] = [] - def _next_group(self) -> bool: + def next_group(self) -> bool: """ Moves group iterator to the next group. Returns False if the iterator gets beyond the last group. Returns True otherwise. """ # Save output of the current group - if self._current is not None: - self._previous_groups.append(Group(self._current.output)) - self._current = None + if self._current_context is not None: + self._previous_output.append(Group(self._current_context.output)) + self._current_context = None # Try get to the next input group try: @@ -52,40 +56,75 @@ def _next_group(self) -> bool: raise RuntimeError("Empty input groups are not allowed") # Create Mock Context for the new input group - self._current = StandaloneMockContext(input_group, self._metadata) + self._current_context = StandaloneMockContext(input_group, self._metadata) return True @property - def _output_groups(self): + def output_groups(self): """ Output of all groups including the current one. """ - if self._current is None: - return self._previous_groups + if self._current_context is None: + return self._previous_output else: - groups = list(self._previous_groups) - groups.append(Group(self._current.output)) + groups = list(self._previous_output) + groups.append(Group(self._current_context.output)) return groups + @staticmethod + def _check_context(f): + @wraps(f) + def wrapper(self, *args, **kwargs): + if self._current_context is None: + raise RuntimeError('Calling UDFContext interface when the current group context ' + 'is invalid is disallowed') + return f(self, *args, **kwargs) + + return wrapper + + @_check_context def __getattr__(self, name): - return None if self._current is None else getattr(self._current, name) + return getattr(self._current_context, name) + @_check_context def get_dataframe(self, num_rows: Union[str, int], start_col: int = 0) -> Optional[pd.DataFrame]: - return None if self._current is None else self._current.get_dataframe(num_rows, start_col) + return self._current_context.get_dataframe(num_rows, start_col) + @_check_context def next(self, reset: bool = False) -> bool: - return False if self._current is None else self._current.next(reset) + return self._current_context.next(reset) + @_check_context def size(self) -> int: - return 0 if self._current is None else self._current.size() + return self._current_context.size() + @_check_context def reset(self) -> None: - if self._current is not None: - self._current.reset() + self._current_context.reset() - def emit(self, *args): - if self._current is not None: - self._current.emit(*args) + @_check_context + def emit(self, *args) -> None: + self._current_context.emit(*args) + + +def get_scalar_input(inp: Any) -> Iterable[Tuple[Any, ...]]: + """ + Figures out if the SCALAR parameters are provided as a scalar value or a tuple + and also if there is a wrapping container around. + Unless the parameters are already in a wrapping container returns parameters as a tuple wrapped + into a one-item list, e.g [(param1[, param2, ...)]. Otherwise, returns the original input. + + :param inp: Input parameters. + """ + + if isinstance(inp, Iterable) and not isinstance(inp, str): + row1 = next(iter(inp)) + if isinstance(row1, Iterable) and not isinstance(row1, str): + return inp + else: + return [inp] + else: + return [(inp,)] class StandaloneMockContext(UDFContext): @@ -93,7 +132,7 @@ class StandaloneMockContext(UDFContext): Implementation of generic UDF Mock Context interface a SCALAR UDF or a SET UDF with no groups. For Emit UDFs the output in the form of the list of tuples can be - access by reading the `output` property. + accessed by reading the `output` property. """ def __init__(self, inp: Any, metadata: MockMetaData): @@ -107,19 +146,8 @@ def __init__(self, inp: Any, metadata: MockMetaData): :param metadata: The mock metadata object. """ - if metadata.input_type.upper() == 'SCALAR': - # Figure out if the SCALAR parameters are provided as a scalar value or a tuple - # and also if there is a wrapping container around. In any case, this should be - # converted to a form [(param1[, param2, ...)] - if isinstance(inp, Iterable) and not isinstance(inp, str): - row1 = next(iter(inp)) - if isinstance(row1, Iterable) and not isinstance(row1, str): - self._input = inp - else: - self._input = [inp] - else: - self._input = [(inp,)] + self._input = get_scalar_input(inp) else: self._input = inp self._metadata = metadata @@ -176,7 +204,7 @@ def next(self, reset: bool = False): try: new_data = next(self._iter) self._data = new_data - self._validate_tuples(self._data, self._metadata.input_columns) + self.validate_emit(self._data, self._metadata.input_columns) return True except StopIteration as e: self._data = None @@ -195,11 +223,11 @@ def emit(self, *args): else: tuples = [args] for row in tuples: - self._validate_tuples(row, self._metadata.output_columns) + self.validate_emit(row, self._metadata.output_columns) self._output.extend(tuples) @staticmethod - def _validate_tuples(row: Tuple, columns: List[Column]): + def validate_emit(row: Tuple, columns: List[Column]): if len(row) != len(columns): raise Exception(f"row {row} has not the same number of values as columns are defined") for i, column in enumerate(columns): diff --git a/exasol_udf_mock_python/udf_mock_executor.py b/exasol_udf_mock_python/udf_mock_executor.py index 448b00d..a9bdd66 100644 --- a/exasol_udf_mock_python/udf_mock_executor.py +++ b/exasol_udf_mock_python/udf_mock_executor.py @@ -7,7 +7,7 @@ def _loop_groups(ctx:MockContext, exa:MockExaEnvironment, runfunc:Callable): - while ctx._next_group(): + while ctx.next_group(): _wrapped_run(ctx, exa, runfunc) @@ -77,4 +77,4 @@ def run(self, finally: if "cleanup" in exec_globals: self._exec_cleanup(exec_globals) - return ctx._output_groups + return ctx.output_groups diff --git a/tests/test_mock_context.py b/tests/test_mock_context.py index 32e70de..267c8f7 100644 --- a/tests/test_mock_context.py +++ b/tests/test_mock_context.py @@ -15,45 +15,62 @@ def context_set_emits(meta_set_emits): def test_scroll(context_set_emits): - assert context_set_emits._current is None - assert not context_set_emits._output_groups - assert context_set_emits._next_group() + assert not context_set_emits.output_groups + assert context_set_emits.next_group() assert context_set_emits.t2 == 'cat' assert context_set_emits.next() assert context_set_emits.t2 == 'dog' assert not context_set_emits.next() - assert context_set_emits._next_group() + assert context_set_emits.next_group() assert context_set_emits.t2 == 'ant' assert context_set_emits.next() assert context_set_emits.t2 == 'bee' assert context_set_emits.next() assert context_set_emits.t2 == 'beetle' assert not context_set_emits.next() - assert not context_set_emits._next_group() - assert context_set_emits._current is None + assert not context_set_emits.next_group() def test_output_groups(context_set_emits): - context_set_emits._next_group() + context_set_emits.next_group() context_set_emits.emit(1, 'cat') context_set_emits.emit(2, 'dog') - context_set_emits._next_group() + context_set_emits.next_group() context_set_emits.emit(3, 'ant') context_set_emits.emit(4, 'bee') context_set_emits.emit(5, 'beetle') - context_set_emits._next_group() - assert len(context_set_emits._output_groups) == 2 - assert context_set_emits._output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) - assert context_set_emits._output_groups[1] == Group([(3, 'ant'), (4, 'bee'), (5, 'beetle')]) + context_set_emits.next_group() + assert len(context_set_emits.output_groups) == 2 + assert context_set_emits.output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) + assert context_set_emits.output_groups[1] == Group([(3, 'ant'), (4, 'bee'), (5, 'beetle')]) def test_output_groups_partial(context_set_emits): - context_set_emits._next_group() + context_set_emits.next_group() context_set_emits.emit(1, 'cat') context_set_emits.emit(2, 'dog') - context_set_emits._next_group() + context_set_emits.next_group() context_set_emits.emit(3, 'ant') context_set_emits.emit(4, 'bee') - assert len(context_set_emits._output_groups) == 2 - assert context_set_emits._output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) - assert context_set_emits._output_groups[1] == Group([(3, 'ant'), (4, 'bee')]) + assert len(context_set_emits.output_groups) == 2 + assert context_set_emits.output_groups[0] == Group([(1, 'cat'), (2, 'dog')]) + assert context_set_emits.output_groups[1] == Group([(3, 'ant'), (4, 'bee')]) + + +def test_no_context_exception(context_set_emits): + + for _ in range(3): + context_set_emits.next_group() + + with pytest.raises(RuntimeError): + _ = context_set_emits.t2 + with pytest.raises(RuntimeError): + _ = context_set_emits.get_dataframe() + with pytest.raises(RuntimeError): + context_set_emits.next() + with pytest.raises(RuntimeError): + _ = context_set_emits.size() + with pytest.raises(RuntimeError): + context_set_emits.reset() + with pytest.raises(RuntimeError): + context_set_emits.emit(1, 'cat') diff --git a/tests/test_mock_context_standalone.py b/tests/test_mock_context_standalone.py index 23eaef4..6ffc10c 100644 --- a/tests/test_mock_context_standalone.py +++ b/tests/test_mock_context_standalone.py @@ -70,12 +70,12 @@ def test_next(context_set_emits): def test_next_end(context_set_emits): - assert context_set_emits.next() + context_set_emits.next() assert not context_set_emits.next() def test_reset(context_set_emits): - assert context_set_emits.next() + context_set_emits.next() context_set_emits.reset() assert context_set_emits.t1 == 5 assert context_set_emits.t2 == 'abc' @@ -85,17 +85,17 @@ def test_size(context_set_emits): assert context_set_emits.size() == 2 -def test_validate_tuples_good(meta_set_emits): - StandaloneMockContext._validate_tuples((10, 'fish'), meta_set_emits.output_columns) +def test_validate_emit_good(meta_set_emits): + StandaloneMockContext.validate_emit((10, 'fish'), meta_set_emits.output_columns) -def test_validate_tuples_bad(meta_set_emits): +def test_validate_emit_bad(meta_set_emits): with pytest.raises(Exception): - StandaloneMockContext._validate_tuples((10,), meta_set_emits.output_columns) + StandaloneMockContext.validate_emit((10,), meta_set_emits.output_columns) with pytest.raises(Exception): - StandaloneMockContext._validate_tuples((10, 'fish', 4.5), meta_set_emits.output_columns) + StandaloneMockContext.validate_emit((10, 'fish', 4.5), meta_set_emits.output_columns) with pytest.raises(Exception): - StandaloneMockContext._validate_tuples((10., 'fish'), meta_set_emits.output_columns) + StandaloneMockContext.validate_emit((10., 'fish'), meta_set_emits.output_columns) def test_emit_df(context_set_emits): From 28277219d8cc4ce41ae1b9079f1718efbd9cf31d Mon Sep 17 00:00:00 2001 From: mibe Date: Wed, 18 Oct 2023 09:22:59 +0100 Subject: [PATCH 08/10] Making the MockContext tests running under Python 3.8 --- exasol_udf_mock_python/mock_context.py | 43 ++++++++++++++++---------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index 90e2acc..e266774 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -9,6 +9,21 @@ from exasol_udf_mock_python.udf_context import UDFContext +def check_context(f): + """ + Decorator checking that a MockContext object has valid current group context. + Raises a RuntimeError if this is not the case. + """ + @wraps(f) + def wrapper(self, *args, **kwargs): + if self.no_context: + raise RuntimeError('Calling UDFContext interface when the current group context ' + 'is invalid is disallowed') + return f(self, *args, **kwargs) + + return wrapper + + class MockContext(UDFContext): """ Implementation of generic UDF Mock Context interface for a SET UDF with groups. @@ -36,6 +51,11 @@ def __init__(self, input_groups: Iterator[Group], metadata: MockMetaData): """ Output for all groups """ self._previous_output: List[Group] = [] + @property + def no_context(self) -> bool: + """Returns True if the current group context is invalid""" + return self._current_context is None + def next_group(self) -> bool: """ Moves group iterator to the next group. @@ -71,38 +91,27 @@ def output_groups(self): groups.append(Group(self._current_context.output)) return groups - @staticmethod - def _check_context(f): - @wraps(f) - def wrapper(self, *args, **kwargs): - if self._current_context is None: - raise RuntimeError('Calling UDFContext interface when the current group context ' - 'is invalid is disallowed') - return f(self, *args, **kwargs) - - return wrapper - - @_check_context + @check_context def __getattr__(self, name): return getattr(self._current_context, name) - @_check_context + @check_context def get_dataframe(self, num_rows: Union[str, int], start_col: int = 0) -> Optional[pd.DataFrame]: return self._current_context.get_dataframe(num_rows, start_col) - @_check_context + @check_context def next(self, reset: bool = False) -> bool: return self._current_context.next(reset) - @_check_context + @check_context def size(self) -> int: return self._current_context.size() - @_check_context + @check_context def reset(self) -> None: self._current_context.reset() - @_check_context + @check_context def emit(self, *args) -> None: self._current_context.emit(*args) From 077dec332b995a5686bfc2bdfc67eefcc14813d3 Mon Sep 17 00:00:00 2001 From: mibe Date: Tue, 24 Oct 2023 12:02:51 +0100 Subject: [PATCH 09/10] Updated MockContext test_scroll test --- tests/test_mock_context.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/test_mock_context.py b/tests/test_mock_context.py index 267c8f7..3a50bba 100644 --- a/tests/test_mock_context.py +++ b/tests/test_mock_context.py @@ -15,20 +15,13 @@ def context_set_emits(meta_set_emits): def test_scroll(context_set_emits): - assert not context_set_emits.output_groups - assert context_set_emits.next_group() - assert context_set_emits.t2 == 'cat' - assert context_set_emits.next() - assert context_set_emits.t2 == 'dog' - assert not context_set_emits.next() - assert context_set_emits.next_group() - assert context_set_emits.t2 == 'ant' - assert context_set_emits.next() - assert context_set_emits.t2 == 'bee' - assert context_set_emits.next() - assert context_set_emits.t2 == 'beetle' - assert not context_set_emits.next() - assert not context_set_emits.next_group() + groups = [] + while context_set_emits.next_group(): + group = [context_set_emits.t2] + while context_set_emits.next(): + group.append(context_set_emits.t2) + groups.append(group) + assert groups == [['cat', 'dog'], ['ant', 'bee', 'beetle']] def test_output_groups(context_set_emits): From 26a26d7100ed0e3793faf853da64a8978d8e2a2b Mon Sep 17 00:00:00 2001 From: mibe Date: Tue, 24 Oct 2023 14:04:02 +0100 Subject: [PATCH 10/10] validate_emit is taken out of the StandaloneMockContext class --- exasol_udf_mock_python/mock_context.py | 29 +++++++++++++++++--------- tests/test_mock_context_standalone.py | 10 ++++----- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/exasol_udf_mock_python/mock_context.py b/exasol_udf_mock_python/mock_context.py index e266774..86635c2 100644 --- a/exasol_udf_mock_python/mock_context.py +++ b/exasol_udf_mock_python/mock_context.py @@ -24,6 +24,23 @@ def wrapper(self, *args, **kwargs): return wrapper +def validate_emit(row: Tuple, columns: List[Column]): + """ + Validates that a data row to be emitted corresponds to the definition of the output columns. + The number of elements in the row should match the number of columns and the type of each + element should match the type of the correspondent column. Raises a ValueError if the first + condition is false or a TypeError if the second condition is false. + + :param row: Data row + :param columns: Column definition. + """ + if len(row) != len(columns): + raise ValueError(f"row {row} has not the same number of values as columns are defined") + for i, column in enumerate(columns): + if row[i] is not None and not isinstance(row[i], column.type): + raise TypeError(f"Value {row[i]} ({type(row[i])}) at position {i} is not a {column.type}") + + class MockContext(UDFContext): """ Implementation of generic UDF Mock Context interface for a SET UDF with groups. @@ -213,7 +230,7 @@ def next(self, reset: bool = False): try: new_data = next(self._iter) self._data = new_data - self.validate_emit(self._data, self._metadata.input_columns) + validate_emit(self._data, self._metadata.input_columns) return True except StopIteration as e: self._data = None @@ -232,13 +249,5 @@ def emit(self, *args): else: tuples = [args] for row in tuples: - self.validate_emit(row, self._metadata.output_columns) + validate_emit(row, self._metadata.output_columns) self._output.extend(tuples) - - @staticmethod - def validate_emit(row: Tuple, columns: List[Column]): - if len(row) != len(columns): - raise Exception(f"row {row} has not the same number of values as columns are defined") - for i, column in enumerate(columns): - if row[i] is not None and not isinstance(row[i], column.type): - raise TypeError(f"Value {row[i]} ({type(row[i])}) at position {i} is not a {column.type}") diff --git a/tests/test_mock_context_standalone.py b/tests/test_mock_context_standalone.py index 6ffc10c..9ce794d 100644 --- a/tests/test_mock_context_standalone.py +++ b/tests/test_mock_context_standalone.py @@ -3,7 +3,7 @@ from exasol_udf_mock_python.column import Column from exasol_udf_mock_python.mock_meta_data import MockMetaData -from exasol_udf_mock_python.mock_context import StandaloneMockContext +from exasol_udf_mock_python.mock_context import StandaloneMockContext, validate_emit def udf_wrapper(): @@ -86,16 +86,16 @@ def test_size(context_set_emits): def test_validate_emit_good(meta_set_emits): - StandaloneMockContext.validate_emit((10, 'fish'), meta_set_emits.output_columns) + validate_emit((10, 'fish'), meta_set_emits.output_columns) def test_validate_emit_bad(meta_set_emits): with pytest.raises(Exception): - StandaloneMockContext.validate_emit((10,), meta_set_emits.output_columns) + validate_emit((10,), meta_set_emits.output_columns) with pytest.raises(Exception): - StandaloneMockContext.validate_emit((10, 'fish', 4.5), meta_set_emits.output_columns) + validate_emit((10, 'fish', 4.5), meta_set_emits.output_columns) with pytest.raises(Exception): - StandaloneMockContext.validate_emit((10., 'fish'), meta_set_emits.output_columns) + validate_emit((10., 'fish'), meta_set_emits.output_columns) def test_emit_df(context_set_emits):