diff --git a/.github/workflows/unit-build.yml b/.github/workflows/unit-build.yml new file mode 100644 index 0000000..5123c83 --- /dev/null +++ b/.github/workflows/unit-build.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + timeout-minutes: 15 + strategy: + matrix: + python-version: [ 3.8, 3.9, '3.10' ] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + pip install . + pip install pytest-cov + - name: Test with pytest + run: | + pytest --cov=pecapiku -s tests/unit + - name: Codecov-coverage + uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/README.md b/README.md index 16173b5..c1bea9b 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,20 @@ -# Pecapiku - a persistent cache pickling utility +[](https://app.codecov.io/gh/MorrisNein/pecapiku) +[](https://github.com/MorrisNein/pecapiku/actions) +[](https://github.com/MorrisNein/pecapiku/blob/main/LICENSE) +[](https://t.me/morrisnein) + +# Pecapiku + +... a persistent cache pickling utility Provides a syntax for storing and retrieving the results of a computation on disk using `pickle` library. > ***Important note!*** The purpose of the utility is not to speed up calculations or to save memory. As the size -of a cache file increases, the access time will raise. -> -> The main purpose is to restart a heavy computational script if something broke in the middle and there is no way to debug it -beforehand. 
+> of a cache file increases, the access time will rise. +> +> The main purpose is to restart a heavy computational script if something broke in the middle and there is no way to +> debug it +> beforehand. The two main classes are `CacheDict`, `SingleValueCache`. @@ -33,6 +41,7 @@ decorated function. ## Cache File Management As plain as a day: + ``` python from pecapiku import config @@ -40,6 +49,7 @@ config.get_cache_dir() # Look at the default cache dir # The result is OS-specific config.set_cache_dir(...) # Change it to a more preferable directory ``` + All cache files will be created inside this directory, if a filename or a relative cache path is provided. If an absolute path is provided, a pickle file will appear at the path. @@ -75,14 +85,15 @@ There are 3 ways of getting a key: - positional and keyword arguments - object fields, if this function is a method 2. `inner_key` may be provided in a form of string code expression or a callable. -This expression or callable must return a hashable result that may be used as a dictionary key. -It may use inner function arguments by their corresponding names. -Or it may use `args` and `kwargs` - as the only option for any precompiled non-Python function. + This expression or callable must return a hashable result that may be used as a dictionary key. + It may use inner function arguments by their corresponding names. + Or it may use `args` and `kwargs` - as the only option for any precompiled non-Python function. 3. `outer_key` is a hashable constant to access a value in a `CacheDict`. - ## Examples +## Examples + +Example 1. CacheDict as a context manager. - Example 1. CacheDict as a context manager. ``` python import numpy as np from pecapiku import CacheDict @@ -98,7 +109,9 @@ with CacheDict('example_cache_dict.pkl') as cache_dict: # {'x_T': array([[1, 3], # [2, 4]])} ``` + Example 2. CacheDict as a decorator. 
+ ``` python import numpy as np from pecapiku import CacheDict @@ -115,7 +128,9 @@ cached_mult(a, b) # array([[ 5, 12], # [21, 32]]) ``` + Example 3. SingleValueCache as a decorator. + ``` python import time from timeit import timeit @@ -133,6 +148,7 @@ def a_heavy_function_cached(): print(timeit(a_heavy_function, number=10)) # 10.070 print(timeit(a_heavy_function_cached, number=10)) # 1.015 ``` + ## Installation `pip install git+https://github.com/MorrisNein/pecapiku` diff --git a/pecapiku/base_cache.py b/pecapiku/base_cache.py index 42933e7..d69636c 100644 --- a/pecapiku/base_cache.py +++ b/pecapiku/base_cache.py @@ -7,12 +7,13 @@ from pathlib import Path from typing import Any, Callable, Generic, Hashable, Type, TypeVar -from pecapiku.cache_access import COMP_CACHE_FILE_NAME, CacheAccess, _resolve_filepath +from pecapiku.cache_access import CacheAccess from pecapiku.no_cache import NoCache logger = logging.getLogger(__file__) DecoratedCallable = TypeVar("DecoratedCallable", bound=Callable[..., Any]) +Decorator = Callable[[DecoratedCallable], DecoratedCallable] class omnimethod(Generic[DecoratedCallable]): @@ -30,21 +31,20 @@ def __get__(self, instance, owner) -> DecoratedCallable: class BaseCache(ABC): def __init__(self, file_path: os.PathLike | str | None = None, access: CacheAccess = 'rew'): - file_path = file_path or COMP_CACHE_FILE_NAME - file_path = _resolve_filepath(file_path) - self.file_path: Path = file_path + file_path = file_path or self._get_default_file_path() + self.file_path: Path | None = file_path if file_path is None else Path(file_path) self.access = access @abstractmethod - def get_cache_val(self, key: Hashable) -> Any: + def _get_cache_val(self, key: Hashable) -> Any: raise NotImplementedError() @abstractmethod - def put_cache_val(self, key: Hashable, value: Any): + def _put_cache_val(self, key: Hashable, value: Any): raise NotImplementedError() @abstractmethod - def key_func(self, *args, **kwargs) -> Hashable: + def _key_func(self, *args, 
**kwargs) -> Hashable: raise NotImplementedError() def _read_execute_write(self, func, func_args, func_kwargs, access, key_kwargs: dict | None = None) -> Any: @@ -53,12 +53,12 @@ def _read_execute_write(self, func, func_args, func_kwargs, access, key_kwargs: logger.info('Executing cache value, since no access to the cache is provided...') return func(*func_args, **func_kwargs) - key = self.key_func(func, func_args, func_kwargs, **key_kwargs) + key = self._key_func(func, func_args, func_kwargs, **key_kwargs) was_read = False if 'r' in access: logger.info(f'Getting cache for the key "{key}"...') - val = self.get_cache_val(key) + val = self._get_cache_val(key) else: val = NoCache() @@ -76,13 +76,13 @@ def _read_execute_write(self, func, func_args, func_kwargs, access, key_kwargs: val = func(*func_args, **func_kwargs) if 'w' in access and not was_read and not isinstance(val, NoCache): - self.put_cache_val(key, val) + self._put_cache_val(key, val) logger.info(f'Writing cache for the key "{key}": {val}...') return val @classmethod @abstractmethod - def _decorate(cls, func: DecoratedCallable, *args, **kwargs) -> DecoratedCallable: + def _decorate(cls, func: DecoratedCallable, *args, **kwargs) -> Decorator | DecoratedCallable: raise NotImplementedError() @classmethod @@ -93,12 +93,13 @@ def _get_default_file_path(cls): @omnimethod def decorate(self: BaseCache | Type[BaseCache], func: DecoratedCallable, + *, file_path: os.PathLike | str | None = None, - access: CacheAccess | None = None, *args, **kwargs) -> DecoratedCallable: + access: CacheAccess | None = None, **kwargs) -> Decorator | DecoratedCallable: if not isinstance(self, BaseCache): file_path = file_path or self._get_default_file_path() access = access or 'rew' else: file_path = file_path or self.file_path access = access or self.access - return self._decorate(func, file_path, access, *args, **kwargs) + return self._decorate(func, file_path, access, **kwargs) diff --git a/pecapiku/cache_dict.py 
b/pecapiku/cache_dict.py index 1bc67a2..22293df 100644 --- a/pecapiku/cache_dict.py +++ b/pecapiku/cache_dict.py @@ -5,10 +5,10 @@ from collections import defaultdict from functools import partial, wraps from inspect import getcallargs, ismethod, signature -from typing import Any, Callable, Hashable +from typing import Any, Callable, Generic, Hashable -from pecapiku.base_cache import BaseCache, DecoratedCallable, omnimethod -from pecapiku.cache_access import COMP_CACHE_FILE_NAME, CacheAccess, _initialize_cache, update_cache +from pecapiku.base_cache import BaseCache, DecoratedCallable, Decorator, omnimethod +from pecapiku.cache_access import COMP_CACHE_FILE_NAME, CacheAccess, _initialize_cache, _resolve_filepath, update_cache from pecapiku.hash import get_hash from pecapiku.no_cache import NoCache @@ -58,7 +58,7 @@ def parse_key(callable_or_code: Callable[[Any], Hashable] | str, func: Callable, return key -class CacheDict(BaseCache): +class CacheDict(BaseCache, Generic[DecoratedCallable]): """ Decorator/context manager for caching of evaluation results. Creates a "pickle" file at disk space on a specified path. 
@@ -117,22 +117,23 @@ def __init__(self, file_path: os.PathLike | str | None = None, access: CacheAcce def __call__(self, func: DecoratedCallable | None = None, outer_key: Hashable | None = None, - inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable: + inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable | Decorator: return self.decorate(func=func, outer_key=outer_key, inner_key=inner_key) - def get_cache_val(self, key: Hashable) -> Any: + def _get_cache_val(self, key: Hashable) -> Any: + initialize_cache_dict(self.file_path) return self.cache_dict[key] - def put_cache_val(self, key: Hashable, value: Any) -> None: + def _put_cache_val(self, key: Hashable, value: Any) -> None: self.cache_dict[key] = value - def key_func(self, func, func_agrs, func_kwargs, inner_key, outer_key) -> Hashable: + def _key_func(self, func, func_agrs, func_kwargs, inner_key, outer_key) -> Hashable: if outer_key is not None: key = outer_key elif inner_key is not None: key = parse_key(inner_key, func, *func_agrs, **func_kwargs) else: - hash_objects = [func.__name__, func_agrs, tuple(sorted(func_kwargs.items()))] + hash_objects = [func.__name__, func_agrs, func_kwargs] if ismethod(func): hash_objects.insert(0, func.__self__) @@ -146,10 +147,12 @@ def _decorate(cls, file_path: os.PathLike | str | None = None, access: CacheAccess = 'rew', outer_key: Hashable | None = None, - inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable: + inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable | Decorator: if outer_key is not None and inner_key is not None: raise ValueError('At most one of (outer key, inner key) can be specified.') + file_path = _resolve_filepath(file_path) + @wraps(func) def decorated(*args, **kwargs): instance = cls(file_path, access) @@ -157,8 +160,6 @@ def decorated(*args, **kwargs): val = instance._read_execute_write(func, func_args=args, func_kwargs=kwargs, access=access, 
key_kwargs=dict(outer_key=outer_key, inner_key=inner_key)) return val - - decorator_return = decorated if func is None: decorator_return = partial( cls._decorate, @@ -166,6 +167,8 @@ def decorated(*args, **kwargs): access=access, outer_key=outer_key, inner_key=inner_key) + else: + decorator_return = decorated return decorator_return @omnimethod @@ -173,7 +176,7 @@ def decorate(self, func: DecoratedCallable | None = None, file_path: os.PathLike | str | None = None, access: CacheAccess | None = None, outer_key: Hashable | None = None, - inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable: + inner_key: str | Callable[[Any], Hashable] | None = None) -> DecoratedCallable | Decorator: """ Wraps a function and stores its execution results into a pickled cache dictionary. Examples: @@ -240,6 +243,7 @@ def decorate(self, func: DecoratedCallable | None = None, def __enter__(self) -> MyDefaultDict: if 'r' in self.access: + self.file_path = _resolve_filepath(self.file_path) self.cache_dict = initialize_cache_dict(self.file_path) else: self.cache_dict = MyDefaultDict(NoCache) @@ -252,7 +256,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.cache_dict = None def get(self, key: None | Hashable) -> NoCache | MyDefaultDict | Any: - file_path = self.file_path + file_path = _resolve_filepath(self.file_path) cache_dict = _initialize_cache(file_path) if key is None: return cache_dict diff --git a/pecapiku/hash.py b/pecapiku/hash.py index 51d3b47..4500b1d 100644 --- a/pecapiku/hash.py +++ b/pecapiku/hash.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import Sequence +from typing import Iterable, Sequence def get_hash(objects: Sequence[object]) -> str: @@ -21,6 +21,9 @@ def _json_dumps(obj: object) -> str: def _json_default(obj: object): + if isinstance(obj, Iterable): + return list(obj) + obj_class = obj.__class__ class_path = '.'.join((obj_class.__module__, obj_class.__name__)) vars_dict = vars(obj) if hasattr(obj, 
'__dict__') else {} diff --git a/pecapiku/no_cache.py b/pecapiku/no_cache.py index f223432..e33c314 100644 --- a/pecapiku/no_cache.py +++ b/pecapiku/no_cache.py @@ -1,6 +1,3 @@ -from __future__ import annotations - - class NoCache: def __bool__(self): return False diff --git a/pecapiku/single_value_cache.py b/pecapiku/single_value_cache.py index 5429112..003a352 100644 --- a/pecapiku/single_value_cache.py +++ b/pecapiku/single_value_cache.py @@ -2,14 +2,14 @@ import os from functools import partial, wraps -from typing import Any, Hashable +from typing import Any, Generic, Hashable -from pecapiku.base_cache import BaseCache, DecoratedCallable, omnimethod +from pecapiku.base_cache import BaseCache, DecoratedCallable, Decorator, omnimethod from pecapiku.cache_access import CacheAccess, _initialize_cache, _resolve_filepath, update_cache from pecapiku.no_cache import NoCache -class SingleValueCache(BaseCache): +class SingleValueCache(BaseCache, Generic[DecoratedCallable]): """ Decorator for caching of evaluation results. Creates a "pickle" file at disk space on a specified path. Wraps a function and stores its execution result in the file. 
@@ -48,22 +48,23 @@ def __init__(self, file_path: os.PathLike | str | None = None, access: CacheAcce self.cache_dict = None def __call__(self, - func: DecoratedCallable | None = None) -> DecoratedCallable: - return self.decorate(func) + func: DecoratedCallable | None = None, *, file_path: os.PathLike | str | None = None, + access: CacheAccess | None = None) -> DecoratedCallable | Decorator: + return self.decorate(func=func, file_path=file_path, access=access) - def get_cache_val(self, key: Hashable) -> Any: + def _get_cache_val(self, key: Hashable) -> Any: return _initialize_cache(self.file_path) - def put_cache_val(self, key: Hashable, value: Any): + def _put_cache_val(self, key: Hashable, value: Any): return update_cache(value, self.file_path) - def key_func(self, *args, **kwargs) -> Hashable: + def _key_func(self, *args, **kwargs) -> Hashable: return 0 @classmethod def _decorate(cls, func: DecoratedCallable | None = None, file_path: os.PathLike | str | None = None, - access: CacheAccess = 'rew') -> DecoratedCallable: + access: CacheAccess = 'rew') -> DecoratedCallable | Decorator: """ Wraps a function and stores its execution results into a pickle cache file. 
Example @@ -100,9 +101,10 @@ def decorated(*args, **kwargs): val = instance._read_execute_write(func, func_args=args, func_kwargs=kwargs, access=access) return val - decorator_return = decorated if func is None: - decorator_return = partial(cls.decorate, file_path=file_path, access=access) + decorator_return = partial(cls._decorate, file_path=file_path, access=access) + else: + decorator_return = decorated return decorator_return @staticmethod @@ -111,8 +113,8 @@ def get(file_path: os.PathLike | str) -> NoCache | Any: return _initialize_cache(file_path) @omnimethod - def decorate(self, func: DecoratedCallable | None = None, file_path: os.PathLike | str | None = None, - access: CacheAccess | None = None, **kwargs) -> DecoratedCallable: + def decorate(self, func: DecoratedCallable | None = None, *, file_path: os.PathLike | str | None = None, + access: CacheAccess | None = None, **kwargs) -> DecoratedCallable | Decorator: """ Wraps a function and stores its execution results into a pickle cache file. 
Example diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9be222b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from pecapiku import config + + +def get_project_root(): + return Path(__file__).parents[1] + + +@pytest.fixture(scope='function', autouse=True) +def set_cache_dir(): + cache_dir = get_project_root() / 'tests' / '.proj_cache' + config.set_cache_dir(cache_dir) + yield + [f.unlink() for f in cache_dir.glob("*") if f.is_file()] + + +@pytest.fixture(scope='function') +def get_cache_dir(set_cache_dir): + cache_dir = config.get_cache_dir() + return cache_dir diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/core/test_config.py b/tests/unit/core/test_config.py new file mode 100644 index 0000000..f2e7cb2 --- /dev/null +++ b/tests/unit/core/test_config.py @@ -0,0 +1,8 @@ +from pecapiku import config +from tests.conftest import get_project_root, set_cache_dir # noqa + + +def test_cache_dir(): + current_cache_dir = config.get_cache_dir() + expected_cache_dir = get_project_root() / 'tests' / '.proj_cache' + assert current_cache_dir == expected_cache_dir diff --git a/tests/unit/core/test_hash_retrieval.py b/tests/unit/core/test_hash_retrieval.py new file mode 100644 index 0000000..ff096cf --- /dev/null +++ b/tests/unit/core/test_hash_retrieval.py @@ -0,0 +1,35 @@ +from typing import Any + +import pytest + +from pecapiku.hash import get_hash + + +def test_hash_order(): + assert get_hash([1, 2, 3]) != get_hash([3, 2, 1]) + + +class TestObject: + def __init__(self, foo: Any): + self.foo = foo + + +class OtherTestObject: + def __init__(self, foo: Any): + self.foo = foo + + 
+@pytest.mark.parametrize('test_obj_1, test_obj_2, test_obj_3', + [ + [{1, 2}, {2, 1}, {1, 2, 3}], + [dict(a=1, b=2), dict(b=2, a=1), dict(a=2, b=1)], + [TestObject(1), TestObject(1), TestObject(2)], + [TestObject(1), TestObject(1), OtherTestObject(1)], + [TestObject(1), TestObject(1), TestObject('1')], + [TestObject(None), TestObject(None), TestObject('None')], + ]) +def test_hash_invariants(test_obj_1, test_obj_2, test_obj_3): + hash_1 = get_hash([test_obj_1]) + hash_2 = get_hash([test_obj_2]) + hash_3 = get_hash([test_obj_3]) + assert hash_1 == hash_2 != hash_3 diff --git a/tests/unit/interface/__init__.py b/tests/unit/interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/interface/test_cache_classes.py b/tests/unit/interface/test_cache_classes.py new file mode 100644 index 0000000..be7129e --- /dev/null +++ b/tests/unit/interface/test_cache_classes.py @@ -0,0 +1,113 @@ +from functools import wraps +from itertools import product +from time import sleep, time +from typing import Any + +import pytest + +from pecapiku import CacheDict, SingleValueCache +from tests.conftest import get_cache_dir, set_cache_dir # noqa + + +class TestObject: + def __init__(self, foo: Any): + self.foo = foo + + def sleep(self, time_: float) -> float: + sleep(time_) + return time_ + + +class TestObjectWithCounter: + def __init__(self, foo: Any): + self.foo = foo + self.counter = 0 + + def sleep(self, time_: float) -> float: + self.counter += 1 + sleep(time_) + return time_ + + +def sleep_(time_: float): + sleep(time_) + return time_ + + +def timed(func): + @wraps(func) + def wrapper(*args, **kwargs): + t1 = time() + res = func(*args, **kwargs) + t2 = time() + t = t2 - t1 + return res, t + + return wrapper + + +@pytest.mark.parametrize('sleep_func', [sleep_, TestObject(1).sleep]) +@pytest.mark.parametrize('cache_decorator, cache_kwargs', + [ + *product([SingleValueCache(), SingleValueCache.decorate], [dict(file_path='some.pkl')]), + 
*product([CacheDict(), CacheDict.decorate], [{}]), + ] + ) +@pytest.mark.parametrize('wrapper_syntax', ['definition', 'runtime']) +def test_decorators(sleep_func, cache_decorator, cache_kwargs, get_cache_dir, wrapper_syntax): + if wrapper_syntax == 'definition': + @timed + @cache_decorator(**cache_kwargs) + def cached_sleep(*args, **kwargs): + return sleep_func(*args, **kwargs) + elif wrapper_syntax == 'runtime': + cached_sleep = cache_decorator(sleep_func, **cache_kwargs) + cached_sleep = timed(cached_sleep) + else: + raise ValueError(f'Unexpected value: {wrapper_syntax}') + + plan = 0.1 + plan_return, fact = cached_sleep(plan) + cache_files = set(get_cache_dir.iterdir()) + + assert plan_return == plan + assert fact > plan + assert cache_files + + plan_return, fact = cached_sleep(plan) + cache_files_2 = set(get_cache_dir.iterdir()) + + assert plan_return == plan + assert fact < plan + assert cache_files == cache_files_2 + + +@pytest.mark.parametrize('cache_decorator, cache_kwargs', + [ + *product([CacheDict(), CacheDict.decorate], [{}]), + ] + ) +def test_method_of_changing_object(cache_decorator, cache_kwargs, get_cache_dir): + test_object = TestObjectWithCounter(1) + test_object.sleep = cache_decorator(test_object.sleep, **cache_kwargs) + test_object.sleep = timed(test_object.sleep) + + val_1, t_1 = test_object.sleep(0.1) + val_2, t_2 = test_object.sleep(0.1) + + assert t_1 > 0.1 + assert t_2 > 0.1 + assert val_1 == val_2 + assert test_object.counter == 2 + assert set(get_cache_dir.iterdir()) + + +def test_context_manager(get_cache_dir): + key, val = 'key', 'val' + with CacheDict() as c_d: + c_d[key] = val + + with CacheDict() as c_d: + val_other = c_d[key] + assert val == val_other + assert set(get_cache_dir.iterdir())