From 2303a7b5d52d1635e6d39dd9489cb635e0a01335 Mon Sep 17 00:00:00 2001
From: Mikhail Beck
Date: Wed, 24 Apr 2024 14:15:34 +0100
Subject: [PATCH] #114 Added path builder function (#115)

* #114 Added build_path function prototype.

* #114 Added the first integration test for the path builder.

* #114 Added two more integration tests for the path builder.

* #114 Addressed review comments.

* #114 Tried to fix the integration test error.

* #114 Used specific exceptions.

* #114 Understanding the lock on deleted files.

* #114 Understanding the lock on deleted files.

* #114 Fixed the integration tests and improved the build_path documentation.

* #114 Fixed the integration tests.

* #114 Added the service_name to buckets; made MountedBucket R/W

* #114 Added the service_name to buckets; made MountedBucket R/W

* #114 Added the service_name to buckets; made MountedBucket R/W

* #114 Added the service_name to buckets; made MountedBucket R/W

* #114 Made path a sub-module

* #114 Added as_udf_path method

* #114 Fixed issues found in a review
---
 doc/changes/unreleased.md            |   1 +
 exasol/bucketfs/__init__.py          |   4 +
 exasol/bucketfs/_buckets.py          | 140 +++++++++++++++++++++++++
 exasol/bucketfs/_path.py             | 147 +++++++++++++++++++++++++--
 exasol/bucketfs/_service.py          |   6 ++
 test/integration/conftest.py         |   3 +-
 test/integration/test_bucket_path.py | 111 ++++++++++++++++++++
 test/unit/bucketfs_test.py           |  17 ++++
 test/unit/conftest.py                |  62 +----------
 test/unit/test_bucket_path.py        |  56 +++++-----
 10 files changed, 453 insertions(+), 94 deletions(-)
 create mode 100644 test/integration/test_bucket_path.py

diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md
index eaea2280..2e2efb37 100644
--- a/doc/changes/unreleased.md
+++ b/doc/changes/unreleased.md
@@ -26,6 +26,7 @@
 Added the PathLike protocol as described in the [design document](../design/bucketpath.rst).
 Extracted bucket interface into BucketLike protocol.
 Implemented PathLike for buckets based on BucketLike protocol.
+Added a path factory function.
 
 ## Internal
diff --git a/exasol/bucketfs/__init__.py b/exasol/bucketfs/__init__.py
index 57a7bf42..e0f81e15 100644
--- a/exasol/bucketfs/__init__.py
+++ b/exasol/bucketfs/__init__.py
@@ -46,6 +46,7 @@
 from __future__ import annotations
 
 from exasol.bucketfs._buckets import (
+    BucketLike,
     Bucket,
     MappedBucket,
 )
@@ -55,14 +56,17 @@
     as_hash,
     as_string,
 )
+from exasol.bucketfs import _path as path
 from exasol.bucketfs._error import BucketFsError
 from exasol.bucketfs._service import Service
 
 __all__ = [
     "Service",
+    "BucketLike",
     "Bucket",
     "MappedBucket",
     "BucketFsError",
+    "path",
     "as_bytes",
     "as_string",
     "as_file",
diff --git a/exasol/bucketfs/_buckets.py b/exasol/bucketfs/_buckets.py
index 7aaba344..b402f461 100644
--- a/exasol/bucketfs/_buckets.py
+++ b/exasol/bucketfs/_buckets.py
@@ -6,7 +6,13 @@
     Iterable,
     Iterator,
     Protocol,
+    Optional,
 )
+import os
+from io import IOBase
+import shutil
+import errno
+from pathlib import Path
 
 import requests
 from requests import HTTPError
@@ -27,6 +33,18 @@ class BucketLike(Protocol):
     It is compatible with both on-premises and SaaS BucketFS systems.
     """
 
+    @property
+    def name(self) -> str:
+        """
+        Returns the bucket name.
+        """
+
+    @property
+    def udf_path(self) -> str:
+        """
+        Returns the path to the bucket's base directory, as it is seen from a UDF.
+        """
+
     @property
     def files(self) -> Iterable[str]:
         """
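
For orientation: the protocol is consumed purely structurally, so any object offering
these members can stand in for a bucket. A minimal in-memory sketch of a conforming
class (hypothetical, not part of this change):

    from __future__ import annotations
    from typing import ByteString, BinaryIO, Iterable

    class InMemoryBucket:
        # Toy BucketLike implementation keeping file data in a dict.
        def __init__(self, name: str, service_name: str) -> None:
            self._name = name
            self._service_name = service_name
            self._data: dict[str, bytes] = {}

        @property
        def name(self) -> str:
            return self._name

        @property
        def udf_path(self) -> str:
            return f'/buckets/{self._service_name}/{self._name}'

        @property
        def files(self) -> Iterable[str]:
            return list(self._data)

        def delete(self, path: str) -> None:
            self._data.pop(path, None)

        def upload(self, path: str, data: ByteString | BinaryIO) -> None:
            self._data[path] = bytes(data) if isinstance(data, ByteString) else data.read()

        def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]:
            content = self._data[path]
            for i in range(0, len(content), chunk_size):
                yield content[i:i + chunk_size]
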
@@ -110,6 +128,7 @@ def __init__(
         username: str,
         password: str,
         verify: bool | str = True,
+        service_name: Optional[str] = None
     ):
         """
         Create a new bucket instance.
@@ -127,12 +146,15 @@ def __init__(
             Either a boolean, in which case it controls whether we verify the server's
             TLS certificate, or a string, in which case it must be a path to a CA bundle
             to use. Defaults to ``True``.
+            service_name:
+                Optional name of the BucketFS service.
         """
         self._name = name
         self._service = _parse_service_url(service)
         self._username = username
         self._password = password
         self._verify = verify
+        self._service_name = service_name
 
     def __str__(self):
         return f"Bucket<{self.name} | on: {self._service}>"
@@ -141,6 +163,13 @@ def __str__(self):
     def name(self) -> str:
         return self._name
 
+    @property
+    def udf_path(self) -> str:
+        if self._service_name is None:
+            raise BucketFsError('The bucket cannot provide its udf_path '
+                                'as the service name is unknown.')
+        return f'/buckets/{self._service_name}/{self._name}'
+
     @property
     def _auth(self) -> HTTPBasicAuth:
         return HTTPBasicAuth(username=self._username, password=self._password)
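
With the new optional service_name, an on-prem bucket can report where its files appear
inside a UDF; without it, udf_path raises a BucketFsError. A usage sketch (URL and
credentials are placeholders):

    from exasol.bucketfs import Bucket, BucketFsError

    bucket = Bucket(
        name='default',
        service='http://127.0.0.1:2580',
        username='w',
        password='write',
        service_name='bfsdefault',  # optional; enables udf_path
    )
    assert bucket.udf_path == '/buckets/bfsdefault/default'

    nameless = Bucket(name='default', service='http://127.0.0.1:2580',
                      username='w', password='write')
    try:
        nameless.udf_path
    except BucketFsError:
        pass  # the service name is unknown, so the UDF path cannot be derived
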
+ """ + + def __init__(self, + service_name: str = 'bfsdefault', + bucket_name: str = 'default', + base_path: Optional[str] = None): + self._name = bucket_name + if base_path: + self.root = Path(base_path) + else: + self.root = Path('/buckets') / service_name / bucket_name + + @property + def name(self) -> str: + return self._name + + @property + def udf_path(self) -> str: + return str(self.root) + + @property + def files(self) -> list[str]: + return [str(pth.relative_to(self.root)) for pth in self.root.rglob('*.*')] + + def delete(self, path: str) -> None: + try: + full_path = self.root / path + full_path.unlink(missing_ok=True) + except IsADirectoryError: + pass + + def upload(self, path: str, data: ByteString | BinaryIO) -> None: + full_path = self.root / path + if not full_path.parent.exists(): + full_path.parent.mkdir(parents=True) + with full_path.open('wb') as f: + if isinstance(data, IOBase): + shutil.copyfileobj(data, f) + elif isinstance(data, ByteString): + f.write(data) + else: + raise ValueError('upload called with unrecognised data type. ' + 'A valid data should be either ByteString or BinaryIO') + + def download(self, path: str, chunk_size: int) -> Iterable[ByteString]: + full_path = self.root / path + if (not full_path.exists()) or (not full_path.is_file()): + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(path)) + with full_path.open('rb') as f: + while True: + data = f.read(chunk_size) + if not data: + break + yield data + + def __str__(self): + return f"MountedBucket<{self.name} | on: {self._service_name}>" + + class MappedBucket: """ Wraps a bucket and provides various convenience features to it (e.g. index based access). diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 4f60b65b..d3f30dd1 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -1,10 +1,19 @@ from __future__ import annotations from typing import Protocol, ByteString, BinaryIO, Iterable, Generator, Optional +from enum import Enum, auto from pathlib import PurePath, PureWindowsPath import errno import os from io import IOBase -from exasol.bucketfs._buckets import BucketLike +from exasol.bucketfs._buckets import BucketLike, SaaSBucket, MountedBucket +from exasol.bucketfs._service import Service +from exasol.bucketfs._error import BucketFsError + + +class StorageBackend(Enum): + onprem = auto() + saas = auto() + mounted = () class PathLike(Protocol): @@ -41,6 +50,12 @@ def as_uri(self) -> str: Represent the path as a file URI. Can be used to reconstruct the location/path. """ + def as_udf_path(self) -> str: + """ + This method is specific to a BucketFS flavour of the PathLike. + It returns a corresponding path, as it's seen from a UDF. + """ + def exists(self) -> bool: """ Return True if the path points to an existing file or directory. @@ -73,7 +88,7 @@ def read(self, chunk_size: int = 8192) -> Iterable[ByteString]: IsADirectoryError: if the pathlike object points to a directory. """ - def write(self, data: ByteString | BinaryIO | Iterable[ByteString]): + def write(self, data: ByteString | BinaryIO | Iterable[ByteString]) -> None: """ Writes data to this path. @@ -90,7 +105,7 @@ def write(self, data: ByteString | BinaryIO | Iterable[ByteString]): NotAFileError: if the pathlike object is not a file path. """ - def rm(self): + def rm(self) -> None: """ Remove this file. @@ -102,7 +117,7 @@ def rm(self): FileNotFoundError: If the file does not exist. 
""" - def rmdir(self, recursive: bool = False): + def rmdir(self, recursive: bool = False) -> None: """ Removes this directory. @@ -126,7 +141,7 @@ def joinpath(self, *path_segments) -> "PathLike": A new pathlike object pointing the combined path. """ - def walk(self) -> Generator[tuple["PathLike", list[str], list[str]], None, None]: + def walk(self, top_down: bool = True) -> Generator[tuple["PathLike", list[str], list[str]], None, None]: """ Generate the file names in a directory tree by walking the tree either top-down or bottom-up. @@ -272,6 +287,9 @@ def parent(self) -> str: def as_uri(self) -> str: return self._path.as_uri() + def as_udf_path(self) -> str: + return str(PurePath(self._bucket_api.udf_path) / self._path) + def exists(self) -> bool: return self._navigate() is not None @@ -320,7 +338,7 @@ def _rmdir_recursive(self, node: _BucketFile): if node.is_file: self._bucket_api.delete(node.path) - def joinpath(self, *path_segments) -> "PathLike": + def joinpath(self, *path_segments) -> PathLike: # The path segments can be of either this type or an os.PathLike. cls = type(self) seg_paths = [seg._path if isinstance(seg, cls) else seg for seg in path_segments] @@ -376,3 +394,120 @@ def __truediv__(self, other): def __str__(self): return str(self._path) + + +def _create_onprem_bucket(url: str, + username: str, + password: str, + bucket_name: str = 'default', + verify: bool | str = True, + service_name: Optional[str] = None + ) -> BucketLike: + """ + Creates an on-prem bucket. + """ + credentials = {bucket_name: {'username': username, 'password': password}} + service = Service(url, credentials, verify, service_name) + buckets = service.buckets + if bucket_name not in buckets: + raise BucketFsError(f'Bucket {bucket_name} does not exist.') + return buckets[bucket_name] + + +def _create_saas_bucket(account_id: str, + database_id: str, + pat: str, + url: str = 'https://cloud.exasol.com' + ) -> BucketLike: + """ + Creates a SaaS bucket. + """ + return SaaSBucket(url=url, account_id=account_id, database_id=database_id, pat=pat) + + +def _create_mounted_bucket(service_name: str = 'bfsdefault', + bucket_name: str = 'default', + base_path: Optional[str] = None + ) -> BucketLike: + """ + Creates a bucket mounted to a UDF. + """ + bucket = MountedBucket(service_name, bucket_name, base_path) + if not bucket.root.exists(): + raise BucketFsError(f'Service {service_name} or bucket {bucket_name} do not exist.') + return bucket + + +def build_path(**kwargs) -> PathLike: + """ + Creates a PathLike object based on a bucket in one of the BucketFS storage backends. + It provides the same interface for the following BucketFS implementations: + - On-Premises + - SaaS + - BucketFS files mounted as read-only directory in a UDF. + + Arguments: + backend: + This is a mandatory parameter that indicates the BucketFS storage backend. + It can be provided either as a string or as the StorageBackend enumeration. + path: + Optional parameter that selects a path within the bucket. If not provided + the returned PathLike objects corresponds to the root of the bucket. Hence, + an alternative way of creating a PathLike pointing to a particular file or + directory is as in the code below. + path = build_path(...) / "the_desired_path" + + The rest of the arguments a backend specific. + + On-prem arguments: + url: + Url of the BucketFS service, e.g. `http(s)://127.0.0.1:2580`. + username: + BucketFS username (generally, different from the DB username). + password: + BucketFS user password. + bucket_name: + Name of the bucket. 
+            verify:
+                Either a boolean, in which case it controls whether we verify the server's
+                TLS certificate, or a string, in which case it must be a path to a CA bundle
+                to use. Defaults to ``True``.
+            service_name:
+                Optional name of the BucketFS service.
+
+        SaaS arguments:
+            url:
+                URL of the Exasol SaaS service. Defaults to 'https://cloud.exasol.com'.
+            account_id:
+                SaaS user account ID, e.g. 'org_LVeOj4pwXhPatNz5'
+                (given example is not a valid ID of an existing account).
+            database_id:
+                Database ID, e.g. 'msduZKlMR8QCP_MsLsVRwy'
+                (given example is not a valid ID of an existing database).
+            pat:
+                Personal Access Token, e.g. 'exa_pat_aj39AsM3bYR9bQ4qk2wiG8SWHXbRUGNCThnep5YV73az6A'
+                (given example is not a valid PAT).
+
+        Mounted BucketFS directory arguments:
+            service_name:
+                Name of the BucketFS service (not a service URL). Defaults to 'bfsdefault'.
+            bucket_name:
+                Name of the bucket. Currently, a PathLike cannot span multiple buckets.
+            base_path:
+                Explicitly specified root path in a file system. This is an alternative to
+                providing the service_name and the bucket_name.
+    """
+
+    backend = kwargs.pop('backend', StorageBackend.onprem)
+    path = kwargs.pop('path', '')
+
+    if isinstance(backend, str):
+        backend = StorageBackend[backend.lower()]
+    if backend == StorageBackend.onprem:
+        bucket = _create_onprem_bucket(**kwargs)
+    elif backend == StorageBackend.saas:
+        bucket = _create_saas_bucket(**kwargs)
+    else:
+        bucket = _create_mounted_bucket(**kwargs)
+
+    return BucketPath(path, bucket)
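
Putting the factory together with the new PathLike methods, a usage sketch (service URL
and credentials are placeholders; the integration tests below show working on-prem
examples):

    import exasol.bucketfs as bfs

    # On-prem backend, addressed by service URL and BucketFS credentials.
    base = bfs.path.build_path(
        backend=bfs.path.StorageBackend.onprem,  # or simply 'onprem'
        url='http://127.0.0.1:2580',
        username='w',
        password='write',
        service_name='bfsdefault',  # optional; enables as_udf_path()
    )
    poem = base / 'my_poems' / 'little_star.txt'
    poem.write(b'Twinkle twinkle little star.')
    assert poem.as_udf_path() == '/buckets/bfsdefault/default/my_poems/little_star.txt'

    # Inside a UDF, the same files are reachable through the mounted backend.
    udf_base = bfs.path.build_path(backend='mounted', service_name='bfsdefault')
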
""" self._url = _parse_service_url(url) self._authenticator = defaultdict( @@ -52,6 +56,7 @@ def __init__( credentials if credentials is not None else {}, ) self._verify = verify + self._service_name = service_name @property def buckets(self) -> MutableMapping[str, Bucket]: @@ -73,6 +78,7 @@ def buckets(self) -> MutableMapping[str, Bucket]: service=self._url, username=self._authenticator[name]["username"], password=self._authenticator[name]["password"], + service_name=self._service_name ) for name in buckets } diff --git a/test/integration/conftest.py b/test/integration/conftest.py index 99a0ed57..01691c8b 100644 --- a/test/integration/conftest.py +++ b/test/integration/conftest.py @@ -7,6 +7,7 @@ Tuple, Union, ) +from exasol.bucketfs._shared import _build_url import pytest import requests @@ -32,7 +33,7 @@ def delete_file( service: str, bucket: str, username: str, password: str, filename: str ) -> Tuple[str, str]: auth = HTTPBasicAuth(username, password) - url = f"{service.rstrip('/')}/{bucket}/{filename}" + url = _build_url(service_url=service, bucket=bucket, path=filename) response = requests.delete(url, auth=auth) response.raise_for_status() return filename, url diff --git a/test/integration/test_bucket_path.py b/test/integration/test_bucket_path.py new file mode 100644 index 00000000..5982e733 --- /dev/null +++ b/test/integration/test_bucket_path.py @@ -0,0 +1,111 @@ +from __future__ import annotations +from typing import ByteString +import pytest +import exasol.bucketfs as bfs +from integration.conftest import delete_file + + +@pytest.fixture +def children_poem() -> ByteString: + poem_text = \ + b"Twinkle twinkle little star." \ + b"How I wonder what you are." \ + b"Up above the world so high." \ + b"Like a diamond in the sky." + return poem_text + + +@pytest.fixture +def classic_poem() -> ByteString: + poem_text = \ + b"My heart's in the Highlands, my heart is not here," \ + b"My heart's in the Highlands, a-chasing the deer;" \ + b"Chasing the wild-deer, and following the roe," \ + b"My heart's in the Highlands, wherever I go." 
+    return poem_text
+
+
+def _collect_all_names(path: bfs.path.PathLike) -> set[str]:
+    all_names = []
+    for _, dirs, files in path.walk():
+        all_names.extend(dirs)
+        all_names.extend(files)
+    return set(all_names)
+
+
+def test_write_read_back_onprem(test_config, children_poem):
+
+    base_path = bfs.path.build_path(backend=bfs.path.StorageBackend.onprem,
+                                    url=test_config.url, verify=False,
+                                    username=test_config.username,
+                                    password=test_config.password)
+    file_name = 'my_poems/little_star.txt'
+    poem_path = base_path / file_name
+
+    try:
+        poem_path.write(children_poem)
+        data_back = b''.join(poem_path.read(20))
+        assert data_back == children_poem
+    finally:
+        # cleanup
+        delete_file(
+            test_config.url,
+            'default',
+            test_config.username,
+            test_config.password,
+            file_name
+        )
+
+
+def test_write_list_files_onprem(test_config, children_poem, classic_poem):
+
+    base_path = bfs.path.build_path(backend=bfs.path.StorageBackend.onprem,
+                                    url=test_config.url, path='my_poems', verify=False,
+                                    username=test_config.username,
+                                    password=test_config.password)
+    poem_path1 = base_path / 'children/little_star.txt'
+    poem_path2 = base_path / 'classic/highlands.txt'
+
+    try:
+        poem_path1.write(children_poem)
+        poem_path2.write(classic_poem)
+        expected_names = {'children', 'classic', 'little_star.txt', 'highlands.txt'}
+        assert _collect_all_names(base_path) == expected_names
+    finally:
+        # cleanup
+        for poem_path in [poem_path1, poem_path2]:
+            delete_file(
+                test_config.url,
+                'default',
+                test_config.username,
+                test_config.password,
+                str(poem_path)
+            )
+
+
+def test_write_delete_onprem(test_config, children_poem, classic_poem):
+
+    base_path = bfs.path.build_path(backend=bfs.path.StorageBackend.onprem,
+                                    url=test_config.url, verify=False,
+                                    username=test_config.username,
+                                    password=test_config.password)
+    poems_root = base_path / 'my_other_poems'
+    poem_path1 = poems_root / 'children/little_star.txt'
+    poem_path2 = poems_root / 'classic/highlands.txt'
+
+    try:
+        poem_path1.write(children_poem)
+        poem_path2.write(classic_poem)
+        poem_path1.rm()
+        expected_names = {'classic', 'highlands.txt'}
+        assert _collect_all_names(poems_root) == expected_names
+    finally:
+        # cleanup
+        for poem_path in [poem_path1, poem_path2]:
+            delete_file(
+                test_config.url,
+                'default',
+                test_config.username,
+                test_config.password,
+                str(poem_path)
+            )
diff --git a/test/unit/bucketfs_test.py b/test/unit/bucketfs_test.py
index 27fc23cf..51379275 100644
--- a/test/unit/bucketfs_test.py
+++ b/test/unit/bucketfs_test.py
@@ -233,3 +233,20 @@ def test_dunder_str_of_mapped_bucket():
     )
     expected = "MappedBucket>"
     assert f"{bucket}" == expected
+
+
+def test_mounted_bucket_files(bucket_fake):
+    content = set(bucket_fake.files)
+    expected_content = {
+        'file00.dat',
+        'file01.dat',
+        'dir1/file10.dat',
+        'dir1/file11.dat',
+        'dir1/dir11/file110.dat',
+        'dir1/dir11/file111.dat',
+        'dir1/dir12/file120.dat',
+        'dir1/dir12/file121.dat',
+        'dir2/file20.dat',
+        'dir2/file21.dat'
+    }
+    assert content == expected_content
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
index 797e3e1e..7dae1f99 100644
--- a/test/unit/conftest.py
+++ b/test/unit/conftest.py
@@ -1,66 +1,10 @@
-from __future__ import annotations
-from typing import Iterable, ByteString, BinaryIO
-import os
-from io import IOBase
-import shutil
-import errno
-from pathlib import Path
 import pytest
 
-
-class BucketFake:
-    """
-    Implementation of the Bucket API backed by the normal file system.
- """ - - def __init__(self, root: str): - self.root = Path(root) - if not self.root.is_dir(): - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(root)) - - def _get_full_path(self, path: str | Path): - return self.root / path - - @property - def files(self) -> list[str]: - root_length = len(str(self.root)) - if self.root != self.root.root: - root_length += 1 - return [str(pth)[root_length:] for pth in self.root.rglob('*.*')] - - def delete(self, path: str) -> None: - try: - self._get_full_path(path).unlink(missing_ok=True) - except IsADirectoryError: - pass - - def upload(self, path: str, data: ByteString | BinaryIO) -> None: - full_path = self._get_full_path(path) - if not full_path.parent.exists(): - full_path.parent.mkdir(parents=True) - with full_path.open('wb') as f: - if isinstance(data, IOBase): - shutil.copyfileobj(data, f) - elif isinstance(data, ByteString): - f.write(data) - else: - raise ValueError('upload_file called with unrecognised data type. ' - 'A valid data should be either ByteString or BinaryIO') - - def download(self, path: str, chunk_size: int) -> Iterable[ByteString]: - full_path = self._get_full_path(path) - if (not full_path.exists()) or (not full_path.is_file()): - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(path)) - with full_path.open('rb') as f: - while True: - data = f.read(chunk_size) - if not data: - break - yield data +from exasol.bucketfs._buckets import MountedBucket @pytest.fixture -def bucket_fake(tmpdir) -> BucketFake: +def bucket_fake(tmpdir) -> MountedBucket: dir1 = tmpdir.mkdir('dir1') dir2 = tmpdir.mkdir('dir2') dir11 = dir1.mkdir('dir11') @@ -70,4 +14,4 @@ def bucket_fake(tmpdir) -> BucketFake: file_xx = d / f'file{d_id}{i}.dat' dat = bytes([d_id * i] * 24) file_xx.write_binary(dat) - return BucketFake(tmpdir) + return MountedBucket(base_path=tmpdir) diff --git a/test/unit/test_bucket_path.py b/test/unit/test_bucket_path.py index b46b8aab..02e393f0 100644 --- a/test/unit/test_bucket_path.py +++ b/test/unit/test_bucket_path.py @@ -1,7 +1,7 @@ from pathlib import Path from itertools import chain import pytest -from exasol.bucketfs._path import BucketPath +import exasol.bucketfs as bfs @pytest.mark.parametrize("test_path, should_exist", [ @@ -11,7 +11,7 @@ ('dir1/dir3', False) ]) def test_file_exists(bucket_fake, test_path, should_exist): - path = BucketPath(test_path, bucket_api=bucket_fake) + path = bfs.path.BucketPath(test_path, bucket_api=bucket_fake) assert path.exists() == should_exist @@ -22,7 +22,7 @@ def test_file_exists(bucket_fake, test_path, should_exist): ('dir1/dir3', False) ]) def test_is_dir(bucket_fake, test_path, is_dir): - path = BucketPath(test_path, bucket_api=bucket_fake) + path = bfs.path.BucketPath(test_path, bucket_api=bucket_fake) assert path.is_dir() == is_dir @@ -33,81 +33,81 @@ def test_is_dir(bucket_fake, test_path, is_dir): ('dir1/dir3', False) ]) def test_is_file(bucket_fake, test_path, is_file): - path = BucketPath(test_path, bucket_api=bucket_fake) + path = bfs.path.BucketPath(test_path, bucket_api=bucket_fake) assert path.is_file() == is_file def test_rm(bucket_fake): - path = BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake) + path = bfs.path.BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake) path.rm() assert not path.exists() def test_rm_not_exist(bucket_fake): - path = BucketPath('dir1/dir12/file125.dat', bucket_api=bucket_fake) + path = bfs.path.BucketPath('dir1/dir12/file125.dat', bucket_api=bucket_fake) with pytest.raises(FileNotFoundError): 
         path.rm()
 
 
 def test_rm_directory(bucket_fake):
-    path = BucketPath('dir1/dir12', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1/dir12', bucket_api=bucket_fake)
     with pytest.raises(IsADirectoryError):
         path.rm()
 
 
 def test_rmdir(bucket_fake):
     for i in range(2):
-        BucketPath(f'dir1/dir12/file12{i}.dat', bucket_api=bucket_fake).rm()
-    path = BucketPath('dir1/dir12', bucket_api=bucket_fake)
+        bfs.path.BucketPath(f'dir1/dir12/file12{i}.dat', bucket_api=bucket_fake).rm()
+    path = bfs.path.BucketPath('dir1/dir12', bucket_api=bucket_fake)
     path.rmdir(recursive=False)
     assert not path.exists()
 
 
 def test_rmdir_recursive(bucket_fake):
-    path = BucketPath('dir1', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1', bucket_api=bucket_fake)
     path.rmdir(recursive=True)
     assert not path.exists()
 
 
 def test_rmdir_not_empty(bucket_fake):
-    path = BucketPath('dir1', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1', bucket_api=bucket_fake)
     with pytest.raises(OSError):
         path.rmdir(recursive=False)
 
 
 def test_rmdir_not_exist(bucket_fake):
-    path = BucketPath('dir1/dir5', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1/dir5', bucket_api=bucket_fake)
     path.rmdir()
 
 
 def test_rmdir_file(bucket_fake):
-    path = BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake)
     with pytest.raises(NotADirectoryError):
         path.rmdir()
 
 
 def test_joinpath(bucket_fake):
-    path1 = BucketPath('dir1', bucket_api=bucket_fake)
+    path1 = bfs.path.BucketPath('dir1', bucket_api=bucket_fake)
     path2 = 'dir11'
-    path3 = BucketPath('dir111/dir1111', bucket_api=bucket_fake)
+    path3 = bfs.path.BucketPath('dir111/dir1111', bucket_api=bucket_fake)
     path4 = Path('dir11111/file111110.dat')
     path = path1.joinpath(path2, path3, path4)
-    assert isinstance(path, BucketPath)
+    assert isinstance(path, bfs.path.BucketPath)
     assert str(path) == 'dir1/dir11/dir111/dir1111/dir11111/file111110.dat'
 
 
 def test_truediv(bucket_fake):
-    path1 = BucketPath('dir1', bucket_api=bucket_fake)
+    path1 = bfs.path.BucketPath('dir1', bucket_api=bucket_fake)
     path2 = 'dir11'
-    path3 = BucketPath('dir111/dir1111', bucket_api=bucket_fake)
+    path3 = bfs.path.BucketPath('dir111/dir1111', bucket_api=bucket_fake)
     path4 = Path('dir11111/file111110.dat')
     path = path1 / path2 / path3 / path4
-    assert isinstance(path, BucketPath)
+    assert isinstance(path, bfs.path.BucketPath)
     assert str(path) == 'dir1/dir11/dir111/dir1111/dir11111/file111110.dat'
 
 
 def test_walk_top_down(bucket_fake):
-    path = BucketPath('', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('', bucket_api=bucket_fake)
     content = [','.join(chain([pth.name, '/'], sorted(dirs), sorted(files)))
                for pth, dirs, files in path.walk(top_down=True)]
     expected_content = [
@@ -123,7 +123,7 @@ def test_walk_top_down(bucket_fake):
 
 
 def test_walk_bottom_up(bucket_fake):
-    path = BucketPath('', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('', bucket_api=bucket_fake)
     content = [','.join(chain([pth.name, '/'], sorted(dirs), sorted(files)))
                for pth, dirs, files in path.walk(top_down=False)]
     expected_content = [
@@ -139,7 +139,7 @@ def test_walk_bottom_up(bucket_fake):
 
 
 def test_iterdir(bucket_fake):
-    path = BucketPath('dir1', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1', bucket_api=bucket_fake)
     content = set(str(node) for node in path.iterdir())
     expected_content = {
         'dir1/dir11',
@@ -151,14 +151,14 @@ def test_iterdir(bucket_fake):
 
 
 def test_read(bucket_fake):
-    path = BucketPath('dir1/dir12/file121.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1/dir12/file121.dat', bucket_api=bucket_fake)
     expected_chunk = bytes([12] * 8)
     for chunk in path.read(chunk_size=8):
         assert chunk == expected_chunk
 
 
 def test_read_not_found(bucket_fake):
-    path = BucketPath('dir1/file12.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir1/file12.dat', bucket_api=bucket_fake)
     with pytest.raises(FileNotFoundError):
         list(path.read())
 
 
 @pytest.mark.parametrize("file_name", ['file23.dat', 'file20.dat'])
 def test_write_bytes(bucket_fake, file_name):
     data = b'abcd'
-    path = BucketPath(f'dir2/{file_name}', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath(f'dir2/{file_name}', bucket_api=bucket_fake)
     path.write(data)
     data_back = next(iter(path.read(100)))
     assert data_back == data
@@ -174,14 +174,14 @@ def test_write_bytes(bucket_fake, file_name):
 
 def test_write_chunks(bucket_fake):
     data_chunks = [b'abc', b'def', b'gh']
-    path = BucketPath('dir2/file23.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir2/file23.dat', bucket_api=bucket_fake)
     path.write(data_chunks)
     data_back = next(iter(path.read(100)))
     assert data_back == b'abcdefgh'
 
 
 def test_write_file(bucket_fake):
-    path = BucketPath('dir2/file_copy.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir2/file_copy.dat', bucket_api=bucket_fake)
     source_file = bucket_fake.root / 'dir2/file21.dat'
     with open(source_file, 'rb') as f:
         path.write(f)
@@ -190,7 +190,7 @@ def test_write_file(bucket_fake):
 
 def test_write_and_create_parent(bucket_fake):
-    path = BucketPath('dir2/dir21/file_copy.dat', bucket_api=bucket_fake)
+    path = bfs.path.BucketPath('dir2/dir21/file_copy.dat', bucket_api=bucket_fake)
     assert not path.exists()
     source_file = bucket_fake.root / 'dir2/file21.dat'
     with open(source_file, 'rb') as f: