From 631ed3f1c022f53b0f0f7c6bf27cc7c36dd04322 Mon Sep 17 00:00:00 2001
From: Mikhail Beck
Date: Wed, 17 Apr 2024 11:52:08 +0100
Subject: [PATCH] #111 Created a bridge between the Pathlike and the Bucket API (#112)

* #111 Created a bridge between the Pathlike and SaaS File API

* #111 Mentioned the bridge between the Pathlike and SaaS File API in the changelog.

* #111 Addressing linting errors.

* #111 Fixed import path in unit tests.

* #111 Made the implementation of the Pathlike generic

* #111 Made the implementation of the Pathlike generic
---
 doc/changes/unreleased.md     |   6 +-
 exasol/bucketfs/_buckets.py   |  82 ++++++++
 exasol/bucketfs/_path.py      | 378 ++++++++++++++++++++++++++++++++++
 exasol/bucketfs/pathlike.py   | 146 -------------
 test/unit/conftest.py         |  73 +++++++
 test/unit/test_bucket_path.py | 200 ++++++++++++++++++
 6 files changed, 737 insertions(+), 148 deletions(-)
 create mode 100644 exasol/bucketfs/_path.py
 delete mode 100644 exasol/bucketfs/pathlike.py
 create mode 100644 test/unit/conftest.py
 create mode 100644 test/unit/test_bucket_path.py

diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md
index c23a1406..eaea2280 100644
--- a/doc/changes/unreleased.md
+++ b/doc/changes/unreleased.md
@@ -21,9 +21,11 @@
 logging.basicConfig(level=logging.INFO)
 ```
 
-- Support for viewing BucketFS as a directory
+  - Support for viewing BucketFS as a directory
 
-  Added the Pathlike protocol as described in the [design document](../design/bucketpath.rst).
+    Added the PathLike protocol as described in the [design document](../design/bucketpath.rst).
+    Extracted the bucket interface into the BucketLike protocol.
+    Implemented PathLike for buckets based on the BucketLike protocol.
 
 ## Internal
 
diff --git a/exasol/bucketfs/_buckets.py b/exasol/bucketfs/_buckets.py
index b65f13c5..7aaba344 100644
--- a/exasol/bucketfs/_buckets.py
+++ b/exasol/bucketfs/_buckets.py
@@ -5,6 +5,7 @@
     ByteString,
     Iterable,
     Iterator,
+    Protocol,
 )
 
 import requests
@@ -20,7 +21,88 @@
 )
 
 
+class BucketLike(Protocol):
+    """
+    Definition of the Bucket interface.
+    It is compatible with both on-premises and SaaS BucketFS systems.
+    """
+
+    @property
+    def files(self) -> Iterable[str]:
+        """
+        Returns an iterator over the bucket files.
+
+        A usage example:
+        print(list(bucket_api.files))
+        output:
+        [dir1/subdir1/file1.dat, dir1/subdir2/file2.txt, ....]
+
+        Note that the paths will look like in the example above, i.e. POSIX style,
+        with no slash at the start or at the end.
+        """
+
+    def delete(self, path: str) -> None:
+        """
+        Deletes a file in the bucket.
+
+        :param path: Path of the file to be deleted.
+
+        Q. What happens if the path doesn't exist?
+        A. It does nothing, no error.
+
+        Q. What happens if the path points to a directory?
+        A. Same. There are no directories as such in the BucketFS, hence
+           a directory path is just a non-existent file.
+        """
+
+    def upload(self, path: str, data: ByteString | BinaryIO) -> None:
+        """
+        Uploads a file to the bucket.
+
+        :param path: Path in the bucket where the file should be uploaded.
+        :param data: Either a binary array or a binary stream, e.g. a file opened in binary mode.
+
+        Q. What happens if the parent is missing?
+        A. The bucket doesn't care about the structure of the file's path. Looking from the perspective
+           of a file system, the bucket will create the missing parent, but in reality it will just
+           store the data indexed by the provided path.
+
+        Q. What happens if the path points to an existing file?
+        A. That's fine, the file will be updated.
+
+        Q. What happens if the path points to an existing directory?
+        A. The bucket doesn't care about the structure of the file's path. Looking from the perspective
+           of a file system, there will exist a file and a directory with the same name.
+
+        Q. What should the path look like?
+        A. It should look like a POSIX path, but it should not contain any of the NTFS invalid characters.
+           It can have leading and/or trailing slashes, which will be subsequently removed.
+           If the path doesn't conform to this format a BucketFsError will be raised.
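+
+        An illustrative sketch (``bucket`` stands for any object implementing this
+        protocol; the names used here are assumptions, not part of the API):
+
+            bucket.upload('dir1/file1.dat', b'binary content')
+            with open('local_file.dat', 'rb') as stream:
+                bucket.upload('dir1/file2.dat', stream)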
+        """
+
+    def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]:
+        """
+        Downloads a file from the bucket. The content of the file will be provided
+        in chunks of the specified size. The full content of the file can be constructed using
+        code similar to the line below.
+        content = b''.join(api.download(path))
+
+        :param path: Path of the file in the bucket that should be downloaded.
+        :param chunk_size: Size of the chunks the file content will be delivered in.
+
+        Q. What happens if the file specified by the path doesn't exist?
+        A. A BucketFsError will be raised.
+
+        Q. What happens if the path points to a directory?
+        A. Same, since a "directory" in the BucketFS is just a non-existent file.
+        """
+
+
 class Bucket:
+    """
+    Implementation of the on-premises bucket.
+    """
+
     def __init__(
         self,
         name: str,
diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py
new file mode 100644
index 00000000..4f60b65b
--- /dev/null
+++ b/exasol/bucketfs/_path.py
@@ -0,0 +1,378 @@
+from __future__ import annotations
+from typing import Protocol, ByteString, BinaryIO, Iterable, Generator, Optional
+from pathlib import PurePath, PureWindowsPath
+import errno
+import os
+from io import IOBase
+from exasol.bucketfs._buckets import BucketLike
+
+
+class PathLike(Protocol):
+    """
+    Definition of the PathLike view of the files in a Bucket.
+    """
+
+    @property
+    def name(self) -> str:
+        """
+        A string representing the final path component, excluding the drive and root, if any.
+        """
+
+    @property
+    def suffix(self) -> str:
+        """
+        The file extension of the final component, if any.
+        """
+
+    @property
+    def root(self) -> str:
+        """
+        A string representing the root, if any.
+        """
+
+    @property
+    def parent(self) -> str:
+        """
+        The logical parent of this path.
+        """
+
+    def as_uri(self) -> str:
+        """
+        Represent the path as a file URI. Can be used to reconstruct the location/path.
+        """
+
+    def exists(self) -> bool:
+        """
+        Return True if the path points to an existing file or directory.
+        """
+
+    def is_dir(self) -> bool:
+        """
+        Return True if the path points to a directory, False if it points to another kind of file.
+        """
+
+    def is_file(self) -> bool:
+        """
+        Return True if the path points to a regular file, False if it points to another kind of file.
+        """
+
+    def read(self, chunk_size: int = 8192) -> Iterable[ByteString]:
+        """
+        Read the content of the file behind this path.
+
+        Only works for PathLike objects which return True for `is_file()`.
+
+        Args:
+            chunk_size: size of the chunks which will be yielded by the iterator.
+
+        Returns:
+            Returns an iterator which can be used to read the contents of the path in chunks.
+
+        Raises:
+            FileNotFoundError: If the file does not exist.
+            IsADirectoryError: if the pathlike object points to a directory.
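+
+        An illustrative sketch (`path` is assumed to be any PathLike object
+        pointing to a file):
+
+            content = b''.join(path.read(chunk_size=4096))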
+        """
+
+    def write(self, data: ByteString | BinaryIO | Iterable[ByteString]):
+        """
+        Writes data to this path.
+
+        Q. Should it create the parent directory if it doesn't exist?
+        A. Yes, it should.
+
+        After successfully writing to this path `exists` will yield true for this path.
+        If the file already existed it will be overwritten.
+
+        Args:
+            data: the data which shall be written to the path.
+
+        Raises:
+            NotAFileError: if the pathlike object is not a file path.
+        """
+
+    def rm(self):
+        """
+        Remove this file.
+
+        Note:
+            If `exists()` and `is_file()` yield true for this path, the path will be deleted,
+            otherwise an exception will be raised.
+
+        Raises:
+            FileNotFoundError: If the file does not exist.
+        """
+
+    def rmdir(self, recursive: bool = False):
+        """
+        Removes this directory.
+
+        Note: In order to stay close to pathlib, by default `rmdir` with `recursive`
+        set to `False` won't delete non-empty directories.
+
+        Args:
+            recursive: if true the directory itself and its entire contents (files and subdirs)
+                       will be deleted. If false and the directory is not empty an error will be thrown.
+
+        Raises:
+            FileNotFoundError: If the file does not exist.
+            PermissionError: If recursive is false and the directory is not empty.
+        """
+
+    def joinpath(self, *path_segments) -> "PathLike":
+        """
+        Calling this method is equivalent to combining the path with each of the given path segments in turn.
+
+        Returns:
+            A new pathlike object pointing to the combined path.
+        """
+
+    def walk(self) -> Generator[tuple["PathLike", list[str], list[str]], None, None]:
+        """
+        Generate the file names in a directory tree by walking the tree either top-down or bottom-up.
+
+        Note:
+            Try to mimic https://docs.python.org/3/library/pathlib.html#pathlib.Path.walk as closely as possible,
+            except the functionality associated with the parameters of the `pathlib` walk.
+
+        Yields:
+            A 3-tuple of (dirpath, dirnames, filenames).
+        """
+
+    def iterdir(self) -> Generator["PathLike", None, None]:
+        """
+        When the path points to a directory, yield path objects of the directory contents.
+
+        Note:
+            If `path` points to a file then `iterdir()` will yield nothing.
+
+        Yields:
+            All direct children of the pathlike object.
+        """
+
+    def __truediv__(self, other):
+        """
+        Overload / for joining, see also joinpath or `pathlib.Path`.
+        """
+
+
+class _BucketFile:
+    """
+    A node in a perceived file structure of a bucket.
+    This can be a file, a directory or both.
+    """
+
+    def __init__(self, name: str, parent: str = ''):
+        self._name = name
+        self._path = f'{parent}/{name}' if parent else name
+        self._children: Optional[dict[str, "_BucketFile"]] = None
+        self.is_file = False
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def path(self):
+        return self._path
+
+    @property
+    def is_dir(self):
+        # The node can be a directory as well as a file,
+        # hence the is_dir property is independent of is_file.
+        return bool(self._children)
+
+    def __iter__(self):
+        if self._children is None:
+            return iter(())
+        return iter(self._children.values())
+
+    def get_child(self, child_name: str) -> "_BucketFile":
+        """
+        Returns a child object with the specified name.
+        Creates one if it hasn't been created yet.
+        """
+        if self._children is None:
+            self._children = {}
+            child: Optional["_BucketFile"] = None
+        else:
+            child = self._children.get(child_name)
+        if child is None:
+            child = _BucketFile(child_name, self._path)
+            self._children[child_name] = child
+        return child
+
+
+class BucketPath:
+    """
+    Implementation of the PathLike view for files in a bucket.
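+
+    An illustrative sketch of the intended usage (`bucket` is assumed to be an
+    object implementing the BucketLike protocol, e.g. an on-premises Bucket):
+
+        path = BucketPath('dir1/file1.dat', bucket_api=bucket)
+        path.write(b'some data')
+        print(path.exists(), path.is_file())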
+    """
+
+    def __init__(self, path: str | PurePath, bucket_api: BucketLike):
+        """
+        :param path: A pure path of a file or directory. The path is assumed to
+                     be relative to the bucket. It is also permissible to have
+                     this path in an absolute form, e.g. '/dir1/...'
+                     or '\\\\abc\\...\\'.
+
+                     All PurePath methods of the PathLike protocol will be
+                     delegated to this object.
+
+        :param bucket_api: An object supporting the Bucket API protocol.
+        """
+        self._path = PurePath(path)
+        self._bucket_api = bucket_api
+
+    def _get_relative_posix(self):
+        """
+        Returns the pure path of this object as a string, in the format of a bucket
+        file path: 'dir/subdir/.../filename'.
+        """
+        path_str = str(self._path)[len(self._path.anchor):]
+        if isinstance(self._path, PureWindowsPath):
+            path_str = path_str.replace('\\', '/')
+        if path_str == '.':
+            path_str = ''
+        return path_str
+
+    def _navigate(self) -> Optional[_BucketFile]:
+        """
+        Reads the bucket file structure and navigates to the node corresponding to the
+        pure path of this object. Returns None if such a node doesn't exist, otherwise
+        returns this node.
+        """
+        path_str = self._get_relative_posix()
+        path_len = len(path_str)
+        path_root: Optional[_BucketFile] = None
+        for file_name in self._bucket_api.files:
+            if file_name.startswith(path_str):
+                path_root = path_root or _BucketFile(self._path.name, str(self.parent))
+                node = path_root
+                for part in file_name[path_len:].split('/'):
+                    if part:
+                        node = node.get_child(part)
+                node.is_file = True
+        return path_root
+
+    @property
+    def name(self) -> str:
+        return self._path.name
+
+    @property
+    def suffix(self) -> str:
+        return self._path.suffix
+
+    @property
+    def root(self) -> str:
+        return self._path.root
+
+    @property
+    def parent(self) -> str:
+        return self._path.parent.name
+
+    def as_uri(self) -> str:
+        return self._path.as_uri()
+
+    def exists(self) -> bool:
+        return self._navigate() is not None
+
+    def is_dir(self) -> bool:
+        current_node = self._navigate()
+        return (current_node is not None) and current_node.is_dir
+
+    def is_file(self) -> bool:
+        current_node = self._navigate()
+        return (current_node is not None) and current_node.is_file
+
+    def read(self, chunk_size: int = 8192) -> Iterable[ByteString]:
+        return self._bucket_api.download(str(self._path), chunk_size)
+
+    def write(self, data: ByteString | BinaryIO | Iterable[ByteString]) -> None:
+        if (not isinstance(data, IOBase) and isinstance(data, Iterable) and
+                all(isinstance(chunk, ByteString) for chunk in data)):
+            data = b''.join(data)
+        self._bucket_api.upload(str(self._path), data)
+
+    def rm(self) -> None:
+        current_node = self._navigate()
+        if current_node is None:
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(self._path))
+        if not current_node.is_file:
+            raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), str(self._path))
+        self._bucket_api.delete(str(self._path))
+
+    def rmdir(self, recursive: bool = False) -> None:
+        current_node = self._navigate()
+        if current_node is None:
+            # There is no such thing as an empty directory. So, for the sake of
+            # compatibility with the PathLike, any directory that doesn't exist
+            # is considered empty.
+            return
+        if not current_node.is_dir:
+            raise NotADirectoryError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), str(self._path))
+        if recursive:
+            self._rmdir_recursive(current_node)
+        else:
+            raise OSError(errno.ENOTEMPTY, os.strerror(errno.ENOTEMPTY), str(self._path))
+
+    def _rmdir_recursive(self, node: _BucketFile):
+        for child in node:
+            self._rmdir_recursive(child)
+        if node.is_file:
+            self._bucket_api.delete(node.path)
+
+    def joinpath(self, *path_segments) -> "PathLike":
+        # The path segments can be of either this type or an os.PathLike.
+        cls = type(self)
+        seg_paths = [seg._path if isinstance(seg, cls) else seg for seg in path_segments]
+        new_path = self._path.joinpath(*seg_paths)
+        return cls(new_path, self._bucket_api)
+
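+    # Illustrative sketch of the traversal order (the tree below is an assumed
+    # example, not a fixture of the library): for a bucket containing the files
+    # 'a/b/f1.dat' and 'a/f2.dat', a top-down walk of 'a' yields
+    # (BucketPath('a'), ['b'], ['f2.dat']) before (BucketPath('a/b'), [], ['f1.dat']),
+    # while a bottom-up walk yields the same tuples in the reverse order.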
+    def walk(self, top_down: bool = True) -> Generator[tuple[PathLike, list[str], list[str]], None, None]:
+        current_node = self._navigate()
+        if current_node is None:
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(self._path))
+
+        if current_node.is_dir:
+            yield from self._walk_recursive(current_node, top_down)
+
+    def _walk_recursive(self, node: _BucketFile, top_down: bool) -> \
+            Generator[tuple[PathLike, list[str], list[str]], None, None]:
+
+        bucket_path = BucketPath(node.path, self._bucket_api)
+        dir_list: list[str] = []
+        file_list: list[str] = []
+        for child in node:
+            if child.is_file:
+                file_list.append(child.name)
+            if child.is_dir:
+                dir_list.append(child.name)
+
+        # The difference between the top-down and the bottom-up traversal is the order
+        # in which the current node and its children are yielded: top-down yields the
+        # current node first, bottom-up yields the children first.
+        if top_down:
+            yield bucket_path, dir_list, file_list
+        for child in node:
+            if child.is_dir:
+                yield from self._walk_recursive(child, top_down)
+        if not top_down:
+            yield bucket_path, dir_list, file_list
+
+    def iterdir(self) -> Generator[PathLike, None, None]:
+        current_node = self._navigate()
+        if current_node is None:
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(self._path))
+        if not current_node.is_dir:
+            raise NotADirectoryError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), str(self._path))
+
+        for child in current_node:
+            yield BucketPath(self._path / child.name, self._bucket_api)
+
+    def __truediv__(self, other):
+        # The other object can be of either this type or an os.PathLike.
+        cls = type(self)
+        new_path = self._path / (other._path if isinstance(other, cls) else other)
+        return cls(new_path, self._bucket_api)
+
+    def __str__(self):
+        return str(self._path)
diff --git a/exasol/bucketfs/pathlike.py b/exasol/bucketfs/pathlike.py
deleted file mode 100644
index 7336232c..00000000
--- a/exasol/bucketfs/pathlike.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from typing import Protocol, ByteString, BinaryIO, Iterable, Generator
-
-
-class Pathlike(Protocol):
-
-    @property
-    def name(self) -> str:
-        """
-        A string representing the final path component, excluding the drive and root, if any.
-        """
-
-    @property
-    def suffix(self) -> str:
-        """
-        The file extension of the final component, if any.
-        """
-
-    @property
-    def root(self) -> str:
-        """
-        A string representing the root, if any.
-        """
-
-    @property
-    def parent(self) -> str:
-        """
-        The logical parent of this path.
-        """
-
-    def as_uri(self) -> str:
-        """
-        Represent the path as a file URI. Can be used to reconstruct the location/path.
-        """
-
-    def exists(self) -> bool:
-        """
-        Return True if the path points to an existing file or directory.
-        """
-
-    def is_dir(self) -> bool:
-        """
-        Return True if the path points to a directory, False if it points to another kind of file.
-        """
-
-    def is_file(self) -> bool:
-        """
-        Return True if the path points to a regular file, False if it points to another kind of file.
-        """
-
-    def read(self, chunk_size: int = 8192) -> Iterable[ByteString]:
-        """
-        Read the content of the file behind this path.
-
-        Only works for Pathlike objects which return True for `is_file()`.
-
-        Args:
-            chunk_size: which will be yielded by the iterator.
-
-        Returns:
-            Returns an iterator which can be used to read the contents of the path in chunks.
-
-        Raises:
-            FileNotFoundError: If the file does not exist.
-            IsADirectoryError: if the pathlike object points to a directory.
-        """
-
-    def write(self, data: ByteString | BinaryIO | Iterable[ByteString]):
-        """
-        Writes data to this path.
-
-        Q. Should it create the parent directory if it doesn't exit?
-        A. Yes, it should.
-
-        After successfully writing to this path `exists` will yield true for this path.
-        If the file already existed it will be overwritten.
-
-        Args:
-            data: which shall be writen to the path.
-
-        Raises:
-            NotAFileError: if the pathlike object is not a file path.
-        """
-
-    def rm(self):
-        """
-        Remove this file.
-
-        Note:
-            If `exists()` and is_file yields true for this path, the path will be deleted,
-            otherwise exception will be thrown.
-
-        Raises:
-            FileNotFoundError: If the file does not exist.
-        """
-
-    def rmdir(self, recursive: bool = False):
-        """
-        Removes this directory.
-
-        Note: In order to stay close to pathlib, by default `rmdir` with `recursive`
-        set to `False` won't delete non-empty directories.
-
-        Args:
-            recursive: if true the directory itself and its entire contents (files and subdirs)
-                       will be deleted. If false and the directory is not empty an error will be thrown.
-
-        Raises:
-            FileNotFoundError: If the file does not exist.
-            PermissionError: If recursive is false and the directory is not empty.
-        """
-
-    def joinpath(self, *path_segments) -> "Pathlike":
-        """
-        Calling this method is equivalent to combining the path with each of the given path segments in turn.
-
-        Returns:
-            A new pathlike object pointing the combined path.
-        """
-
-    def walk(self) -> Generator[tuple["Pathlike", list[str], list[str]], None, None]:
-        """
-        Generate the file names in a directory tree by walking the tree either top-down or bottom-up.
-
-        Note:
-            Try to mimik https://docs.python.org/3/library/pathlib.html#pathlib.Path.walk as closely as possible,
-            except the functionality associated with the parameters of the `pathlib` walk.
-
-        Yields:
-            A 3-tuple of (dirpath, dirnames, filenames).
-        """
-
-    def iterdir(self) -> Generator["Pathlike", None, None]:
-        """
-        When the path points to a directory, yield path objects of the directory contents.
-
-        Note:
-            If `path` points to a file then `iterdir()` will yield nothing.
-
-        Yields:
-            All direct children of the pathlike object.
-        """
-
-    def __truediv__(self, other):
-        """
-        Overload / for joining, see also joinpath or `pathlib.Path`.
-        """
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
new file mode 100644
index 00000000..797e3e1e
--- /dev/null
+++ b/test/unit/conftest.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+from typing import Iterable, ByteString, BinaryIO
+import os
+from io import IOBase
+import shutil
+import errno
+from pathlib import Path
+import pytest
+
+
+class BucketFake:
+    """
+    Implementation of the Bucket API backed by the normal file system.
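+
+    An illustrative sketch (the argument is assumed to be an existing local
+    directory acting as the bucket root):
+
+        bucket = BucketFake('/tmp/fake_bucket_root')
+        bucket.upload('dir1/file1.dat', b'abc')
+        print(list(bucket.files))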
+    """
+
+    def __init__(self, root: str):
+        self.root = Path(root)
+        if not self.root.is_dir():
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(root))
+
+    def _get_full_path(self, path: str | Path):
+        return self.root / path
+
+    @property
+    def files(self) -> list[str]:
+        root_length = len(str(self.root))
+        if str(self.root) != self.root.root:
+            root_length += 1
+        return [str(pth)[root_length:] for pth in self.root.rglob('*.*')]
+
+    def delete(self, path: str) -> None:
+        try:
+            self._get_full_path(path).unlink(missing_ok=True)
+        except IsADirectoryError:
+            pass
+
+    def upload(self, path: str, data: ByteString | BinaryIO) -> None:
+        full_path = self._get_full_path(path)
+        if not full_path.parent.exists():
+            full_path.parent.mkdir(parents=True)
+        with full_path.open('wb') as f:
+            if isinstance(data, IOBase):
+                shutil.copyfileobj(data, f)
+            elif isinstance(data, ByteString):
+                f.write(data)
+            else:
+                raise ValueError('upload called with an unrecognised data type. '
+                                 'The data should be either a ByteString or a BinaryIO')
+
+    def download(self, path: str, chunk_size: int) -> Iterable[ByteString]:
+        full_path = self._get_full_path(path)
+        if (not full_path.exists()) or (not full_path.is_file()):
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(path))
+        with full_path.open('rb') as f:
+            while True:
+                data = f.read(chunk_size)
+                if not data:
+                    break
+                yield data
+
+
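+# The bucket_fake fixture below creates the following directory tree; each
+# file file{d_id}{i}.dat holds 24 bytes, every byte equal to d_id * i:
+#
+#   <tmpdir>/file00.dat, file01.dat
+#   <tmpdir>/dir1/file10.dat, file11.dat
+#   <tmpdir>/dir1/dir11/file110.dat, file111.dat
+#   <tmpdir>/dir1/dir12/file120.dat, file121.dat
+#   <tmpdir>/dir2/file20.dat, file21.dat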
+@pytest.fixture
+def bucket_fake(tmpdir) -> BucketFake:
+    dir1 = tmpdir.mkdir('dir1')
+    dir2 = tmpdir.mkdir('dir2')
+    dir11 = dir1.mkdir('dir11')
+    dir12 = dir1.mkdir('dir12')
+    for d, d_id in zip([tmpdir, dir1, dir2, dir11, dir12], [0, 1, 2, 11, 12]):
+        for i in range(2):
+            file_xx = d / f'file{d_id}{i}.dat'
+            dat = bytes([d_id * i] * 24)
+            file_xx.write_binary(dat)
+    return BucketFake(tmpdir)
diff --git a/test/unit/test_bucket_path.py b/test/unit/test_bucket_path.py
new file mode 100644
index 00000000..b46b8aab
--- /dev/null
+++ b/test/unit/test_bucket_path.py
@@ -0,0 +1,200 @@
+from pathlib import Path
+from itertools import chain
+import pytest
+from exasol.bucketfs._path import BucketPath
+
+
+@pytest.mark.parametrize("test_path, should_exist", [
+    ('dir1/file11.dat', True),
+    ('dir1/dir11', True),
+    ('dir1/file19.dat', False),
+    ('dir1/dir3', False)
+])
+def test_file_exists(bucket_fake, test_path, should_exist):
+    path = BucketPath(test_path, bucket_api=bucket_fake)
+    assert path.exists() == should_exist
+
+
+@pytest.mark.parametrize("test_path, is_dir", [
+    ('dir1/file11.dat', False),
+    ('dir1/dir11', True),
+    ('dir1/file19.dat', False),
+    ('dir1/dir3', False)
+])
+def test_is_dir(bucket_fake, test_path, is_dir):
+    path = BucketPath(test_path, bucket_api=bucket_fake)
+    assert path.is_dir() == is_dir
+
+
+@pytest.mark.parametrize("test_path, is_file", [
+    ('dir1/file11.dat', True),
+    ('dir1/dir11', False),
+    ('dir1/file19.dat', False),
+    ('dir1/dir3', False)
+])
+def test_is_file(bucket_fake, test_path, is_file):
+    path = BucketPath(test_path, bucket_api=bucket_fake)
+    assert path.is_file() == is_file
+
+
+def test_rm(bucket_fake):
+    path = BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake)
+    path.rm()
+    assert not path.exists()
+
+
+def test_rm_not_exist(bucket_fake):
+    path = BucketPath('dir1/dir12/file125.dat', bucket_api=bucket_fake)
+    with pytest.raises(FileNotFoundError):
+        path.rm()
+
+
+def test_rm_directory(bucket_fake):
+    path = BucketPath('dir1/dir12', bucket_api=bucket_fake)
+    with pytest.raises(IsADirectoryError):
+        path.rm()
+
+
+def test_rmdir(bucket_fake):
+    for i in range(2):
+        BucketPath(f'dir1/dir12/file12{i}.dat', bucket_api=bucket_fake).rm()
+    path = BucketPath('dir1/dir12', bucket_api=bucket_fake)
+    path.rmdir(recursive=False)
+    assert not path.exists()
+
+
+def test_rmdir_recursive(bucket_fake):
+    path = BucketPath('dir1', bucket_api=bucket_fake)
+    path.rmdir(recursive=True)
+    assert not path.exists()
+
+
+def test_rmdir_not_empty(bucket_fake):
+    path = BucketPath('dir1', bucket_api=bucket_fake)
+    with pytest.raises(OSError):
+        path.rmdir(recursive=False)
+
+
+def test_rmdir_not_exist(bucket_fake):
+    path = BucketPath('dir1/dir5', bucket_api=bucket_fake)
+    path.rmdir()
+
+
+def test_rmdir_file(bucket_fake):
+    path = BucketPath('dir1/dir12/file120.dat', bucket_api=bucket_fake)
+    with pytest.raises(NotADirectoryError):
+        path.rmdir()
+
+
+def test_joinpath(bucket_fake):
+    path1 = BucketPath('dir1', bucket_api=bucket_fake)
+    path2 = 'dir11'
+    path3 = BucketPath('dir111/dir1111', bucket_api=bucket_fake)
+    path4 = Path('dir11111/file111110.dat')
+    path = path1.joinpath(path2, path3, path4)
+    assert isinstance(path, BucketPath)
+    assert str(path) == 'dir1/dir11/dir111/dir1111/dir11111/file111110.dat'
+
+
+def test_truediv(bucket_fake):
+    path1 = BucketPath('dir1', bucket_api=bucket_fake)
+    path2 = 'dir11'
+    path3 = BucketPath('dir111/dir1111', bucket_api=bucket_fake)
+    path4 = Path('dir11111/file111110.dat')
+    path = path1 / path2 / path3 / path4
+    assert isinstance(path, BucketPath)
+    assert str(path) == 'dir1/dir11/dir111/dir1111/dir11111/file111110.dat'
+
+
+def test_walk_top_down(bucket_fake):
+    path = BucketPath('', bucket_api=bucket_fake)
+    content = [','.join(chain([pth.name, '/'], sorted(dirs), sorted(files)))
+               for pth, dirs, files in path.walk(top_down=True)]
+    expected_content = [
+        ',/,dir1,dir2,file00.dat,file01.dat',
+        'dir1,/,dir11,dir12,file10.dat,file11.dat',
+        'dir11,/,file110.dat,file111.dat',
+        'dir12,/,file120.dat,file121.dat',
+        'dir2,/,file20.dat,file21.dat'
+    ]
+    assert set(content) == set(expected_content)
+    idx = [content.index(expected_content[i]) for i in range(3)]
+    assert idx == sorted(idx)
+
+
+def test_walk_bottom_up(bucket_fake):
+    path = BucketPath('', bucket_api=bucket_fake)
+    content = [','.join(chain([pth.name, '/'], sorted(dirs), sorted(files)))
+               for pth, dirs, files in path.walk(top_down=False)]
+    expected_content = [
+        'dir11,/,file110.dat,file111.dat',
+        'dir1,/,dir11,dir12,file10.dat,file11.dat',
+        ',/,dir1,dir2,file00.dat,file01.dat',
+        'dir12,/,file120.dat,file121.dat',
+        'dir2,/,file20.dat,file21.dat'
+    ]
+    assert set(content) == set(expected_content)
+    idx = [content.index(expected_content[i]) for i in range(3)]
+    assert idx == sorted(idx)
+
+
+def test_iterdir(bucket_fake):
+    path = BucketPath('dir1', bucket_api=bucket_fake)
+    content = set(str(node) for node in path.iterdir())
+    expected_content = {
+        'dir1/dir11',
+        'dir1/dir12',
+        'dir1/file10.dat',
+        'dir1/file11.dat'
+    }
+    assert content == expected_content
+
+
+def test_read(bucket_fake):
+    path = BucketPath('dir1/dir12/file121.dat', bucket_api=bucket_fake)
+    expected_chunk = bytes([12] * 8)
+    for chunk in path.read(chunk_size=8):
+        assert chunk == expected_chunk
+
+
+def test_read_not_found(bucket_fake):
+    path = BucketPath('dir1/file12.dat', bucket_api=bucket_fake)
+    with pytest.raises(FileNotFoundError):
+        list(path.read())
+
+
+@pytest.mark.parametrize("file_name", ['file23.dat', 'file20.dat'])
+def test_write_bytes(bucket_fake, file_name):
+    data = b'abcd'
+    path = BucketPath(f'dir2/{file_name}', bucket_api=bucket_fake)
+    path.write(data)
+    data_back = next(iter(path.read(100)))
+    assert data_back == data
+
+
+def test_write_chunks(bucket_fake):
+    data_chunks = [b'abc', b'def', b'gh']
+    path = BucketPath('dir2/file23.dat', bucket_api=bucket_fake)
+    path.write(data_chunks)
+    data_back = next(iter(path.read(100)))
+    assert data_back == b'abcdefgh'
+
+
+def test_write_file(bucket_fake):
+    path = BucketPath('dir2/file_copy.dat', bucket_api=bucket_fake)
+    source_file = bucket_fake.root / 'dir2/file21.dat'
+    with open(source_file, 'rb') as f:
+        path.write(f)
+    with open(source_file, 'rb') as f:
+        assert next(iter(path.read(100))) == f.read()
+
+
+def test_write_and_create_parent(bucket_fake):
+    path = BucketPath('dir2/dir21/file_copy.dat', bucket_api=bucket_fake)
+    assert not path.exists()
+    source_file = bucket_fake.root / 'dir2/file21.dat'
+    with open(source_file, 'rb') as f:
+        path.write(f)
+    assert path.exists()
+    with open(source_file, 'rb') as f:
+        assert next(iter(path.read(100))) == f.read()