From 5ef90c3879dcdcbe5d199c841f423e8fd9898e1a Mon Sep 17 00:00:00 2001 From: Mikhail Beck Date: Tue, 9 Jul 2024 09:17:47 +0100 Subject: [PATCH] #149 Added documentation for SaaS and the PathLike (#149) * #139 Added the doc for SaaS and PathLike * #139 Fixed underline lengths * #149 Fixed some issues with Sphinx * #149 Addressed review comments --- doc/api.rst | 32 ++++++++ doc/changes/unreleased.md | 1 + doc/examples/bucket_saas.py | 16 ++++ doc/examples/delete.py | 3 + doc/examples/download.py | 3 + doc/examples/list.py | 3 + doc/examples/path_like.py | 140 ++++++++++++++++++++++++++++++++++ doc/examples/quickstart.py | 14 ++-- doc/examples/service.py | 4 + doc/examples/upload.py | 3 + doc/user_guide/basics.rst | 31 ++++++-- doc/user_guide/user_guide.rst | 4 +- exasol/bucketfs/_buckets.py | 90 +++++++++------------- exasol/bucketfs/_path.py | 14 ++-- 14 files changed, 286 insertions(+), 72 deletions(-) create mode 100644 doc/examples/bucket_saas.py create mode 100644 doc/examples/path_like.py diff --git a/doc/api.rst b/doc/api.rst index bc122e73..ecb37915 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -9,6 +9,13 @@ exasol.bucketfs.Service :undoc-members: :show-inheritance: +exasol.bucketfs.BucketLike +-------------------------- +.. autoclass:: exasol.bucketfs.BucketLike + :members: + :undoc-members: + :show-inheritance: + exasol.bucketfs.Bucket ----------------------- .. autoclass:: exasol.bucketfs.Bucket @@ -16,6 +23,31 @@ exasol.bucketfs.Bucket :undoc-members: :show-inheritance: +exasol.bucketfs.SaaSBucket +-------------------------- +.. autoclass:: exasol.bucketfs.SaaSBucket + :members: + :undoc-members: + :show-inheritance: + +exasol.bucketfs.MountedBucket +----------------------------- +.. autoclass:: exasol.bucketfs.MountedBucket + :members: + :undoc-members: + :show-inheritance: + +exasol.bucketfs.path.PathLike +----------------------------- +.. 
autoclass:: exasol.bucketfs._path.PathLike + :members: + :undoc-members: + :show-inheritance: + +exasol.bucketfs.path.build_path +------------------------------- +.. autofunction:: exasol.bucketfs._path.build_path + exasol.bucketfs.as_bytes ------------------------ .. autofunction:: exasol.bucketfs.as_bytes diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md index c0d363e8..7a8c588b 100644 --- a/doc/changes/unreleased.md +++ b/doc/changes/unreleased.md @@ -11,3 +11,4 @@ The current release adds a dependency to plugin `pytest_exasol_saas` and replace ## Documentation * #144: Added comment on using fixtures from pytest-plugin `pytest-exasol-saas` +* #147: Added documentation for the SaaS and the PathLike interface. diff --git a/doc/examples/bucket_saas.py b/doc/examples/bucket_saas.py new file mode 100644 index 00000000..b3e15ab5 --- /dev/null +++ b/doc/examples/bucket_saas.py @@ -0,0 +1,16 @@ +""" +This example is relevant for the Exasol SaaS database. +It demonstrates the creation of a bucket object for a SaaS database. +""" +import os + +from exasol.bucketfs import SaaSBucket + +# Let's assume that the required SaaS connection parameters +# are stored in environment variables. +bucket = SaaSBucket( + url=os.environ.get('SAAS_URL'), + account_id=os.environ.get('SAAS_ACCOUNT_ID'), + database_id=os.environ.get('SAAS_DATABASE_ID'), + pat=os.environ.get('SAAS_PAT'), +) diff --git a/doc/examples/delete.py b/doc/examples/delete.py index 5135df72..bac420ab 100644 --- a/doc/examples/delete.py +++ b/doc/examples/delete.py @@ -1,3 +1,6 @@ +""" +These examples are relevant for the On-Prem Exasol database. +""" from exasol.bucketfs import Service URL = "http://localhost:6666" diff --git a/doc/examples/download.py b/doc/examples/download.py index 02f5741d..6a29771d 100644 --- a/doc/examples/download.py +++ b/doc/examples/download.py @@ -1,3 +1,6 @@ +""" +These examples are relevant for the On-Prem Exasol database. 
+""" from exasol.bucketfs import ( Service, as_bytes, diff --git a/doc/examples/list.py b/doc/examples/list.py index 18bd4a19..54ec759c 100644 --- a/doc/examples/list.py +++ b/doc/examples/list.py @@ -1,3 +1,6 @@ +""" +These examples are relevant for the On-Prem Exasol database. +""" from exasol.bucketfs import Service URL = "http://localhost:6666" diff --git a/doc/examples/path_like.py b/doc/examples/path_like.py new file mode 100644 index 00000000..02a71709 --- /dev/null +++ b/doc/examples/path_like.py @@ -0,0 +1,140 @@ +""" +In this tutorial we will demonstrate the usage of the PathLike interface +with an example of handling customer reviews. +""" +from typing import ByteString +import tempfile +import os + +import exasol.bucketfs as bfs + +# First, we need to get a path in the BucketFS where we will store reviews. +# We will use the build_path() function for that. This function takes different +# input parameters depending on the backend in use. We will store the type of +# backend in the variable below. Please change it to bfs.path.StorageBackend.saas +# if needed. +backend = bfs.path.StorageBackend.onprem + +if backend == bfs.path.StorageBackend.onprem: + # The parameters below are the default BucketFS parameters of the Docker-DB + # running on a local machine. Please change them according to the settings of the + # On-Prem database being used. For better security, consider storing the password + # in an environment variable. + reviews = bfs.path.build_path( + backend=backend, + url="http://localhost:6666", + bucket_name='default', + service_name='bfsdefault', + path='reviews', + username='w', + password='write', + verify=False + ) +elif backend == bfs.path.StorageBackend.saas: + # In case of a SaaS database we will assume that the required SaaS connection + # parameters are stored in environment variables. 
+ reviews = bfs.path.build_path( + backend=backend, + url=os.environ.get('SAAS_URL'), + account_id=os.environ.get('SAAS_ACCOUNT_ID'), + database_id=os.environ.get('SAAS_DATABASE_ID'), + pat=os.environ.get('SAAS_PAT'), + path='reviews', + ) +else: + raise RuntimeError(f'Unknown backend {backend}') + +# Let's create a path for good reviews and write some reviews there, +# each into a separate file. +good_reviews = reviews / 'good' + +john_h_review = good_reviews / 'John-H.review' +john_h_review.write( + b'I had an amazing experience with this company! ' + b'The customer service was top-notch, and the product exceeded my expectations. ' + b'I highly recommend them to anyone looking for quality products and excellent service.' +) + +sarah_l_review = good_reviews / 'Sarah-L.review' +sarah_l_review.write( + b'I am a repeat customer of this business, and they never disappoint. ' + b'The team is always friendly and helpful, and their products are outstanding. ' + b'I have recommended them to all my friends and family, and I will continue to do so!' +) + +david_w_review = good_reviews / 'David-W.review' +david_w_review.write( + b'After trying several other companies, I finally found the perfect fit with this one. ' + b'Their attention to detail and commitment to customer satisfaction is unparalleled. ' + b'I will definitely be using their services again in the future.' +) + +# Now let's write some bad reviews in a different subdirectory. +bad_reviews = reviews / 'bad' + +# Previously we provided content as a ByteString. But we can also use a file object, +# as shown here. +with tempfile.TemporaryFile() as file_obj: + file_obj.write( + b'I first began coming here because of their amazing reviews. ' + b'Unfortunately, my experiences have been overwhelmingly negative. ' + b'I was billed more than 2,600 euros, the vast majority of which ' + b'I did not consent to and were never carried out.' 
+ ) + file_obj.seek(0) + mike_s_review = bad_reviews / 'Mike-S.review' + mike_s_review.write(file_obj) + + +# A PathLike object supports an interface similar to the pathlib.PurePosixPath. +for path_obj in [reviews, good_reviews, john_h_review]: + print(path_obj) + print('\tname:', path_obj.name) + print('\tsuffix:', path_obj.suffix) + print('\tparent:', path_obj.parent) + print('\texists:', path_obj.exists()) + print('\tis_dir:', path_obj.is_dir()) + print('\tis_file:', path_obj.is_file()) + +# The as_udf_path() function returns the corresponding path, as seen from a UDF. +print("A UDF can find John's review at", john_h_review.as_udf_path()) + + +# The read() method returns an iterator over chunks of content. +# The function below reads the whole content of the specified file. +def read_content(bfs_path: bfs.path.PathLike) -> ByteString: + return b''.join(bfs_path.read()) + + +# Like the pathlib.Path class, the BucketFS PathLike object provides methods +# to iterate over the content of a directory. +# Let's use the iterdir() method to print all good reviews. +for item in good_reviews.iterdir(): + if item.is_file(): + print(item.name, 'said:') + print(read_content(item)) + + +# The walk method allows traversing subdirectories. +# Let's use this method to create a list of all review paths. +all_reviews = [node / file for node, _, files in reviews.walk() for file in files] +for review in all_reviews: + print(review) + + +# A file can be deleted using the rm() method. Please note that once the file is +# deleted it won't be possible to write another file to the same path for a certain +# time, due to an internal inter-node synchronisation procedure. +mike_s_review.rm() + +# A directory can be deleted using the rmdir() method. If it is not empty we need +# to use the recursive=True option to delete the directory with all its content. +good_reviews.rmdir(recursive=True) + +# Now all reviews should be deleted. 
+print('Are any reviews left?', reviews.exists()) + +# It may look surprising that a call to reviews.exists() returns False, since we +# have not deleted the base directory. In BucketFS a directory doesn't exist as a +# distinct entity. Therefore, the exists() function called on a path for an empty +# directory returns False. diff --git a/doc/examples/quickstart.py b/doc/examples/quickstart.py index 3c49ffe4..b281262f 100644 --- a/doc/examples/quickstart.py +++ b/doc/examples/quickstart.py @@ -1,3 +1,7 @@ +""" +These examples are relevant for the On-Prem Exasol database. +""" + from exasol.bucketfs import ( Service, as_bytes, @@ -11,17 +15,17 @@ # 0. List buckets buckets = [bucket for bucket in bucketfs] -# 2. Get a bucket +# 1. Get a bucket default_bucket = bucketfs["default"] -# 3. List files in bucket +# 2. List files in bucket files = [file for file in default_bucket] -# 4. Upload a file to the bucket +# 3. Upload a file to the bucket default_bucket.upload("MyFile.txt", b"File content") -# 5. Download a file/content +# 4. Download a file/content data = as_bytes(default_bucket.download("MyFile.txt")) -# 6. Delete a file from a bucket +# 5. Delete a file from a bucket default_bucket.delete("MyFile.txt") diff --git a/doc/examples/service.py b/doc/examples/service.py index 38562f10..ca3748df 100644 --- a/doc/examples/service.py +++ b/doc/examples/service.py @@ -1,3 +1,7 @@ +""" +These examples are relevant for the On-Prem Exasol database. +""" + # List buckets from exasol.bucketfs import Service diff --git a/doc/examples/upload.py b/doc/examples/upload.py index d71c74ae..031f81c2 100644 --- a/doc/examples/upload.py +++ b/doc/examples/upload.py @@ -1,3 +1,6 @@ +""" +These examples are relevant for the On-Prem Exasol database. 
+""" import io from exasol.bucketfs import Service diff --git a/doc/user_guide/basics.rst b/doc/user_guide/basics.rst index 04725195..53b7670b 100644 --- a/doc/user_guide/basics.rst +++ b/doc/user_guide/basics.rst @@ -3,8 +3,8 @@ Basic's The Bucketfs Service -------------------- -A single bucketfs service can host multiple buckets. In order to interact with a bucketfs service one -can use the :ref:`exasol.bucketfs.Service ` class. +In the On-Prem database, a single bucketfs service can host multiple buckets. In order to interact with a +bucketfs service one can use the :ref:`exasol.bucketfs.Service ` class. List buckets ++++++++++++ @@ -23,10 +23,16 @@ Get a Bucket reference Bucket class -------------- +------------ A Bucket contains a set of files which may be restricted, depending on the credentials of the requester. -Using :ref:`exasol.bucketfs.Bucket ` class the user can interact (download, upload, list and delete) files. -with the files in the bucket. +The Bucket class for an On-Prem database is :ref:`exasol.bucketfs.Bucket `. +The correspondent class for a SaaS database is exasol.bucketfs.SaaSBucket. +Using these classes the user can interact with the files in the bucket (download, upload, list and delete them). + +Most of the examples below are based on the On-Prem implementation of the BucketFS. In the SaaS implementation +there is only one BucketFS service, providing a single bucket. To access the BucketFS in SaaS the Bucket +object should be created directly, as it is demonstrated in the last example. The interface of the Bucket +object for the SaaS database is identical to that of the On-Prem database. List files in a Bucket ++++++++++++++++++++++ @@ -73,6 +79,21 @@ Delete files from Bucket :language: python3 :end-before: # Expert/Mapped bucket API +Create bucket object in SaaS +++++++++++++++++++++++++++++ + +.. 
literalinclude:: /examples/bucket_saas.py + :language: python3 + +PathLike interface +------------------ +A PathLike is an interface similar to the pathlib.Path and should feel familiar to most users. + +Using the PathLike interface +++++++++++++++++++++++++++++ + +.. literalinclude:: /examples/path_like.py + :language: python3 Configure logging +++++++++++++++++ diff --git a/doc/user_guide/user_guide.rst b/doc/user_guide/user_guide.rst index eaf18787..b72c8871 100644 --- a/doc/user_guide/user_guide.rst +++ b/doc/user_guide/user_guide.rst @@ -6,10 +6,10 @@ Bucketfs Depending on the database configuration, the bucketfs setup can range from straight forward to fairly complex. This is due to the fact that: -* Each database can have one or more BucketFS services +* Each database can have one or more BucketFS services (in the On-Prem database) * Each BucketFS service is available on all worker cluster of a database * Each BucketFS service runs on all data nodes of a database -* Each BucketFS service can have one or more Buckets +* Each BucketFS service can have one or more Buckets (in the On-Prem database) * Each Bucket can hold one or more files The overview bellow tries to illustrate this in a more tangible manner. diff --git a/exasol/bucketfs/_buckets.py b/exasol/bucketfs/_buckets.py index 3d98ad15..2e76f603 100644 --- a/exasol/bucketfs/_buckets.py +++ b/exasol/bucketfs/_buckets.py @@ -78,7 +78,7 @@ def delete(self, path: str) -> None: Q. What happens if the path points to a directory? A. Same. There are no directories as such in the BucketFS, hence - a directory path is just a non-existent file. + a directory path is just a non-existent file. """ def upload(self, path: str, data: ByteString | BinaryIO) -> None: @@ -90,20 +90,20 @@ def upload(self, path: str, data: ByteString | BinaryIO) -> None: Q. What happens if the parent is missing? A. The bucket doesn't care about the structure of the file's path. 
Looking from the prospective - of a file system, the bucket will create the missing parent, but in reality it will just - store the data indexed by the provided path. + of a file system, the bucket will create the missing parent, but in reality it will just + store the data indexed by the provided path. Q. What happens if the path points to an existing file? A. That's fine, the file will be updated. Q. What happens if the path points to an existing directory? A. The bucket doesn't care about the structure of the file's path. Looking from the prospective - of a file system, there will exist a file and directory with the same name. + of a file system, there will exist a file and directory with the same name. Q. How should the path look like? A. It should look like a POSIX path, but it should not contain any of the NTFS invalid characters. - It can have the leading and/or ending backslashes, which will be subsequently removed. - If the path doesn't conform to this format an BucketFsError will be raised. + It can have the leading and/or ending backslashes, which will be subsequently removed. + If the path doesn't conform to this format an BucketFsError will be raised. """ def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]: @@ -126,7 +126,23 @@ def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]: class Bucket: """ - Implementation of the On-Premises bucket. + Implementation of the BucketLike interface for the BucketFS in Exasol On-Premises database. + + Args: + name: + Name of the bucket. + service: + Url where this bucket is hosted on. + username: + Username used for authentication. + password: + Password used for authentication. + verify: + Either a boolean, in which case it controls whether we verify + the server's TLS certificate, or a string, in which case it must be a path + to a CA bundle to use. Defaults to ``True``. + service_name: + Optional name of the BucketFS service. 
""" def __init__( @@ -138,25 +154,6 @@ def __init__( verify: bool | str = True, service_name: Optional[str] = None ): - """ - Create a new bucket instance. - - Args: - name: - Name of the bucket. - service: - Url where this bucket is hosted on. - username: - Username used for authentication. - password: - Password used for authentication. - verify: - Either a boolean, in which case it controls whether we verify - the server's TLS certificate, or a string, in which case it must be a path - to a CA bundle to use. Defaults to ``True``. - service_name: - Optional name of the BucketFS service. - """ self._name = name self._service = _parse_service_url(service) self._username = username @@ -201,13 +198,6 @@ def __iter__(self) -> Iterator[str]: def upload( self, path: str, data: ByteString | BinaryIO | Iterable[ByteString] ) -> None: - """ - Uploads a file onto this bucket - - Args: - path: in the bucket the file shall be associated with. - data: raw content of the file. - """ url = _build_url(service_url=self._service, bucket=self.name, path=path) LOGGER.info("Uploading %s to bucket %s.", path, self.name) response = requests.put(url, data=data, auth=self._auth, verify=self._verify) @@ -217,15 +207,6 @@ def upload( raise BucketFsError(f"Couldn't upload file: {path}") from ex def delete(self, path) -> None: - """ - Deletes a specific file in this bucket. - - Args: - path: points to the file which shall be deleted. - - Raises: - A BucketFsError if the operation couldn't be executed successfully. - """ url = _build_url(service_url=self._service, bucket=self.name, path=path) LOGGER.info("Deleting %s from bucket %s.", path, self.name) response = requests.delete(url, auth=self._auth, verify=self._verify) @@ -236,16 +217,6 @@ def delete(self, path) -> None: raise BucketFsError(f"Couldn't delete: {path}") from ex def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]: - """ - Downloads a specific file of this bucket. 
- - Args: - path: which shall be downloaded. - chunk_size: which shall be used for downloading. - - Returns: - An iterable of binary chunks representing the downloaded file. - """ url = _build_url(service_url=self._service, bucket=self.name, path=path) LOGGER.info( "Downloading %s using a chunk size of %d bytes from bucket %s.", @@ -263,6 +234,19 @@ def download(self, path: str, chunk_size: int = 8192) -> Iterable[ByteString]: class SaaSBucket: + """ + Implementation of the BucketLike interface for the BucketFS in Exasol SaaS. + + Arguments: + url: + Url of the Exasol SaaS service. + account_id: + SaaS user account ID. + database_id: + SaaS database ID. + pat: + Personal Access Token + """ def __init__(self, url: str, account_id: str, database_id: str, pat: str) -> None: self._url = url @@ -362,7 +346,7 @@ def __str__(self): class MountedBucket: """ - Implementation of the Bucket interface backed by a normal file system. + Implementation of the BucketLike interface backed by a normal file system. The targeted use case is the access to the BucketFS files from a UDF. Arguments: diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 57f1ad90..12e5cfee 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -451,9 +451,9 @@ def build_path(**kwargs) -> PathLike: """ Creates a PathLike object based on a bucket in one of the BucketFS storage backends. It provides the same interface for the following BucketFS implementations: - - On-Premises - - SaaS - - BucketFS files mounted as read-only directory in a UDF. + - On-Premises + - SaaS + - BucketFS files mounted as read-only directory in a UDF. Arguments: backend: @@ -469,9 +469,9 @@ def build_path(**kwargs) -> PathLike: directory is as in the code below. path = build_path(...) / "the_desired_path" - The rest of the arguments are backend specific. + The rest of the arguments are backend specific. - On-prem arguments: + On-prem arguments: url: Url of the BucketFS service, e.g. 
`http(s)://127.0.0.1:2580`. username: @@ -487,7 +487,7 @@ def build_path(**kwargs) -> PathLike: service_name: Optional name of the BucketFS service. - SaaS arguments: + SaaS arguments: url: Url of the Exasol SaaS. Defaults to 'https://cloud.exasol.com'. account_id: @@ -500,7 +500,7 @@ def build_path(**kwargs) -> PathLike: Personal Access Token, e.g. 'exa_pat_aj39AsM3bYR9bQ4qk2wiG8SWHXbRUGNCThnep5YV73az6A' (given example is not a valid PAT). - Mounted BucketFS directory arguments: + Mounted BucketFS directory arguments: service_name: Name of the BucketFS service (not a service url). Defaults to 'bfsdefault'. bucket_name: