diff --git a/dbxio/__init__.py b/dbxio/__init__.py index f0ac856..75f72d1 100644 --- a/dbxio/__init__.py +++ b/dbxio/__init__.py @@ -4,4 +4,4 @@ from dbxio.utils import * # noqa: F403 from dbxio.volume import * # noqa: F403 -__version__ = '0.4.0' # single source of truth +__version__ = '0.4.1' # single source of truth diff --git a/dbxio/blobs/download.py b/dbxio/blobs/download.py index 1a122f3..9d233f0 100644 --- a/dbxio/blobs/download.py +++ b/dbxio/blobs/download.py @@ -9,13 +9,26 @@ @retry(stop=stop_after_attempt(3), wait=wait_fixed(10)) def download_blob_tree( - object_storage_client: 'ObjectStorageClient', local_path: Path, prefix_path: Optional[str] = None + object_storage_client: 'ObjectStorageClient', + local_path: Path, + prefix_path: Optional[str] = None, ): for blob in object_storage_client.list_blobs(prefix=prefix_path): - if blob.name == prefix_path: + is_dir = object_storage_client.is_directory(blob.name) + if blob.name == prefix_path and is_dir: + # prefix path is subdir, skip it on first iteration continue + relative_blob_path = blob.name[len(prefix_path) + 1 :] if prefix_path else blob.name - if blob.content_settings.content_type is None: + if not relative_blob_path: + # if the prefix path is full path to one file + assert object_storage_client.blobs_path, 'blobs_path is not set' + relative_blob_path = blob.name[len(object_storage_client.blobs_path) + 1 :] + + if is_dir: + if not blob.name.startswith(f'{prefix_path}/'): + # we found a directory with the same prefix, but it's not from our subtree + continue # it's a directory, create it Path(local_path / relative_blob_path).mkdir(parents=True, exist_ok=True) continue diff --git a/dbxio/core/cloud/azure/object_storage.py b/dbxio/core/cloud/azure/object_storage.py index 4c55d27..9c79857 100644 --- a/dbxio/core/cloud/azure/object_storage.py +++ b/dbxio/core/cloud/azure/object_storage.py @@ -55,6 +55,10 @@ def list_blobs(self, prefix: Optional[str] = None, **kwargs) -> Iterator: container_client = self.blob_service_client.get_container_client(self.container_name) return container_client.list_blobs(name_starts_with=prefix, **kwargs) + def is_directory(self, blob_name: str) -> bool: + blob_client = self.blob_service_client.get_container_client(self.container_name).get_blob_client(blob_name) + return blob_client.get_blob_properties().metadata.get('hdi_isfolder') == 'true' + def download_blob(self, blob_name: str) -> bytes: blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=blob_name) return blob_client.download_blob().readall() diff --git a/dbxio/core/cloud/client/object_storage.py b/dbxio/core/cloud/client/object_storage.py index e50e4f3..1551b29 100644 --- a/dbxio/core/cloud/client/object_storage.py +++ b/dbxio/core/cloud/client/object_storage.py @@ -51,6 +51,10 @@ def try_delete_blob(self, blob_name: str): def list_blobs(self, prefix: Optional[str] = None, **kwargs) -> Iterator: raise NotImplementedError + @abstractmethod + def is_directory(self, blob_name: str) -> bool: + raise NotImplementedError + @abstractmethod def download_blob(self, blob_name: str) -> bytes: raise NotImplementedError