From 9c6b808189c6cb99e83933bd606426f00cc1ac66 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 11:10:21 +0100
Subject: [PATCH 01/18] Test with UPath: alias AnyPath to UPath, add s3.download

---
 CI/SCRIPTS/script_utils.py |   9 ++-
 CI/SCRIPTS/test_ci.py      |  27 ++++----
 CI/SCRIPTS/test_files.py   | 125 +++++++++++++++++--------------------
 CI/SCRIPTS/test_s3.py      |   5 +-
 CI/SCRIPTS/test_unistra.py |   4 +-
 sertit/__init__.py         |  12 +++-
 sertit/files.py            |  36 +++++------
 sertit/path.py             |  22 +++++--
 sertit/rasters_rio.py      |   4 +-
 sertit/s3.py               |  33 ++++++++++
 sertit/types.py            |   7 ++-
 sertit/vectors.py          |   4 +-
 12 files changed, 166 insertions(+), 122 deletions(-)

diff --git a/CI/SCRIPTS/script_utils.py b/CI/SCRIPTS/script_utils.py
index 5afe358..1d8863e 100644
--- a/CI/SCRIPTS/script_utils.py
+++ b/CI/SCRIPTS/script_utils.py
@@ -39,8 +39,13 @@ class Polarization(ListEnum):
 
 def get_s3_ci_path():
     """Get S3 CI path"""
-    unistra.define_s3_client()
-    return AnyPath("s3://sertit-sertit-utils-ci")
+    # unistra.define_s3_client()
+
+    from sertit.unistra import UNISTRA_S3_ENDPOINT
+
+    return AnyPath(
+        "s3://sertit-sertit-utils-ci", endpoint_url=f"https://{UNISTRA_S3_ENDPOINT}"
+    )
 
 
 def get_proj_path():
diff --git a/CI/SCRIPTS/test_ci.py b/CI/SCRIPTS/test_ci.py
index fb0a65d..72a5632 100644
--- a/CI/SCRIPTS/test_ci.py
+++ b/CI/SCRIPTS/test_ci.py
@@ -22,7 +22,7 @@
 from lxml import etree
 
 from CI.SCRIPTS.script_utils import files_path, rasters_path, s3_env, vectors_path
-from sertit import ci, path, rasters, rasters_rio, vectors
+from sertit import ci, path, rasters, rasters_rio, s3, vectors
 
 ci.reduce_verbosity()
 
@@ -169,15 +169,15 @@ def test_assert_raster():
 
 
 @s3_env
-def test_assert_xml():
+def test_assert_xml(tmp_path):
     # XML
     xml_folder = files_path().joinpath("LM05_L1TP_200030_20121230_20200820_02_T2_CI")
     xml_path = xml_folder.joinpath("LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml")
     xml_bad_path = xml_folder.joinpath("false_xml.xml")
 
     if path.is_cloud_path(files_path()):
-        xml_path = xml_path.fspath
-        xml_bad_path = xml_bad_path.fspath
+        xml_path = s3.download(xml_path, tmp_path)
+        xml_bad_path = s3.download(xml_bad_path, tmp_path)
 
     xml_ok = etree.parse(str(xml_path)).getroot()
     xml_nok = etree.parse(str(xml_bad_path)).getroot()
@@ -188,19 +188,18 @@ def test_assert_xml():
 
 
 @s3_env
-def test_assert_html():
+def test_assert_html(tmp_path):
     # HTML
     html_path = files_path().joinpath("productPreview.html")
     html_bad_path = files_path().joinpath("false.html")
 
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        if path.is_cloud_path(files_path()):
-            html_path = html_path.download_to(tmp_dir)
-            html_bad_path = html_bad_path.download_to(tmp_dir)
+    if path.is_cloud_path(files_path()):
+        html_path = s3.download(html_path, tmp_path)
+        html_bad_path = s3.download(html_bad_path, tmp_path)
 
-        html_ok = etree.parse(str(html_path)).getroot()
-        html_nok = etree.parse(str(html_bad_path)).getroot()
+    html_ok = etree.parse(str(html_path)).getroot()
+    html_nok = etree.parse(str(html_bad_path)).getroot()
 
-        ci.assert_xml_equal(html_ok, html_ok)
-        with pytest.raises(AssertionError):
-            ci.assert_xml_equal(html_ok, html_nok)
+    ci.assert_xml_equal(html_ok, html_ok)
+    with pytest.raises(AssertionError):
+        ci.assert_xml_equal(html_ok, html_nok)
diff --git a/CI/SCRIPTS/test_files.py b/CI/SCRIPTS/test_files.py
index 0339720..56aa530 100644
--- a/CI/SCRIPTS/test_files.py
+++ b/CI/SCRIPTS/test_files.py
@@ -25,7 +25,7 @@
 from lxml import etree, html
 
 from CI.SCRIPTS.script_utils import Polarization, files_path, s3_env
-from sertit import AnyPath, ci, files, path, vectors
+from sertit import AnyPath, ci, files, path, s3, vectors
 
 ci.reduce_verbosity()
 
@@ -94,7 +94,7 @@ def test_archive():
 
 
 @s3_env
-def test_archived_files():
+def test_archived_files(tmp_path):
     landsat_name = "LM05_L1TP_200030_20121230_20200820_02_T2_CI"
     ok_folder = files_path().joinpath(landsat_name)
     zip_file = files_path().joinpath(f"{landsat_name}.zip")
@@ -114,70 +114,63 @@ def test_archived_files():
         ci.assert_geom_equal(vect_ok, vect_zip)
         ci.assert_geom_equal(vect_ok, vect_tar)
 
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        # XML
-        xml_name = "LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
-        xml_ok_path = ok_folder.joinpath(xml_name)
-        if path.is_cloud_path(files_path()):
-            xml_ok_path = str(xml_ok_path.download_to(tmp_dir))
-        else:
-            xml_ok_path = str(xml_ok_path)
-
-        xml_regex = f".*{xml_name}"
-        xml_zip = files.read_archived_xml(zip_file, xml_regex)
-        xml_tar = files.read_archived_xml(tar_file, r".*_MTL\.xml")
-        xml_ok = etree.parse(xml_ok_path).getroot()
-        ci.assert_xml_equal(xml_ok, xml_zip)
-        ci.assert_xml_equal(xml_ok, xml_tar)
-
-        # FILE + HTML
-        html_zip_file = files_path().joinpath("productPreview.zip")
-        html_tar_file = files_path().joinpath("productPreview.tar")
-        html_name = "productPreview.html"
-        html_ok_path = files_path().joinpath(html_name)
-        if path.is_cloud_path(files_path()):
-            html_ok_path = str(html_ok_path.download_to(tmp_dir))
-        else:
-            html_ok_path = str(html_ok_path)
-
-        html_regex = f".*{html_name}"
-
-        # FILE
-        file_zip = files.read_archived_file(html_zip_file, html_regex)
-        file_tar = files.read_archived_file(html_tar_file, html_regex)
-        html_ok = html.parse(html_ok_path).getroot()
-        ci.assert_html_equal(html_ok, html.fromstring(file_zip))
-        ci.assert_html_equal(html_ok, html.fromstring(file_tar))
-
-        file_list = path.get_archived_file_list(html_zip_file)
-        ci.assert_html_equal(
-            html_ok,
-            html.fromstring(
-                files.read_archived_file(html_zip_file, html_regex, file_list=file_list)
-            ),
-        )
-
-        # HTML
-        html_zip = files.read_archived_html(html_zip_file, html_regex)
-        html_tar = files.read_archived_html(html_tar_file, html_regex)
-        ci.assert_html_equal(html_ok, html_zip)
-        ci.assert_html_equal(html_ok, html_tar)
-        ci.assert_html_equal(
-            html_ok,
-            files.read_archived_html(
-                html_tar_file,
-                html_regex,
-                file_list=path.get_archived_file_list(html_tar_file),
-            ),
-        )
-
-        # ERRORS
-        with pytest.raises(TypeError):
-            files.read_archived_file(targz_file, xml_regex)
-        with pytest.raises(TypeError):
-            files.read_archived_file(sz_file, xml_regex)
-        with pytest.raises(FileNotFoundError):
-            files.read_archived_file(zip_file, "cdzeferf")
+    # XML
+    xml_name = "LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
+    xml_ok_path = ok_folder.joinpath(xml_name)
+    xml_ok_path = str(s3.download(xml_ok_path, tmp_path))
+
+    xml_regex = f".*{xml_name}"
+    xml_zip = files.read_archived_xml(zip_file, xml_regex)
+    xml_tar = files.read_archived_xml(tar_file, r".*_MTL\.xml")
+    xml_ok = etree.parse(xml_ok_path).getroot()
+    ci.assert_xml_equal(xml_ok, xml_zip)
+    ci.assert_xml_equal(xml_ok, xml_tar)
+
+    # FILE + HTML
+    html_zip_file = files_path().joinpath("productPreview.zip")
+    html_tar_file = files_path().joinpath("productPreview.tar")
+    html_name = "productPreview.html"
+    html_ok_path = files_path().joinpath(html_name)
+    html_ok_path = str(s3.download(html_ok_path, tmp_path))
+
+    html_regex = f".*{html_name}"
+
+    # FILE
+    file_zip = files.read_archived_file(html_zip_file, html_regex)
+    file_tar = files.read_archived_file(html_tar_file, html_regex)
+    html_ok = html.parse(html_ok_path).getroot()
+    ci.assert_html_equal(html_ok, html.fromstring(file_zip))
+    ci.assert_html_equal(html_ok, html.fromstring(file_tar))
+
+    file_list = path.get_archived_file_list(html_zip_file)
+    ci.assert_html_equal(
+        html_ok,
+        html.fromstring(
+            files.read_archived_file(html_zip_file, html_regex, file_list=file_list)
+        ),
+    )
+
+    # HTML
+    html_zip = files.read_archived_html(html_zip_file, html_regex)
+    html_tar = files.read_archived_html(html_tar_file, html_regex)
+    ci.assert_html_equal(html_ok, html_zip)
+    ci.assert_html_equal(html_ok, html_tar)
+    ci.assert_html_equal(
+        html_ok,
+        files.read_archived_html(
+            html_tar_file,
+            html_regex,
+            file_list=path.get_archived_file_list(html_tar_file),
+        ),
+    )
+
+    # ERRORS
+    with pytest.raises(TypeError):
+        files.read_archived_file(targz_file, xml_regex)
+    with pytest.raises(TypeError):
+        files.read_archived_file(sz_file, xml_regex)
+    with pytest.raises(FileNotFoundError):
+        files.read_archived_file(zip_file, "cdzeferf")
 
 
 def test_cp_rm():
diff --git a/CI/SCRIPTS/test_s3.py b/CI/SCRIPTS/test_s3.py
index f99b073..032376b 100644
--- a/CI/SCRIPTS/test_s3.py
+++ b/CI/SCRIPTS/test_s3.py
@@ -19,11 +19,10 @@
 
 import pytest
 import rasterio
-from cloudpathlib import AnyPath, S3Client
 from tempenv import tempenv
 
 from CI.SCRIPTS.script_utils import CI_SERTIT_S3
-from sertit import rasters
+from sertit import AnyPath, rasters
 from sertit.s3 import USE_S3_STORAGE, s3_env, temp_s3
 
 
@@ -43,6 +42,8 @@ def with_s3(variable_1, variable_2):
 
 
 def without_s3():
+    from cloudpathlib import S3Client
+
     S3Client().set_as_default_client()
     return base_fct(None)
 
diff --git a/CI/SCRIPTS/test_unistra.py b/CI/SCRIPTS/test_unistra.py
index bb31894..6f58bb9 100644
--- a/CI/SCRIPTS/test_unistra.py
+++ b/CI/SCRIPTS/test_unistra.py
@@ -16,11 +16,11 @@
 # limitations under the License.
 """ Script testing the CI """
 import pytest
-from cloudpathlib import AnyPath, S3Client
+from cloudpathlib import S3Client
 from tempenv import tempenv
 
 from CI.SCRIPTS.script_utils import CI_SERTIT_S3
-from sertit import ci, misc, rasters, s3
+from sertit import AnyPath, ci, misc, rasters, s3
 from sertit.unistra import (
     _get_db_path,
     get_db2_path,
diff --git a/sertit/__init__.py b/sertit/__init__.py
index d63b9d5..5189825 100644
--- a/sertit/__init__.py
+++ b/sertit/__init__.py
@@ -20,11 +20,17 @@
 .. include:: ../README.md
 """
 try:
-    from cloudpathlib import AnyPath
+    from upath import UPath
+
+    AnyPath = UPath
 
-    AnyPath = AnyPath
 except ImportError:
-    pass
+    try:
+        from cloudpathlib import AnyPath
+
+        AnyPath = AnyPath
+    except ImportError:
+        pass
 
 # flake8: noqa
 from .__meta__ import (
diff --git a/sertit/files.py b/sertit/files.py
index 918a173..a5dea19 100644
--- a/sertit/files.py
+++ b/sertit/files.py
@@ -36,7 +36,7 @@
 from lxml import etree, html
 from tqdm import tqdm
 
-from sertit import AnyPath, logs, path
+from sertit import AnyPath, logs, path, s3
 from sertit.logs import SU_NAME
 from sertit.strings import DATE_FORMAT
 from sertit.types import AnyPathStrType, AnyPathType
@@ -515,26 +515,20 @@ def archive(
         'D:/path/to/output/folder_to_archive.tar.gz'
     """
     archive_path = AnyPath(archive_path)
-    folder_path = AnyPath(folder_path)
-
-    tmp_dir = None
-    if path.is_cloud_path(folder_path):
-        tmp_dir = tempfile.TemporaryDirectory()
-        folder_path = folder_path.download_to(tmp_dir.name)
-
-    # Shutil make_archive needs a path without extension
-    archive_base = os.path.splitext(archive_path)[0]
-
-    # Archive the folder
-    archive_fn = shutil.make_archive(
-        archive_base,
-        format=fmt,
-        root_dir=folder_path.parent,
-        base_dir=folder_path.name,
-    )
 
-    if tmp_dir is not None:
-        tmp_dir.cleanup()
+    with tempfile.TemporaryDirectory() as tmp_path:
+        folder_path = s3.download(AnyPath(folder_path), tmp_path)
+
+        # Shutil make_archive needs a path without extension
+        archive_base = os.path.splitext(archive_path)[0]
+
+        # Archive the folder
+        archive_fn = shutil.make_archive(
+            archive_base,
+            format=fmt,
+            root_dir=folder_path.parent,
+            base_dir=folder_path.name,
+        )
 
     return AnyPath(archive_fn)
 
@@ -755,7 +749,7 @@ def copy(src: AnyPathStrType, dst: AnyPathStrType) -> AnyPathType:
     src = AnyPath(src)
 
     if path.is_cloud_path(src):
-        out = src.download_to(dst)
+        out = s3.download(src, dst)
     else:
         out = None
         try:
diff --git a/sertit/path.py b/sertit/path.py
index 94a8106..079ec5a 100644
--- a/sertit/path.py
+++ b/sertit/path.py
@@ -593,11 +593,22 @@ def is_cloud_path(path: AnyPathStrType):
         bool: True if the file is store on the cloud.
     """
     try:
-        from cloudpathlib import CloudPath
+        return AnyPath(path).protocol in [
+            "s3",
+            "az",
+            "adl",
+            "abfs",
+            "abfss",
+            "gs",
+            "gcs",
+        ]
+    except ImportError:
+        try:
+            from cloudpathlib import CloudPath
 
-        return isinstance(AnyPath(path), CloudPath)
-    except Exception:
-        return False
+            return isinstance(AnyPath(path), CloudPath)
+        except Exception:
+            return False
 
 
 def is_path(path: Any) -> bool:
@@ -613,5 +624,6 @@ def is_path(path: Any) -> bool:
     from pathlib import Path
 
     from cloudpathlib import CloudPath
+    from upath import UPath
 
-    return isinstance(path, (str, Path, CloudPath))
+    return isinstance(path, (str, Path, CloudPath, UPath))
diff --git a/sertit/rasters_rio.py b/sertit/rasters_rio.py
index a95af4e..6a0b9ee 100644
--- a/sertit/rasters_rio.py
+++ b/sertit/rasters_rio.py
@@ -44,7 +44,7 @@
         "Please install 'rasterio' to use the 'rasters_rio' package."
     ) from ex
 
-from sertit import AnyPath, geometry, logs, misc, path, strings, vectors, xml
+from sertit import AnyPath, geometry, logs, misc, path, s3, strings, vectors, xml
 from sertit.logs import SU_NAME
 from sertit.types import AnyNumpyArray, AnyPathStrType, AnyPathType, AnyRasterType
 
@@ -1435,7 +1435,7 @@ def merge_vrt(
         crs_path = AnyPath(crs_path)
         # Download file if VRT is needed
         if path.is_cloud_path(crs_path):
-            crs_path = crs_path.download_to(merged_path.parent)
+            crs_path = s3.download(crs_path, merged_path.parent)
 
         with rasterio.open(str(crs_path)) as src:
             if first_crs is None:
diff --git a/sertit/s3.py b/sertit/s3.py
index d9bf2e8..f1e35e1 100644
--- a/sertit/s3.py
+++ b/sertit/s3.py
@@ -24,6 +24,7 @@
 
 from cloudpathlib import S3Client
 
+from sertit import AnyPath, path
 from sertit.logs import SU_NAME
 
 LOGGER = logging.getLogger(SU_NAME)
@@ -272,3 +273,35 @@ def define_s3_client(
     client = S3Client(**args_s3_client)
 
     client.set_as_default_client()
+
+
+def download(src, dst):
+
+    # By default, use the src path
+    downloaded_path = src
+
+    if path.is_path(src):
+        from cloudpathlib import CloudPath
+        from upath import UPath
+
+        # Universal pathlib
+        if isinstance(src, UPath):
+            import shutil
+
+            dst = AnyPath(dst)
+            if dst.is_dir():
+                downloaded_path = dst / src.name
+            else:
+                downloaded_path = dst
+
+            with src.open("rb") as f0, downloaded_path.open("wb") as f1:
+                shutil.copyfileobj(f0, f1)
+
+        # cloudpathlib
+        elif isinstance(src, CloudPath):
+            if dst is None:
+                downloaded_path = src.fspath
+            else:
+                downloaded_path = src.download_to(dst)
+
+    return downloaded_path
diff --git a/sertit/types.py b/sertit/types.py
index ef02fcb..d44b625 100644
--- a/sertit/types.py
+++ b/sertit/types.py
@@ -8,11 +8,12 @@
 from cloudpathlib import CloudPath
 from rasterio.io import DatasetReader, DatasetWriter
 from shapely import MultiPolygon, Polygon
+from upath import UPath
 
-AnyPathType = Union[CloudPath, Path]
-"""Any Path Type (derived from Pathlib and CloudpathLib)"""
+AnyPathType = Union[CloudPath, Path, UPath]
+"""Any Path Type (derived from Pathlib, Universal Pathlib and CloudpathLib)"""
 
-AnyPathStrType = Union[str, CloudPath, Path]
+AnyPathStrType = Union[str, AnyPathType]
 """Same as :code:`AnyPathType` but appened with :code:`str`"""
 
 AnyXrDataStructure = Union[xr.DataArray, xr.Dataset]
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 818b7ee..ef3b626 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -35,7 +35,7 @@
 from cloudpathlib.exceptions import AnyPathTypeError
 from shapely import Polygon, wkt
 
-from sertit import AnyPath, files, geometry, logs, misc, path, strings
+from sertit import AnyPath, files, geometry, logs, misc, path, s3, strings
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -717,7 +717,7 @@ def ogr2geojson(
     else:
         # vector_path should be downloaded to work with 'ogr2ogr'
         if path.is_cloud_path(vector_path):
-            vector_path = AnyPath(vector_path).fspath
+            vector_path = s3.download(vector_path, out_dir)
         vect_path = vector_path
 
     vect_path_gj = os.path.join(

From 7a2cb5db715838a4adf7e858235b92f5041e8a1c Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 11:57:43 +0100
Subject: [PATCH 02/18] Make UPath work with the zipfile and tarfile modules

---
 CI/SCRIPTS/test_s3.py      |  3 ++-
 CI/SCRIPTS/test_types.py   |  3 ++-
 CI/SCRIPTS/test_unistra.py |  6 +++---
 sertit/files.py            | 25 +++++++++++++++++++------
 sertit/path.py             | 21 +++++++++++++++------
 sertit/s3.py               | 12 ++++++++++++
 sertit/vectors.py          | 17 ++++++++++++++---
 sertit/xml.py              |  6 +++---
 8 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/CI/SCRIPTS/test_s3.py b/CI/SCRIPTS/test_s3.py
index 032376b..d7e1604 100644
--- a/CI/SCRIPTS/test_s3.py
+++ b/CI/SCRIPTS/test_s3.py
@@ -19,10 +19,11 @@
 
 import pytest
 import rasterio
+from cloudpathlib import AnyPath
 from tempenv import tempenv
 
 from CI.SCRIPTS.script_utils import CI_SERTIT_S3
-from sertit import AnyPath, rasters
+from sertit import rasters
 from sertit.s3 import USE_S3_STORAGE, s3_env, temp_s3
 
 
diff --git a/CI/SCRIPTS/test_types.py b/CI/SCRIPTS/test_types.py
index 53e6f95..6013570 100644
--- a/CI/SCRIPTS/test_types.py
+++ b/CI/SCRIPTS/test_types.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 from cloudpathlib import CloudPath
+from upath import UPath
 
 from sertit import AnyPath
 from sertit.types import AnyPathType, is_iterable, make_iterable
@@ -10,7 +11,7 @@
 
 def test_types():
     """Test some type aliases"""
-    assert AnyPathType == Union[Path, CloudPath]
+    assert AnyPathType == Union[Path, CloudPath, UPath]
 
 
 def test_is_iterable():
diff --git a/CI/SCRIPTS/test_unistra.py b/CI/SCRIPTS/test_unistra.py
index 6f58bb9..7406400 100644
--- a/CI/SCRIPTS/test_unistra.py
+++ b/CI/SCRIPTS/test_unistra.py
@@ -16,11 +16,11 @@
 # limitations under the License.
 """ Script testing the CI """
 import pytest
-from cloudpathlib import S3Client
+from cloudpathlib import AnyPath, S3Client
 from tempenv import tempenv
 
 from CI.SCRIPTS.script_utils import CI_SERTIT_S3
-from sertit import AnyPath, ci, misc, rasters, s3
+from sertit import ci, misc, rasters, s3
 from sertit.unistra import (
     _get_db_path,
     get_db2_path,
@@ -73,7 +73,7 @@ def test_unistra_s3():
         assert with_s3() == 1
 
         # Test get_geodatastore with s3
-        assert str(get_geodatastore()) == "s3://sertit-geodatastore"
+        assert str(get_geodatastore()) == "s3://sertit-geodatastore/"
 
     # Test get_geodatastore without s3
     with tempenv.TemporaryEnvironment({s3.USE_S3_STORAGE: "0"}):
diff --git a/sertit/files.py b/sertit/files.py
index a5dea19..5d30cd1 100644
--- a/sertit/files.py
+++ b/sertit/files.py
@@ -221,16 +221,25 @@ def extract_sub_dir(arch, filename_list):
             arch.extractall(archive_output)
 
     # Manage archive type
+
     if file_path.suffix == ".zip":
+        if path.is_cloud_path(file_path):
+            file_path = s3.read(file_path)
         with zipfile.ZipFile(file_path, "r") as zip_file:
             extract_sub_dir(zip_file, zip_file.namelist())
     elif file_path.suffix == ".tar" or file_path.suffixes == [".tar", ".gz"]:
-        with tarfile.open(file_path, "r") as tar_file:
+        if path.is_cloud_path(file_path):
+            args = {"fileobj": s3.read(file_path), "mode": "r"}
+        else:
+            args = {"name": file_path, "mode": "r"}
+        with tarfile.open(**args) as tar_file:
             extract_sub_dir(tar_file, tar_file.getnames())
     elif file_path.suffix == ".7z":
         try:
             import py7zr
 
+            if path.is_cloud_path(file_path):
+                file_path = s3.read(file_path)
             with py7zr.SevenZipFile(file_path, "r") as z7_file:
                 extract_sub_dir(z7_file, z7_file.getnames())
         except ModuleNotFoundError:
@@ -394,14 +403,18 @@ def read_archived_file(
          bytes: Archived file in bytes
     """
     archive_path = AnyPath(archive_path)
-
+    archive_fn = get_filename(archive_path)
     # Compile regex
     regex = re.compile(regex)
 
     # Open tar and zip XML
     try:
         if archive_path.suffix == ".tar":
-            with tarfile.open(archive_path) as tar_ds:
+            if path.is_cloud_path(archive_path):
+                args = {"fileobj": s3.read(archive_path), "mode": "r"}
+            else:
+                args = {"name": archive_path, "mode": "r"}
+            with tarfile.open(**args) as tar_ds:
                 # file_list is not very useful for TAR files...
                 if file_list is None:
                     tar_mb = tar_ds.getmembers()
@@ -410,6 +423,8 @@ def read_archived_file(
                 tarinfo = tar_ds.getmember(name)
                 file_str = tar_ds.extractfile(tarinfo).read()
         elif archive_path.suffix == ".zip":
+            if path.is_cloud_path(archive_path):
+                archive_path = s3.read(archive_path)
             with zipfile.ZipFile(archive_path) as zip_ds:
                 if file_list is None:
                     file_list = [f.filename for f in zip_ds.filelist]
@@ -425,9 +440,7 @@ def read_archived_file(
                 "Only .zip and .tar files can be read from inside its archive."
             )
     except IndexError:
-        raise FileNotFoundError(
-            f"Impossible to find file {regex} in {path.get_filename(archive_path)}"
-        )
+        raise FileNotFoundError(f"Impossible to find file {regex} in {archive_fn}")
 
     return file_str
 
diff --git a/sertit/path.py b/sertit/path.py
index 079ec5a..69b97d6 100644
--- a/sertit/path.py
+++ b/sertit/path.py
@@ -26,7 +26,7 @@
 import zipfile
 from typing import Any, Union
 
-from sertit import AnyPath, logs
+from sertit import AnyPath, logs, s3
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -167,18 +167,27 @@ def get_archived_file_list(archive_path: AnyPathStrType) -> list:
         ['file_1.txt', 'file_2.tif', 'file_3.xml', 'file_4.geojson']
     """
     archive_path = AnyPath(archive_path)
-    if archive_path.suffix == ".zip":
+
+    is_zip = archive_path.suffix == ".zip"
+    archive_fn = get_filename(archive_path)
+    if is_zip:
+
+        if is_cloud_path(archive_path):
+            archive_path = s3.read(archive_path)
+
         with zipfile.ZipFile(archive_path) as zip_ds:
             file_list = [f.filename for f in zip_ds.filelist]
     else:
         try:
-            with tarfile.open(archive_path) as tar_ds:
+            if is_cloud_path(archive_path):
+                args = {"fileobj": s3.read(archive_path), "mode": "r"}
+            else:
+                args = {"name": archive_path, "mode": "r"}
+            with tarfile.open(**args) as tar_ds:
                 tar_mb = tar_ds.getmembers()
                 file_list = [mb.name for mb in tar_mb]
         except tarfile.ReadError as ex:
-            raise tarfile.ReadError(
-                f"Impossible to open archive: {archive_path}"
-            ) from ex
+            raise tarfile.ReadError(f"Impossible to open archive: {archive_fn}") from ex
 
     return file_list
 
diff --git a/sertit/s3.py b/sertit/s3.py
index f1e35e1..3b92d19 100644
--- a/sertit/s3.py
+++ b/sertit/s3.py
@@ -21,6 +21,7 @@
 import os
 from contextlib import contextmanager
 from functools import wraps
+from io import BytesIO
 
 from cloudpathlib import S3Client
 
@@ -305,3 +306,14 @@ def download(src, dst):
                 downloaded_path = src.download_to(dst)
 
     return downloaded_path
+
+
+def read(src):
+    src = AnyPath(src)
+    try:
+        b = src.read_bytes()
+    except Exception:
+        with src.open("rb") as f:
+            b = f.read()
+
+    return BytesIO(b)
diff --git a/sertit/vectors.py b/sertit/vectors.py
index ef3b626..7a39e17 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -255,8 +255,11 @@ def get_aoi_wkt(aoi_path: AnyPathStrType, as_str: bool = True) -> Union[str, Pol
 
     if aoi_path.suffix == ".wkt":
         try:
-            with open(aoi_path, "r") as aoi_f:
-                aoi = wkt.load(aoi_f)
+            if path.is_cloud_path(aoi_path):
+                aoi = wkt.load(s3.read(aoi_path))
+            else:
+                with open(aoi_path, "r") as aoi_f:
+                    aoi = wkt.load(aoi_f)
         except Exception as ex:
             raise ValueError("AOI WKT cannot be read") from ex
     else:
@@ -707,11 +710,19 @@ def ogr2geojson(
     vector_path = AnyPath(vector_path)
 
     # archived vector_path are extracted in a tmp folder so no need to be downloaded
+
     if vector_path.suffix == ".zip":
+        if path.is_cloud_path(vector_path):
+            vector_path = s3.read(vector_path)
         with zipfile.ZipFile(vector_path, "r") as zip_ds:
             vect_path = zip_ds.extract(arch_vect_path, out_dir)
     elif vector_path.suffix == ".tar":
-        with tarfile.open(vector_path, "r") as tar_ds:
+        if path.is_cloud_path(vector_path):
+            args = {"fileobj": s3.read(vector_path), "mode": "r"}
+        else:
+            args = {"name": vector_path, "mode": "r"}
+
+        with tarfile.open(**args) as tar_ds:
             tar_ds.extract(arch_vect_path, out_dir)
             vect_path = os.path.join(out_dir, arch_vect_path)
     else:
diff --git a/sertit/xml.py b/sertit/xml.py
index 0f358af..738c544 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -29,7 +29,7 @@
 )
 from lxml.html.builder import E
 
-from sertit import AnyPath, files, path
+from sertit import AnyPath, files, path, s3
 from sertit.logs import SU_NAME
 from sertit.misc import ListEnum
 from sertit.types import AnyPathStrType
@@ -55,12 +55,12 @@ def read(xml_path: AnyPathStrType) -> _Element:
             try:
                 # Try using read_text (faster)
                 root = fromstring(xml_path.read_text())
-            except ValueError:
+            except (ValueError, PermissionError):
                 # Try using read_bytes
                 # Slower but works with:
                 # {ValueError}Unicode strings with encoding declaration are not supported.
                 # Please use bytes input or XML fragments without declaration.
-                root = fromstring(xml_path.read_bytes())
+                root = fromstring(s3.read(xml_path))
         else:
             # pylint: disable=I1101:
             # Module 'lxml.etree' has no 'parse' member, but source is unavailable.

From 118cad12473f9cc3b28f702d2a5b0422bdf36754 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 12:33:56 +0100
Subject: [PATCH 03/18] Don't recreate Path without storage options to make it
 work with UPath

---
 CI/SCRIPTS/test_vectors.py |  5 ++++-
 CI/SCRIPTS/test_xml.py     |  5 +++--
 sertit/vectors.py          |  6 +++++-
 sertit/xml.py              | 38 +++++++++++++++++++++++++++++---------
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/CI/SCRIPTS/test_vectors.py b/CI/SCRIPTS/test_vectors.py
index 0ecd93a..d84e2c0 100644
--- a/CI/SCRIPTS/test_vectors.py
+++ b/CI/SCRIPTS/test_vectors.py
@@ -279,7 +279,10 @@ def test_read_archived():
     map_overlay_extracted = vectors.read(map_overlay_extracted_path)
 
     ci.assert_geom_equal(
-        map_overlay_extracted, vectors.read(f"{zip_landsat}!{landsat}/{map_overlay}")
+        map_overlay_extracted,
+        vectors.read(
+            zip_landsat.parent / (zip_landsat.name + f"!{landsat}/{map_overlay}")
+        ),
     )
     ci.assert_geom_equal(
         map_overlay_extracted,
diff --git a/CI/SCRIPTS/test_xml.py b/CI/SCRIPTS/test_xml.py
index fb74f16..63c23c9 100644
--- a/CI/SCRIPTS/test_xml.py
+++ b/CI/SCRIPTS/test_xml.py
@@ -111,7 +111,7 @@ def test_xml():
     _assert_str(cv_xml.findtext(".//Age"), "20")
 
     # Write
-    true_xml = str(xml_path() / "true.xml")
+    true_xml = xml_path() / "true.xml"
     with tempfile.TemporaryDirectory() as tmp_dir:
         tmp_xml = os.path.join(tmp_dir, "tmp.xml")
         xml.write(cv_xml, tmp_xml)
@@ -121,7 +121,8 @@ def test_xml():
     # Based on `files.read_archived_xml`, so it is considered to work.
     # Just test the case with complete path to the archive
     l8_archived = files_path() / "LM05_L1TP_200030_20121230_20200820_02_T2_CI.zip"
-    xml_archived = f"{l8_archived}!LM05_L1TP_200030_20121230_20200820_02_T2_CI/LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
+    xml_path_in_zip = "!LM05_L1TP_200030_20121230_20200820_02_T2_CI/LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
+    xml_archived = l8_archived.parent / (l8_archived.name + xml_path_in_zip)
 
     ci.assert_xml_equal(
         xml.read_archive(l8_archived, r".*_MTL\.xml"), xml.read_archive(xml_archived)
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 7a39e17..75769eb 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -476,7 +476,11 @@ def read(
         if "!" in str(vector_path):
             split_vect = str(vector_path).split("!")
             archive_regex = ".*{0}".format(split_vect[1].replace(".", r"\."))
-            vector_path = AnyPath(split_vect[0])
+            try:
+                vector_path = AnyPath(split_vect[0], **vector_path.storage_options)
+            except Exception:
+                # Cloudpathlib
+                vector_path = AnyPath(split_vect[0])
 
         # Manage archive case
         if vector_path.suffix in [".tar", ".zip"]:
diff --git a/sertit/xml.py b/sertit/xml.py
index 738c544..86e8471 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -29,7 +29,7 @@
 )
 from lxml.html.builder import E
 
-from sertit import AnyPath, files, path, s3
+from sertit import AnyPath, files, logs, path, s3
 from sertit.logs import SU_NAME
 from sertit.misc import ListEnum
 from sertit.types import AnyPathStrType
@@ -60,7 +60,7 @@ def read(xml_path: AnyPathStrType) -> _Element:
                 # Slower but works with:
                 # {ValueError}Unicode strings with encoding declaration are not supported.
                 # Please use bytes input or XML fragments without declaration.
-                root = fromstring(s3.read(xml_path))
+                root = fromstring(s3.read(xml_path).read())
         else:
             # pylint: disable=I1101:
             # Module 'lxml.etree' has no 'parse' member, but source is unavailable.
@@ -74,7 +74,10 @@ def read(xml_path: AnyPathStrType) -> _Element:
 
 
 def read_archive(
-    path: AnyPathStrType, regex: str = None, file_list: list = None
+    archive_path: AnyPathStrType = None,
+    regex: str = None,
+    file_list: list = None,
+    **kwargs,
 ) -> _Element:
     """
     Read an XML file from inside an archive (zip or tar)
@@ -86,25 +89,42 @@ def read_archive(
     - path to the archive plus a regex looking inside the archive. Duplicate behaviour to :py:func:`files.read_archived_xml`
 
     Args:
-        path (AnyPathStrType): Path to the XML file, stored inside an archive or path to the archive itself
+        archive_path (AnyPathStrType): Path to the XML file, stored inside an archive or path to the archive itself
         regex (str): Optional. If specified, the path should be the archive path and the regex should be the key to find the XML file inside the archive.
         file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
 
     Returns:
         _Element: XML Root
     """
+    if archive_path is None:
+        logs.deprecation_warning(
+            "'path' argument is deprecated, use 'archive_path' instead."
+        )
+        archive_path = kwargs.pop("path")
 
     try:
         if not regex:
-            path, basename = str(path).split("!")
+            archive_base_path, basename = str(archive_path).split("!")
             regex = basename
-            if path.startswith("zip://") or path.startswith("tar://"):
-                path = path[5:]
+            if archive_base_path.startswith("zip://") or archive_base_path.startswith(
+                "tar://"
+            ):
+                archive_base_path = archive_base_path[5:]
 
-        return files.read_archived_xml(path, regex, file_list=file_list)
+            # For UPath
+            try:
+                archive_base_path = AnyPath(
+                    archive_base_path, **archive_path.storage_options
+                )
+            except Exception:
+                pass
+        else:
+            archive_base_path = archive_path
+
+        return files.read_archived_xml(archive_base_path, regex, file_list=file_list)
 
     except XMLSyntaxError:
-        raise ValueError(f"Invalid metadata XML for {path}!")
+        raise ValueError(f"Invalid metadata XML for {archive_path}!")
 
 
 def write(xml: _Element, path: str) -> None:

From e6ba4387856b440291bb3142d4e7dcc21ba7ea10 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 14:30:27 +0100
Subject: [PATCH 04/18] Create archives module (to avoid circular imports
 between path and files) + remove some depr functions + fix erroneous merge

---
 CI/SCRIPTS/test_archives.py | 147 ++++++++++
 CI/SCRIPTS/test_files.py    | 146 +---------
 CI/SCRIPTS/test_path.py     |  57 +---
 CI/SCRIPTS/test_vectors.py  |   4 +-
 sertit/archives.py          | 558 ++++++++++++++++++++++++++++++++++++
 sertit/files.py             | 458 +----------------------------
 sertit/path.py              | 188 +-----------
 sertit/vectors.py           |  25 +-
 sertit/xml.py               |  36 ++-
 9 files changed, 756 insertions(+), 863 deletions(-)
 create mode 100644 CI/SCRIPTS/test_archives.py
 create mode 100644 sertit/archives.py

diff --git a/CI/SCRIPTS/test_archives.py b/CI/SCRIPTS/test_archives.py
new file mode 100644
index 0000000..3415618
--- /dev/null
+++ b/CI/SCRIPTS/test_archives.py
@@ -0,0 +1,147 @@
+import os
+import shutil
+
+import pytest
+from lxml import etree, html
+
+from CI.SCRIPTS.script_utils import files_path, s3_env
+from sertit import archives, ci, files, path, s3, vectors
+
+
+def test_archive(tmp_path):
+    """Test extracting functions"""
+    # Archives
+    zip_file = files_path().joinpath("test_zip.zip")
+    zip2_file = files_path().joinpath("test_zip.zip")  # For overwrite
+    zip_without_directory = files_path().joinpath("test_zip_without_directory.zip")
+    tar_file = files_path().joinpath("test_tar.tar")
+    tar_gz_file = files_path().joinpath("test_targz.tar.gz")
+
+    # Core dir
+    core_dir = files_path().joinpath("core")
+    folder = core_dir
+    arch = [
+        zip_file,
+        tar_file,
+        tar_gz_file,
+        folder,
+        zip2_file,
+        zip_without_directory,
+    ]
+
+    # Extract
+    extracted_dirs = archives.extract_files(arch, tmp_path, overwrite=True)
+    archives.extract_files([zip2_file], tmp_path, overwrite=False)  # Already existing
+
+    # Test
+    for ex_dir in extracted_dirs:
+        ci.assert_dir_equal(core_dir, ex_dir)
+
+    # Archive
+    archive_base = os.path.join(tmp_path, "archive")
+    for fmt in ["zip", "tar", "gztar"]:
+        archive_fn = archives.archive(
+            folder_path=core_dir, archive_path=archive_base, fmt=fmt
+        )
+        out = archives.extract_file(archive_fn, tmp_path)
+        # an additional folder is created
+        out_dir = path.listdir_abspath(out)[0]
+        ci.assert_dir_equal(core_dir, out_dir)
+
+        # Remove the output directory to avoid any interference with the next format
+        files.remove(out)
+
+    # Add to zip
+    zip_out = zip2_file if path.is_cloud_path(zip2_file) else archive_base + ".zip"
+    core_copy = files.copy(core_dir, os.path.join(tmp_path, "core2"))
+    zip_out = archives.add_to_zip(zip_out, core_copy)
+
+    # Extract
+    unzip_out = os.path.join(tmp_path, "out")
+    unzip_out = archives.extract_file(zip_out, unzip_out)
+
+    # Test
+    unzip_dirs = path.listdir_abspath(unzip_out)
+
+    assert len(unzip_dirs) == 2
+    ci.assert_dir_equal(unzip_dirs[0], unzip_dirs[1])
+
+
+@s3_env
+def test_archived_files(tmp_path):
+    landsat_name = "LM05_L1TP_200030_20121230_20200820_02_T2_CI"
+    ok_folder = files_path().joinpath(landsat_name)
+    zip_file = files_path().joinpath(f"{landsat_name}.zip")
+    tar_file = files_path().joinpath(f"{landsat_name}.tar")
+    targz_file = files_path().joinpath(f"{landsat_name}.tar.gz")
+    sz_file = files_path().joinpath(f"{landsat_name}.7z")
+
+    # VECTORS
+    vect_name = "map-overlay.kml"
+    vec_ok_path = ok_folder.joinpath(vect_name)
+    if shutil.which("ogr2ogr"):  # Only works if ogr2ogr can be found.
+        vect_regex = f".*{vect_name}"
+        vect_zip = vectors.read(zip_file, archive_regex=vect_regex)
+        vect_tar = vectors.read(tar_file, archive_regex=r".*overlay\.kml")
+        vect_ok = vectors.read(vec_ok_path)
+        assert not vect_ok.empty
+        ci.assert_geom_equal(vect_ok, vect_zip)
+        ci.assert_geom_equal(vect_ok, vect_tar)
+
+    # XML
+    xml_name = "LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
+    xml_ok_path = ok_folder.joinpath(xml_name)
+    xml_ok_path = str(s3.download(xml_ok_path, tmp_path))
+
+    xml_regex = f".*{xml_name}"
+    xml_zip = archives.read_archived_xml(zip_file, xml_regex)
+    xml_tar = archives.read_archived_xml(tar_file, r".*_MTL\.xml")
+    xml_ok = etree.parse(xml_ok_path).getroot()
+    ci.assert_xml_equal(xml_ok, xml_zip)
+    ci.assert_xml_equal(xml_ok, xml_tar)
+
+    # FILE + HTML
+    html_zip_file = files_path().joinpath("productPreview.zip")
+    html_tar_file = files_path().joinpath("productPreview.tar")
+    html_name = "productPreview.html"
+    html_ok_path = files_path().joinpath(html_name)
+    html_ok_path = str(s3.download(html_ok_path, tmp_path))
+
+    html_regex = f".*{html_name}"
+
+    # FILE
+    file_zip = archives.read_archived_file(html_zip_file, html_regex)
+    file_tar = archives.read_archived_file(html_tar_file, html_regex)
+    html_ok = html.parse(html_ok_path).getroot()
+    ci.assert_html_equal(html_ok, html.fromstring(file_zip))
+    ci.assert_html_equal(html_ok, html.fromstring(file_tar))
+
+    file_list = archives.get_archived_file_list(html_zip_file)
+    ci.assert_html_equal(
+        html_ok,
+        html.fromstring(
+            archives.read_archived_file(html_zip_file, html_regex, file_list=file_list)
+        ),
+    )
+
+    # HTML
+    html_zip = archives.read_archived_html(html_zip_file, html_regex)
+    html_tar = archives.read_archived_html(html_tar_file, html_regex)
+    ci.assert_html_equal(html_ok, html_zip)
+    ci.assert_html_equal(html_ok, html_tar)
+    ci.assert_html_equal(
+        html_ok,
+        archives.read_archived_html(
+            html_tar_file,
+            html_regex,
+            file_list=archives.get_archived_file_list(html_tar_file),
+        ),
+    )
+
+    # ERRORS
+    with pytest.raises(TypeError):
+        archives.read_archived_file(targz_file, xml_regex)
+    with pytest.raises(TypeError):
+        archives.read_archived_file(sz_file, xml_regex)
+    with pytest.raises(FileNotFoundError):
+        archives.read_archived_file(zip_file, "cdzeferf")
diff --git a/CI/SCRIPTS/test_files.py b/CI/SCRIPTS/test_files.py
index 04015b7..a0f9889 100644
--- a/CI/SCRIPTS/test_files.py
+++ b/CI/SCRIPTS/test_files.py
@@ -16,160 +16,18 @@
 """Script testing the files"""
 
 import os
-import shutil
 import tempfile
 from datetime import date, datetime
 
 import numpy as np
 import pytest
-from lxml import etree, html
 
-from CI.SCRIPTS.script_utils import Polarization, files_path, s3_env
-from sertit import AnyPath, ci, files, path, s3, vectors
+from CI.SCRIPTS.script_utils import Polarization
+from sertit import AnyPath, ci, files
 
 ci.reduce_verbosity()
 
 
-def test_archive():
-    """Test extracting functions"""
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        # Archives
-        zip_file = files_path().joinpath("test_zip.zip")
-        zip2_file = files_path().joinpath("test_zip.zip")  # For overwrite
-        zip_without_directory = files_path().joinpath("test_zip_without_directory.zip")
-        tar_file = files_path().joinpath("test_tar.tar")
-        tar_gz_file = files_path().joinpath("test_targz.tar.gz")
-
-        # Core dir
-        core_dir = files_path().joinpath("core")
-        folder = core_dir
-        archives = [
-            zip_file,
-            tar_file,
-            tar_gz_file,
-            folder,
-            zip2_file,
-            zip_without_directory,
-        ]
-
-        # Extract
-        extracted_dirs = files.extract_files(archives, tmp_dir, overwrite=True)
-        files.extract_files([zip2_file], tmp_dir, overwrite=False)  # Already existing
-
-        # Test
-        for ex_dir in extracted_dirs:
-            ci.assert_dir_equal(core_dir, ex_dir)
-
-        # Archive
-        archive_base = os.path.join(tmp_dir, "archive")
-        for fmt in ["zip", "tar", "gztar"]:
-            archive_fn = files.archive(
-                folder_path=core_dir, archive_path=archive_base, fmt=fmt
-            )
-            out = files.extract_file(archive_fn, tmp_dir)
-            # an additional folder is created
-            out_dir = path.listdir_abspath(out)[0]
-            ci.assert_dir_equal(core_dir, out_dir)
-
-            # Remove out directory in order to avoid any interferences
-            files.remove(out)
-
-        # Add to zip
-        zip_out = zip2_file if path.is_cloud_path(zip2_file) else archive_base + ".zip"
-        core_copy = files.copy(core_dir, os.path.join(tmp_dir, "core2"))
-        zip_out = files.add_to_zip(zip_out, core_copy)
-
-        # Extract
-        unzip_out = os.path.join(tmp_dir, "out")
-        unzip_out = files.extract_file(zip_out, unzip_out)
-
-        # Test
-        unzip_dirs = path.listdir_abspath(unzip_out)
-
-        assert len(unzip_dirs) == 2
-        ci.assert_dir_equal(unzip_dirs[0], unzip_dirs[1])
-
-
-@s3_env
-def test_archived_files(tmp_path):
-    landsat_name = "LM05_L1TP_200030_20121230_20200820_02_T2_CI"
-    ok_folder = files_path().joinpath(landsat_name)
-    zip_file = files_path().joinpath(f"{landsat_name}.zip")
-    tar_file = files_path().joinpath(f"{landsat_name}.tar")
-    targz_file = files_path().joinpath(f"{landsat_name}.tar.gz")
-    sz_file = files_path().joinpath(f"{landsat_name}.7z")
-
-    # VECTORS
-    vect_name = "map-overlay.kml"
-    vec_ok_path = ok_folder.joinpath(vect_name)
-    if shutil.which("ogr2ogr"):  # Only works if ogr2ogr can be found.
-        vect_regex = f".*{vect_name}"
-        vect_zip = vectors.read(zip_file, archive_regex=vect_regex)
-        vect_tar = vectors.read(tar_file, archive_regex=r".*overlay\.kml")
-        vect_ok = vectors.read(vec_ok_path)
-        assert not vect_ok.empty
-        ci.assert_geom_equal(vect_ok, vect_zip)
-        ci.assert_geom_equal(vect_ok, vect_tar)
-
-    # XML
-    xml_name = "LM05_L1TP_200030_20121230_20200820_02_T2_MTL.xml"
-    xml_ok_path = ok_folder.joinpath(xml_name)
-    xml_ok_path = str(s3.download(xml_ok_path, tmp_path))
-
-    xml_regex = f".*{xml_name}"
-    xml_zip = files.read_archived_xml(zip_file, xml_regex)
-    xml_tar = files.read_archived_xml(tar_file, r".*_MTL\.xml")
-    xml_ok = etree.parse(xml_ok_path).getroot()
-    ci.assert_xml_equal(xml_ok, xml_zip)
-    ci.assert_xml_equal(xml_ok, xml_tar)
-
-    # FILE + HTML
-    html_zip_file = files_path().joinpath("productPreview.zip")
-    html_tar_file = files_path().joinpath("productPreview.tar")
-    html_name = "productPreview.html"
-    html_ok_path = files_path().joinpath(html_name)
-    html_ok_path = str(s3.download(html_ok_path, tmp_path))
-
-    html_regex = f".*{html_name}"
-
-    # FILE
-    file_zip = files.read_archived_file(html_zip_file, html_regex)
-    file_tar = files.read_archived_file(html_tar_file, html_regex)
-    html_ok = html.parse(html_ok_path).getroot()
-    ci.assert_html_equal(html_ok, html.fromstring(file_zip))
-    ci.assert_html_equal(html_ok, html.fromstring(file_tar))
-
-    file_list = path.get_archived_file_list(html_zip_file)
-    ci.assert_html_equal(
-        html_ok,
-        html.fromstring(
-            files.read_archived_file(html_zip_file, html_regex, file_list=file_list)
-        ),
-    )
-
-    # HTML
-    html_zip = files.read_archived_html(html_zip_file, html_regex)
-    html_tar = files.read_archived_html(html_tar_file, html_regex)
-    ci.assert_html_equal(html_ok, html_zip)
-    ci.assert_html_equal(html_ok, html_tar)
-    ci.assert_html_equal(
-        html_ok,
-        files.read_archived_html(
-            html_tar_file,
-            html_regex,
-            file_list=path.get_archived_file_list(html_tar_file),
-        ),
-    )
-
-    # ERRORS
-    with pytest.raises(TypeError):
-        files.read_archived_file(targz_file, xml_regex)
-    with pytest.raises(TypeError):
-        files.read_archived_file(sz_file, xml_regex)
-    with pytest.raises(FileNotFoundError):
-        files.read_archived_file(zip_file, "cdzeferf")
-
-
 def test_cp_rm():
     """Test CP/RM functions"""
     with tempfile.TemporaryDirectory() as tmp_dir:
diff --git a/CI/SCRIPTS/test_path.py b/CI/SCRIPTS/test_path.py
index bf335ec..d173bc3 100644
--- a/CI/SCRIPTS/test_path.py
+++ b/CI/SCRIPTS/test_path.py
@@ -16,13 +16,12 @@
 """Script testing the files"""
 
 import os
-import shutil
 import tempfile
 
 import pytest
 
-from CI.SCRIPTS.script_utils import files_path, get_s3_ci_path, s3_env
-from sertit import AnyPath, ci, misc, path, vectors
+from CI.SCRIPTS.script_utils import get_s3_ci_path
+from sertit import AnyPath, ci, misc, path
 
 ci.reduce_verbosity()
 
@@ -65,58 +64,6 @@ def test_paths():
         assert not path.is_writable("cvfgbherth")  # Non-existing
 
 
-@s3_env
-def test_archived_paths():
-    landsat_name = "LM05_L1TP_200030_20121230_20200820_02_T2_CI"
-    ok_folder = files_path().joinpath(landsat_name)
-    zip_file = files_path().joinpath(f"{landsat_name}.zip")
-    tar_file = files_path().joinpath(f"{landsat_name}.tar")
-    targz_file = files_path().joinpath(f"{landsat_name}.tar.gz")
-    sz_file = files_path().joinpath(f"{landsat_name}.7z")
-
-    # Archive file
-    tif_name = "LM05_L1TP_200030_20121230_20200820_02_T2_QA_RADSAT.TIF"
-    tif_ok = f"{ok_folder.name}/{tif_name}"
-    tif_regex = f".*{tif_name}"
-    assert tif_ok == path.get_archived_path(zip_file, tif_regex)
-    assert tif_ok == path.get_archived_path(zip_file, tif_regex, as_list=True)[0]
-    assert tif_ok == path.get_archived_path(tar_file, ".*RADSAT")
-
-    # RASTERIO
-    tif_zip = path.get_archived_rio_path(zip_file, tif_regex)
-    tif_list = path.get_archived_rio_path(zip_file, tif_regex, as_list=True)
-    tif_tar = path.get_archived_rio_path(tar_file, ".*RADSAT")
-    tif_ok = ok_folder.joinpath(tif_name)
-    ci.assert_raster_equal(tif_ok, tif_zip)
-    ci.assert_raster_equal(tif_ok, tif_list[0])
-    ci.assert_raster_equal(tif_ok, tif_tar)
-
-    file_list = path.get_archived_file_list(zip_file)
-    ci.assert_raster_equal(
-        tif_ok, path.get_archived_rio_path(zip_file, tif_regex, file_list=file_list)
-    )
-
-    # VECTORS
-    vect_name = "map-overlay.kml"
-    vec_ok_path = ok_folder.joinpath(vect_name)
-    if shutil.which("ogr2ogr"):  # Only works if ogr2ogr can be found.
-        vect_regex = f".*{vect_name}"
-        vect_zip = vectors.read(zip_file, archive_regex=vect_regex)
-        vect_tar = vectors.read(tar_file, archive_regex=r".*overlay\.kml")
-        vect_ok = vectors.read(vec_ok_path)
-        assert not vect_ok.empty
-        ci.assert_geom_equal(vect_ok, vect_zip)
-        ci.assert_geom_equal(vect_ok, vect_tar)
-
-    # ERRORS
-    with pytest.raises(TypeError):
-        path.get_archived_rio_path(targz_file, tif_regex)
-    with pytest.raises(TypeError):
-        path.get_archived_rio_path(sz_file, tif_regex)
-    with pytest.raises(FileNotFoundError):
-        path.get_archived_rio_path(zip_file, "cdzeferf")
-
-
 def test_get_file_name():
     """Test get_file_name"""
     file_name = path.get_filename(__file__)
diff --git a/CI/SCRIPTS/test_vectors.py b/CI/SCRIPTS/test_vectors.py
index 5f9bd92..5a79272 100644
--- a/CI/SCRIPTS/test_vectors.py
+++ b/CI/SCRIPTS/test_vectors.py
@@ -25,7 +25,7 @@
 from shapely import wkt
 
 from CI.SCRIPTS.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
-from sertit import ci, files, path, vectors
+from sertit import archives, ci, files, path, vectors
 from sertit.vectors import EPSG_4326, DataSourceError
 
 ci.reduce_verbosity()
@@ -294,7 +294,7 @@ def test_read_archived():
         vectors.read(tar_landsat, archive_regex=map_overlay_regex),
     )
 
-    file_list = path.get_archived_file_list(tar_landsat)
+    file_list = archives.get_archived_file_list(tar_landsat)
     ci.assert_geom_equal(
         map_overlay_extracted,
         vectors.read(tar_landsat, archive_regex=map_overlay_regex, file_list=file_list),
diff --git a/sertit/archives.py b/sertit/archives.py
new file mode 100644
index 0000000..2115071
--- /dev/null
+++ b/sertit/archives.py
@@ -0,0 +1,558 @@
+import logging
+import os
+import re
+import shutil
+import tarfile
+import tempfile
+import zipfile
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Union
+
+from lxml import etree, html
+from tqdm import tqdm
+
+from sertit import AnyPath, logs, path, s3
+from sertit.logs import SU_NAME
+from sertit.types import AnyPathStrType, AnyPathType
+
+LOGGER = logging.getLogger(SU_NAME)
+
+
+@contextmanager
+def open_zipfile(file_path, mode="r"):
+    if path.is_cloud_path(file_path):
+        file_path = s3.read(file_path)
+
+    with zipfile.ZipFile(file_path, mode) as zip_file:
+        yield zip_file
+
+
+@contextmanager
+def open_tarfile(file_path, mode="r"):
+    if path.is_cloud_path(file_path):
+        args = {"fileobj": s3.read(file_path), "mode": mode}
+    else:
+        args = {"name": file_path, "mode": mode}
+    with tarfile.open(**args) as tar_file:
+        yield tar_file
+
+
+def extract_file(
+    file_path: AnyPathStrType,
+    output: AnyPathStrType,
+    overwrite: bool = False,
+) -> AnyPathType:
+    """
+    Extract an archived file (zip or others). Overwrites if specified.
+    If the archive doesn't contain a root directory named after the archive (without its extension), create it.
+
+    Args:
+        file_path (str): Archive file path
+        output (str): Output where to put the extracted directory
+        overwrite (bool): Overwrite found extracted directory
+
+    Returns:
+        AnyPathType: Extracted directory path
+
+    Example:
+        >>> file_path = 'D:/path/to/zip.zip'
+        >>> output = 'D:/path/to/output'
+        >>> extract_file(file_path, output, overwrite=True)
+        'D:/path/to/output/zip'
+    """
+    # Convert to path
+    file_path = AnyPath(file_path)
+    output = AnyPath(output)
+
+    # In case a folder is given, returns it (this means that the file is already extracted)
+    if file_path.is_dir():
+        return file_path
+
+    # Beware with .SEN3 and .SAFE extensions
+    archive_output = output.joinpath(path.get_filename(file_path))
+
+    # In case not overwrite and the extracted directory already exists
+    if not overwrite and archive_output.exists():
+        LOGGER.debug(
+            "Already existing extracted %s. It won't be overwritten.",
+            archive_output,
+        )
+        return archive_output
+
+    def extract_sub_dir(arch, filename_list):
+        top_level_files = list({item.split("/")[0] for item in filename_list})
+
+        # When the only root directory in the archive has the right name, we don't have to create it
+        if len(top_level_files) == 1 and archive_output.name == path.get_filename(
+            top_level_files[0]
+        ):
+            arch.extractall(archive_output.parent)
+            archive_output.parent.joinpath(top_level_files[0]).rename(archive_output)
+        else:
+            arch.extractall(archive_output)
+
+    # Manage archive type
+    if file_path.suffix == ".zip":
+        with open_zipfile(file_path) as zip_file:
+            extract_sub_dir(zip_file, zip_file.namelist())
+    elif file_path.suffix == ".tar" or file_path.suffixes == [".tar", ".gz"]:
+        with open_tarfile(file_path) as tar_file:
+            extract_sub_dir(tar_file, tar_file.getnames())
+    elif file_path.suffix == ".7z":
+        try:
+            import py7zr
+
+            with py7zr.SevenZipFile(file_path, "r") as z7_file:
+                extract_sub_dir(z7_file, z7_file.getnames())
+        except ModuleNotFoundError as exc:
+            raise TypeError("Please install 'py7zr' to extract .7z files") from exc
+    else:
+        raise TypeError(
+            f"Only .zip, .tar, .tar.gz and .7z files can be extracted, not {file_path}"
+        )
+
+    return archive_output
+
+
+def extract_files(
+    archives: list, output: AnyPathStrType, overwrite: bool = False
+) -> list:
+    """
+    Extract all archived files. Overwrites if specified.
+
+    Example:
+        >>> file_path = ['D:/path/to/zip1.zip', 'D:/path/to/zip2.zip']
+        >>> output = 'D:/path/to/output'
+        >>> extract_files(file_path, output, overwrite=True)
+        ['D:/path/to/output/zip1', 'D:/path/to/output/zip2']
+
+    Args:
+        archives (list of str): List of archives to be extracted
+        output (str): Output folder where extracted files will be written
+        overwrite (bool): Overwrite found extracted files
+
+    Returns:
+        list: Extracted files (even pre-existing ones)
+    """
+    LOGGER.info("Extracting products in %s", output)
+    progress_bar = tqdm(archives)
+    extracts = []
+    for arch in progress_bar:
+        progress_bar.set_description(f"Extracting product {os.path.basename(arch)}")
+        extracts.append(extract_file(arch, output, overwrite))
+
+    return extracts
+
+
+def read_archived_file(
+    archive_path: AnyPathStrType, regex: str, file_list: list = None
+) -> bytes:
+    """
+    Read archived file (in bytes) from :code:`zip` or :code:`tar` archives.
+
+    You can use this `site <https://regexr.com/>`_ to build your regex.
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+        regex (str): Regex (used by re) as it can be found in the getmembers() list
+        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
+
+    Returns:
+         bytes: Archived file in bytes
+    """
+    archive_path = AnyPath(archive_path)
+
+    # Compile regex
+    regex = re.compile(regex)
+
+    # Open tar and zip archives
+    try:
+        if archive_path.suffix == ".tar":
+            with open_tarfile(archive_path) as tar_ds:
+                # file_list is not very useful for TAR files...
+                if file_list is None:
+                    tar_mb = tar_ds.getmembers()
+                    file_list = [mb.name for mb in tar_mb]
+                name = list(filter(regex.match, file_list))[0]
+                tarinfo = tar_ds.getmember(name)
+                file_str = tar_ds.extractfile(tarinfo).read()
+        elif archive_path.suffix == ".zip":
+            with open_zipfile(archive_path) as zip_ds:
+                if file_list is None:
+                    file_list = [f.filename for f in zip_ds.filelist]
+                name = list(filter(regex.match, file_list))[0]
+                file_str = zip_ds.read(name)
+
+        elif archive_path.suffix == ".tar.gz":
+            raise TypeError(
+                ".tar.gz files are too slow to read from inside the archive. Please extract them instead."
+            )
+        else:
+            raise TypeError(
+                "Only .zip and .tar files can be read from inside its archive."
+            )
+    except IndexError as exc:
+        raise FileNotFoundError(
+            f"Impossible to find file {regex} in {path.get_filename(archive_path)}"
+        ) from exc
+
+    return file_str
+
+
+def read_archived_xml(
+    archive_path: AnyPathStrType, regex: str = None, file_list: list = None, **kwargs
+) -> etree._Element:
+    """
+    Read archived XML from :code:`zip` or :code:`tar` archives.
+
+    You can use this `site <https://regexr.com/>`_ to build your regex.
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+        regex (str): XML regex (used by re) as it can be found in the getmembers() list
+        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
+
+    Returns:
+         etree._Element: XML file
+
+    Example:
+        >>> arch_path = 'D:/path/to/zip.zip'
+        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
+        >>> read_archived_xml(arch_path, file_regex)
+        <Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
+    """
+    if regex is None:
+        logs.deprecation_warning(
+            "'xml_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("xml_regex")
+
+    xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list)
+
+    return etree.fromstring(xml_bytes)
+
+
+def read_archived_html(
+    archive_path: AnyPathStrType, regex: str, file_list: list = None
+) -> html.HtmlElement:
+    """
+    Read archived HTML from :code:`zip` or :code:`tar` archives.
+
+    You can use this `site <https://regexr.com/>`_ to build your regex.
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+        regex (str): HTML regex (used by re) as it can be found in the getmembers() list
+        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
+
+    Returns:
+         html.HtmlElement: HTML file
+
+    Example:
+        >>> arch_path = 'D:/path/to/zip.zip'
+        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
+        >>> read_archived_html(arch_path, file_regex)
+        <Element html at 0x1c90007f8c8>
+    """
+    html_bytes = read_archived_file(archive_path, regex, file_list=file_list)
+
+    return html.fromstring(html_bytes)
+
+
+def archive(
+    folder_path: AnyPathStrType,
+    archive_path: AnyPathStrType,
+    fmt: str = "zip",
+) -> AnyPathType:
+    """
+    Archives a folder recursively.
+
+    Args:
+        folder_path (AnyPathStrType): Folder to archive
+        archive_path (AnyPathStrType): Archive path, with or without extension
+        fmt (str): Format of the archive, used by :code:`shutil.make_archive`. Choose between [zip, tar, gztar, bztar, xztar]
+
+    Returns:
+        AnyPathType: Archive path
+
+    Example:
+        >>> folder_path = 'D:/path/to/folder_to_archive'
+        >>> archive_path = 'D:/path/to/output'
+        >>> archive = archive(folder_path, archive_path, fmt="gztar")
+        'D:/path/to/output.tar.gz'
+    """
+    archive_path = AnyPath(archive_path)
+    folder_path = AnyPath(folder_path)
+
+    tmp_dir = None
+    if path.is_cloud_path(folder_path):
+        tmp_dir = tempfile.TemporaryDirectory()
+        folder_path = folder_path.download_to(tmp_dir.name)
+
+    # Shutil make_archive needs a path without extension
+    archive_base = os.path.splitext(archive_path)[0]
+
+    # Archive the folder
+    archive_fn = shutil.make_archive(
+        archive_base,
+        format=fmt,
+        root_dir=folder_path.parent,
+        base_dir=folder_path.name,
+    )
+
+    if tmp_dir is not None:
+        tmp_dir.cleanup()
+
+    return AnyPath(archive_fn)
+
+
+def add_to_zip(
+    zip_path: AnyPathStrType,
+    dirs_to_add: Union[list, AnyPathStrType],
+) -> AnyPathType:
+    """
+    Add folders to an already existing zip file (recursively).
+
+    Args:
+        zip_path (AnyPathStrType): Already existing zip file
+        dirs_to_add (Union[list, AnyPathStrType]): Directories to add
+
+    Returns:
+        AnyPathType: Updated zip_path
+
+    Example:
+        >>> zip_path = 'D:/path/to/zip.zip'
+        >>> dirs_to_add = ['D:/path/to/dir1', 'D:/path/to/dir2']
+        >>> add_to_zip(zip_path, dirs_to_add)
+        zip.zip contains 2 more folders, dir1 and dir2
+    """
+    zip_path = AnyPath(zip_path)
+
+    # If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
+    if path.is_cloud_path(zip_path):
+        zip_path = AnyPath(zip_path.fspath)
+
+    # Check if existing zipfile
+    if not zip_path.is_file():
+        raise FileNotFoundError(f"Non existing {zip_path}")
+
+    # Convert to list if needed
+    if not isinstance(dirs_to_add, list):
+        dirs_to_add = [dirs_to_add]
+
+    # Add all folders to the existing zip
+    # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
+    with open_zipfile(zip_path, "a") as zip_file:
+        progress_bar = tqdm(dirs_to_add)
+        for dir_to_add_path in progress_bar:
+            # Just to be sure, use str instead of Paths
+            if isinstance(dir_to_add_path, Path):
+                dir_to_add = str(dir_to_add_path)
+            elif path.is_cloud_path(dir_to_add_path):
+                dir_to_add = dir_to_add_path.fspath
+            else:
+                dir_to_add = dir_to_add_path
+
+            progress_bar.set_description(
+                f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
+            )
+            tmp = tempfile.TemporaryDirectory()
+            if os.path.isfile(dir_to_add):
+                dir_to_add = extract_file(dir_to_add, tmp.name)
+
+            for root, _, files in os.walk(dir_to_add):
+                base_path = os.path.join(dir_to_add, "..")
+
+                # Write dir (in namelist at least)
+                zip_file.write(root, os.path.relpath(root, base_path))
+
+                # Write files
+                for file in files:
+                    zip_file.write(
+                        os.path.join(root, file),
+                        os.path.relpath(
+                            os.path.join(root, file), os.path.join(dir_to_add, "..")
+                        ),
+                    )
+
+            # Clean tmp
+            tmp.cleanup()
+
+    return zip_path
+
+
+def get_archived_file_list(archive_path: AnyPathStrType) -> list:
+    """
+    Get the list of all the files contained in an archive (:code:`.zip`, anything else is opened as a tar).
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+
+    Returns:
+        list: All files contained in the given archive
+
+    Example:
+        >>> arch_path = 'D:/path/to/zip.zip'
+        >>> get_archived_file_list(arch_path)
+        ['file_1.txt', 'file_2.tif', 'file_3.xml', 'file_4.geojson']
+    """
+    archive_path = AnyPath(archive_path)
+    # Only the last suffix is checked: every archive that is not a .zip is handed to the tar opener
+    is_zip = archive_path.suffix == ".zip"
+    archive_fn = path.get_filename(archive_path)
+    if is_zip:
+        with open_zipfile(archive_path) as zip_ds:
+            file_list = [f.filename for f in zip_ds.filelist]
+    else:
+        try:
+            with open_tarfile(archive_path) as tar_ds:
+                tar_mb = tar_ds.getmembers()
+                file_list = [mb.name for mb in tar_mb]
+        except tarfile.ReadError as ex:
+            raise tarfile.ReadError(f"Impossible to open archive: {archive_fn}") from ex
+
+    return file_list
+
+
+def get_archived_path(
+    archive_path: AnyPathStrType,
+    regex: str = None,
+    as_list: bool = False,
+    case_sensitive: bool = False,
+    file_list: list = None,
+    **kwargs,
+) -> Union[list, AnyPathType]:
+    """
+    Get archived file path from inside the archive.
+
+    .. WARNING::
+        If :code:`as_list` is :code:`False`, it will only return the first file matched !
+
+    You can use this `site <https://regexr.com/>`_ to build your regex.
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+        regex (str): File regex (applied with re.match) as it can be found in the getmembers() list. If None, the deprecated 'file_regex' keyword is used instead.
+        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
+        case_sensitive (bool): If true, the regex is case-sensitive.
+        file_list (list): List of files to get archived from. Optional, if not given it will be re-computed.
+
+    Returns:
+        Union[list, str]: Path(s) from inside the archive
+
+    Example:
+        >>> arch_path = 'D:/path/to/zip.zip'
+        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
+        >>> path = get_archived_path(arch_path, file_regex)
+        'dir/filename.tif'
+    """
+    if regex is None:
+        logs.deprecation_warning(
+            "'file_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("file_regex")
+
+    # Get file list
+    archive_path = AnyPath(archive_path)
+
+    # Offer the ability to give the file list directly, as this operation is expensive when done with large archives stored on the cloud
+    if file_list is None:
+        file_list = get_archived_file_list(archive_path)
+
+    # Search for file
+    re_rgx = re.compile(regex) if case_sensitive else re.compile(regex, re.IGNORECASE)
+    archived_band_paths = list(filter(re_rgx.match, file_list))
+    if not archived_band_paths:
+        raise FileNotFoundError(
+            f"Impossible to find file {regex} in {path.get_filename(archive_path)}"
+        )
+
+    # Keep only the first match when a list is not requested
+    if not as_list:
+        archived_band_paths = archived_band_paths[0]
+
+    return archived_band_paths
+
+
+def get_archived_rio_path(
+    archive_path: AnyPathStrType,
+    regex: str = None,
+    as_list: bool = False,
+    file_list: list = None,
+    **kwargs,
+) -> Union[list, AnyPathType]:
+    """
+    Get archived file path from inside the archive, to be read with rasterio:
+
+    - local archives: :code:`/vsizip/{zip_path}/{file_name}` or :code:`/vsitar/{tar_path}/{file_name}`
+    - cloud archives: :code:`zip+file+{zip_path}!{file_name}` or :code:`tar+file+{tar_path}!{file_name}`
+
+
+    See `here <https://rasterio.readthedocs.io/en/latest/topics/datasets.html?highlight=zip#dataset-identifiers>`_
+    for more information.
+
+    .. WARNING::
+        It won't be readable by pandas, geopandas or xmltree !
+
+    .. WARNING::
+        If :code:`as_list` is :code:`False`, it will only return the first file matched !
+
+    You can use this `site <https://regexr.com/>`_ to build your regex.
+
+    Args:
+        archive_path (AnyPathStrType): Archive path
+        regex (str): File regex (used by re) as it can be found in the getmembers() list. If None, the deprecated 'file_regex' keyword is used instead.
+        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
+        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
+
+    Returns:
+        Union[list, str]: Band path that can be read by rasterio
+
+    Example:
+        >>> arch_path = '/path/to/zip.zip'
+        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
+        >>> path = get_archived_rio_path(arch_path, file_regex)
+        '/vsizip//path/to/zip.zip/dir/filename.tif'
+        >>> rasterio.open(path)
+        <open DatasetReader name='/vsizip//path/to/zip.zip/dir/filename.tif' mode='r'>
+    """
+    if regex is None:
+        logs.deprecation_warning(
+            "'file_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("file_regex")
+
+    archive_path = AnyPath(archive_path)
+    if archive_path.suffix in [".tar", ".zip"]:
+        prefix = archive_path.suffix[-3:]
+    elif archive_path.name.endswith(".tar.gz"):
+        raise TypeError(
+            ".tar.gz files are too slow to be read from inside the archive. Please extract them instead."
+        )
+    else:
+        raise TypeError("Only .zip and .tar files can be read from inside its archive.")
+
+    # Search for file
+    archived_band_paths = get_archived_path(
+        archive_path, regex=regex, as_list=True, file_list=file_list
+    )
+
+    # Convert to rio path
+    if path.is_cloud_path(archive_path):
+        archived_band_paths = [
+            f"{prefix}+file+{archive_path}!{p}" for p in archived_band_paths
+        ]
+    else:
+        # Local archives are addressed with the GDAL virtual file system
+        # prefixes (/vsizip/ or /vsitar/) rather than with the
+        # '{prefix}+file://{archive}!{file}' URI form
+        archived_band_paths = [
+            f"/vsi{prefix}/{archive_path}/{p}" for p in archived_band_paths
+        ]
+
+    # Keep only the first match when a list is not requested
+    if not as_list:
+        archived_band_paths = archived_band_paths[0]
+
+    return archived_band_paths
diff --git a/sertit/files.py b/sertit/files.py
index 4bdb6c2..c2ec5a3 100644
--- a/sertit/files.py
+++ b/sertit/files.py
@@ -19,11 +19,7 @@
 import json
 import logging
 import os
-import re
 import shutil
-import tarfile
-import tempfile
-import zipfile
 from datetime import date, datetime
 from enum import Enum
 from json import JSONDecoder, JSONEncoder
@@ -32,10 +28,8 @@
 
 import dill
 import numpy as np
-from lxml import etree, html
-from tqdm import tqdm
 
-from sertit import AnyPath, logs, path
+from sertit import AnyPath, logs, path, s3
 from sertit.logs import SU_NAME
 from sertit.strings import DATE_FORMAT
 from sertit.types import AnyPathStrType, AnyPathType
@@ -165,454 +159,6 @@ def real_rel_path(raw_path: AnyPathStrType, start: AnyPathStrType) -> AnyPathTyp
     return path.real_rel_path(raw_path, start)
 
 
-def extract_file(
-    file_path: AnyPathStrType,
-    output: AnyPathStrType,
-    overwrite: bool = False,
-) -> AnyPathType:
-    """
-    Extract an archived file (zip or others). Overwrites if specified.
-    If the archive don't contain a root directory with the name of the archive without the extension, create it
-
-    Args:
-        file_path (str): Archive file path
-        output (str): Output where to put the extracted directory
-        overwrite (bool): Overwrite found extracted directory
-
-    Returns:
-        AnyPathType: Extracted directory paths
-
-    Example:
-        >>> file_path = 'D:/path/to/zip.zip'
-        >>> output = 'D:/path/to/output'
-        >>> extract_file(file_path, output, overwrite=True)
-        D:/path/to/output/zip'
-    """
-    # Convert to path
-    file_path = AnyPath(file_path)
-    output = AnyPath(output)
-
-    # In case a folder is given, returns it (this means that the file is already extracted)
-    if file_path.is_dir():
-        return file_path
-
-    # Beware with .SEN3 and .SAFE extensions
-    archive_output = output.joinpath(path.get_filename(file_path))
-
-    # In case not overwrite and the extracted directory already exists
-    if not overwrite and archive_output.exists():
-        LOGGER.debug(
-            "Already existing extracted %s. It won't be overwritten.",
-            archive_output,
-        )
-        return archive_output
-
-    def extract_sub_dir(arch, filename_list):
-        top_level_files = list({item.split("/")[0] for item in filename_list})
-
-        # When the only root directory in the archive has the right name, we don't have to create it
-        if len(top_level_files) == 1 and archive_output.name == path.get_filename(
-            top_level_files[0]
-        ):
-            arch.extractall(archive_output.parent)
-            archive_output.parent.joinpath(top_level_files[0]).rename(archive_output)
-        else:
-            arch.extractall(archive_output)
-
-    # Manage archive type
-    if file_path.suffix == ".zip":
-        with zipfile.ZipFile(file_path, "r") as zip_file:
-            extract_sub_dir(zip_file, zip_file.namelist())
-    elif file_path.suffix == ".tar" or file_path.suffixes == [".tar", ".gz"]:
-        with tarfile.open(file_path, "r") as tar_file:
-            extract_sub_dir(tar_file, tar_file.getnames())
-    elif file_path.suffix == ".7z":
-        try:
-            import py7zr
-
-            with py7zr.SevenZipFile(file_path, "r") as z7_file:
-                extract_sub_dir(z7_file, z7_file.getnames())
-        except ModuleNotFoundError as exc:
-            raise TypeError("Please install 'py7zr' to extract .7z files") from exc
-    else:
-        raise TypeError(
-            f"Only .zip, .tar, .tar.gz and .7z files can be extracted, not {file_path}"
-        )
-
-    return archive_output
-
-
-def extract_files(
-    archives: list, output: AnyPathStrType, overwrite: bool = False
-) -> list:
-    """
-    Extract all archived files. Overwrites if specified.
-
-    Example:
-        >>> file_path = ['D:/path/to/zip1.zip', 'D:/path/to/zip2.zip']
-        >>> output = 'D:/path/to/output'
-        >>> extract_files(file_path, output, overwrite=True)
-        ['D:/path/to/output.zip1', 'D:/path/to/output.zip2']
-
-    Args:
-        archives (list of str): List of archives to be extracted
-        output (str): Output folder where extracted files will be written
-        overwrite (bool): Overwrite found extracted files
-
-    Returns:
-        list: Extracted files (even pre-existing ones)
-    """
-    LOGGER.info("Extracting products in %s", output)
-    progress_bar = tqdm(archives)
-    extracts = []
-    for arch in progress_bar:
-        progress_bar.set_description(f"Extracting product {os.path.basename(arch)}")
-        extracts.append(extract_file(arch, output, overwrite))
-
-    return extracts
-
-
-def get_archived_file_list(archive_path: AnyPathStrType) -> list:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get the list of all the files contained in an archive.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-
-    Returns:
-        list: All files contained in the given archive
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> get_archived_file_list(arch_path, file_regex)
-        ['file_1.txt', 'file_2.tif', 'file_3.xml', 'file_4.geojson']
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_archived_file_list(archive_path)
-
-
-def get_archived_path(
-    archive_path: AnyPathStrType, file_regex: str, as_list: bool = False
-) -> Union[list, AnyPathType]:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get archived file path from inside the archive.
-
-    .. WARNING::
-        If :code:`as_list` is :code:`False`, it will only return the first file matched !
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> path = get_archived_path(arch_path, file_regex)
-        'dir/filename.tif'
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        file_regex (str): File regex (used by re) as it can be found in the getmembers() list
-        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
-
-    Returns:
-        Union[list, str]: Path from inside the zipfile
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_archived_path(archive_path, file_regex, as_list)
-
-
-def get_archived_rio_path(
-    archive_path: AnyPathStrType, file_regex: str, as_list: bool = False
-) -> Union[list, AnyPathType]:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get archived file path from inside the archive, to be read with rasterio:
-
-    - :code:`zip+file://{zip_path}!{file_name}`
-    - :code:`tar+file://{tar_path}!{file_name}`
-
-
-    See `here <https://rasterio.readthedocs.io/en/latest/topics/datasets.html?highlight=zip#dataset-identifiers>`_
-    for more information.
-
-    .. WARNING::
-        It won't be readable by pandas, geopandas or xmltree !
-
-    .. WARNING::
-        If :code:`as_list` is :code:`False`, it will only return the first file matched !
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        file_regex (str): File regex (used by re) as it can be found in the getmembers() list
-        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
-
-    Returns:
-        Union[list, str]: Band path that can be read by rasterio
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> path = get_archived_tif_path(arch_path, file_regex)
-        'zip+file://D:/path/to/output.zip!dir/filename.tif'
-        >>> rasterio.open(path)
-        <open DatasetReader name='zip+file://D:/path/to/output.zip!dir/filename.tif' mode='r'>
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_archived_rio_path(archive_path, file_regex, as_list)
-
-
-def read_archived_file(
-    archive_path: AnyPathStrType, regex: str, file_list: list = None
-) -> bytes:
-    """
-    Read archived file (in bytes) from :code:`zip` or :code:`tar` archives.
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        regex (str): Regex (used by re) as it can be found in the getmembers() list
-        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
-
-    Returns:
-         bytes: Archived file in bytes
-    """
-    archive_path = AnyPath(archive_path)
-
-    # Compile regex
-    regex = re.compile(regex)
-
-    # Open tar and zip XML
-    try:
-        if archive_path.suffix == ".tar":
-            with tarfile.open(archive_path) as tar_ds:
-                # file_list is not very useful for TAR files...
-                if file_list is None:
-                    tar_mb = tar_ds.getmembers()
-                    file_list = [mb.name for mb in tar_mb]
-                name = list(filter(regex.match, file_list))[0]
-                tarinfo = tar_ds.getmember(name)
-                file_str = tar_ds.extractfile(tarinfo).read()
-        elif archive_path.suffix == ".zip":
-            with zipfile.ZipFile(archive_path) as zip_ds:
-                if file_list is None:
-                    file_list = [f.filename for f in zip_ds.filelist]
-                name = list(filter(regex.match, file_list))[0]
-                file_str = zip_ds.read(name)
-
-        elif archive_path.suffix == ".tar.gz":
-            raise TypeError(
-                ".tar.gz files are too slow to read from inside the archive. Please extract them instead."
-            )
-        else:
-            raise TypeError(
-                "Only .zip and .tar files can be read from inside its archive."
-            )
-    except IndexError as exc:
-        raise FileNotFoundError(
-            f"Impossible to find file {regex} in {path.get_filename(archive_path)}"
-        ) from exc
-
-    return file_str
-
-
-def read_archived_xml(
-    archive_path: AnyPathStrType, regex: str = None, file_list: list = None, **kwargs
-) -> etree._Element:
-    """
-    Read archived XML from :code:`zip` or :code:`tar` archives.
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        regex (str): XML regex (used by re) as it can be found in the getmembers() list
-        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
-
-    Returns:
-         etree._Element: XML file
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> read_archived_xml(arch_path, file_regex)
-        <Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
-    """
-    if regex is None:
-        logs.deprecation_warning(
-            "'xml_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("xml_regex")
-
-    xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list)
-
-    return etree.fromstring(xml_bytes)
-
-
-def read_archived_html(
-    archive_path: AnyPathStrType, regex: str, file_list: list = None
-) -> html.HtmlElement:
-    """
-    Read archived HTML from :code:`zip` or :code:`tar` archives.
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        regex (str): HTML regex (used by re) as it can be found in the getmembers() list
-        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
-
-    Returns:
-         html._Element: HTML file
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> read_archived_html(arch_path, file_regex)
-        <Element html at 0x1c90007f8c8>
-    """
-    html_bytes = read_archived_file(archive_path, regex, file_list=file_list)
-
-    return html.fromstring(html_bytes)
-
-
-def archive(
-    folder_path: AnyPathStrType,
-    archive_path: AnyPathStrType,
-    fmt: str = "zip",
-) -> AnyPathType:
-    """
-    Archives a folder recursively.
-
-    Args:
-        folder_path (AnyPathStrType): Folder to archive
-        archive_path (AnyPathStrType): Archive path, with or without extension
-        fmt (str): Format of the archive, used by :code:`shutil.make_archive`. Choose between [zip, tar, gztar, bztar, xztar]
-
-    Returns:
-        str: Archive filename
-
-    Example:
-        >>> folder_path = 'D:/path/to/folder_to_archive'
-        >>> archive_path = 'D:/path/to/output'
-        >>> archive = archive(folder_path, archive_path, fmt="gztar")
-        'D:/path/to/output/folder_to_archive.tar.gz'
-    """
-    archive_path = AnyPath(archive_path)
-    folder_path = AnyPath(folder_path)
-
-    tmp_dir = None
-    if path.is_cloud_path(folder_path):
-        tmp_dir = tempfile.TemporaryDirectory()
-        folder_path = folder_path.download_to(tmp_dir.name)
-
-    # Shutil make_archive needs a path without extension
-    archive_base = os.path.splitext(archive_path)[0]
-
-    # Archive the folder
-    archive_fn = shutil.make_archive(
-        archive_base,
-        format=fmt,
-        root_dir=folder_path.parent,
-        base_dir=folder_path.name,
-    )
-
-    if tmp_dir is not None:
-        tmp_dir.cleanup()
-
-    return AnyPath(archive_fn)
-
-
-def add_to_zip(
-    zip_path: AnyPathStrType,
-    dirs_to_add: Union[list, AnyPathStrType],
-) -> AnyPathType:
-    """
-    Add folders to an already existing zip file (recursively).
-
-    Args:
-        zip_path (AnyPathStrType): Already existing zip file
-        dirs_to_add (Union[list, AnyPathStrType]): Directories to add
-
-    Returns:
-        AnyPathType: Updated zip_path
-
-    Example:
-        >>> zip_path = 'D:/path/to/zip.zip'
-        >>> dirs_to_add = ['D:/path/to/dir1', 'D:/path/to/dir2']
-        >>> add_to_zip(zip_path, dirs_to_add)
-        zip.zip contains 2 more folders, dir1 and dir2
-    """
-    zip_path = AnyPath(zip_path)
-
-    # If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
-    if path.is_cloud_path(zip_path):
-        zip_path = AnyPath(zip_path.fspath)
-
-    # Check if existing zipfile
-    if not zip_path.is_file():
-        raise FileNotFoundError(f"Non existing {zip_path}")
-
-    # Convert to list if needed
-    if not isinstance(dirs_to_add, list):
-        dirs_to_add = [dirs_to_add]
-
-    # Add all folders to the existing zip
-    # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
-    with zipfile.ZipFile(zip_path, "a") as zip_file:
-        progress_bar = tqdm(dirs_to_add)
-        for dir_to_add_path in progress_bar:
-            # Just to be sure, use str instead of Paths
-            if isinstance(dir_to_add_path, Path):
-                dir_to_add = str(dir_to_add_path)
-            elif path.is_cloud_path(dir_to_add_path):
-                dir_to_add = dir_to_add_path.fspath
-            else:
-                dir_to_add = dir_to_add_path
-
-            progress_bar.set_description(
-                f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
-            )
-            tmp = tempfile.TemporaryDirectory()
-            if os.path.isfile(dir_to_add):
-                dir_to_add = extract_file(dir_to_add, tmp.name)
-
-            for root, _, files in os.walk(dir_to_add):
-                base_path = os.path.join(dir_to_add, "..")
-
-                # Write dir (in namelist at least)
-                zip_file.write(root, os.path.relpath(root, base_path))
-
-                # Write files
-                for file in files:
-                    zip_file.write(
-                        os.path.join(root, file),
-                        os.path.relpath(
-                            os.path.join(root, file), os.path.join(dir_to_add, "..")
-                        ),
-                    )
-
-            # Clean tmp
-            tmp.cleanup()
-
-    return zip_path
-
-
 def get_filename(file_path: AnyPathStrType, other_exts: Union[list, str] = None) -> str:
     """
     .. deprecated:: 1.30.0
@@ -754,7 +300,7 @@ def copy(src: AnyPathStrType, dst: AnyPathStrType) -> AnyPathType:
     src = AnyPath(src)
 
     if path.is_cloud_path(src):
-        out = src.download_to(dst)
+        out = s3.download(src, dst)
     else:
         out = None
         try:
diff --git a/sertit/path.py b/sertit/path.py
index 48e9b90..30451e1 100644
--- a/sertit/path.py
+++ b/sertit/path.py
@@ -19,13 +19,10 @@
 import logging
 import os
 import pprint
-import re
-import tarfile
 import tempfile
-import zipfile
 from typing import Any, Union
 
-from sertit import AnyPath, logs, s3
+from sertit import AnyPath
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -150,189 +147,6 @@ def real_rel_path(raw_path: AnyPathStrType, start: AnyPathStrType) -> AnyPathTyp
     return rel_path
 
 
-def get_archived_file_list(archive_path: AnyPathStrType) -> list:
-    """
-    Get the list of all the files contained in an archive.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-
-    Returns:
-        list: All files contained in the given archive
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> get_archived_file_list(arch_path, file_regex)
-        ['file_1.txt', 'file_2.tif', 'file_3.xml', 'file_4.geojson']
-    """
-    archive_path = AnyPath(archive_path)
-
-    is_zip = archive_path.suffix == ".zip"
-    archive_fn = get_filename(archive_path)
-    if is_zip:
-        if is_cloud_path(archive_path):
-            archive_path = s3.read(archive_path)
-
-        with zipfile.ZipFile(archive_path) as zip_ds:
-            file_list = [f.filename for f in zip_ds.filelist]
-    else:
-        try:
-            if is_cloud_path(archive_path):
-                args = {"fileobj": s3.read(archive_path), "mode": "r"}
-            else:
-                args = {"name": archive_path, "mode": "r"}
-            with tarfile.open(**args) as tar_ds:
-                tar_mb = tar_ds.getmembers()
-                file_list = [mb.name for mb in tar_mb]
-        except tarfile.ReadError as ex:
-            raise tarfile.ReadError(f"Impossible to open archive: {archive_fn}") from ex
-
-    return file_list
-
-
-def get_archived_path(
-    archive_path: AnyPathStrType,
-    regex: str,
-    as_list: bool = False,
-    case_sensitive: bool = False,
-    file_list: list = None,
-    **kwargs,
-) -> Union[list, AnyPathType]:
-    """
-    Get archived file path from inside the archive.
-
-    .. WARNING::
-        If :code:`as_list` is :code:`False`, it will only return the first file matched !
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        regex (str): File regex (used by re) as it can be found in the getmembers() list
-        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
-        case_sensitive (bool): If true, the regex is case-sensitive.
-        file_list (list): List of files to get archived from. Optional, if not given it will be re-computed.
-
-    Returns:
-        Union[list, str]: Path from inside the zipfile
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> path = get_archived_path(arch_path, file_regex)
-        'dir/filename.tif'
-    """
-    if regex is None:
-        logs.deprecation_warning(
-            "'file_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("file_regex")
-
-    # Get file list
-    archive_path = AnyPath(archive_path)
-
-    # Offer the ability to give the file list directly, as this operation is expensive when done with large archives stored on the cloud
-    if file_list is None:
-        file_list = get_archived_file_list(archive_path)
-
-    # Search for file
-    re_rgx = re.compile(regex) if case_sensitive else re.compile(regex, re.IGNORECASE)
-    archived_band_paths = list(filter(re_rgx.match, file_list))
-    if not archived_band_paths:
-        raise FileNotFoundError(
-            f"Impossible to find file {regex} in {get_filename(archive_path)}"
-        )
-
-    # Convert to str if needed
-    if not as_list:
-        archived_band_paths = archived_band_paths[0]
-
-    return archived_band_paths
-
-
-def get_archived_rio_path(
-    archive_path: AnyPathStrType,
-    regex: str,
-    as_list: bool = False,
-    file_list: list = None,
-    **kwargs,
-) -> Union[list, AnyPathType]:
-    """
-    Get archived file path from inside the archive, to be read with rasterio:
-
-    - :code:`zip+file://{zip_path}!{file_name}`
-    - :code:`tar+file://{tar_path}!{file_name}`
-
-
-    See `here <https://rasterio.readthedocs.io/en/latest/topics/datasets.html?highlight=zip#dataset-identifiers>`_
-    for more information.
-
-    .. WARNING::
-        It wont be readable by pandas, geopandas or xmltree !
-
-    .. WARNING::
-        If :code:`as_list` is :code:`False`, it will only return the first file matched !
-
-    You can use this `site <https://regexr.com/>`_ to build your regex.
-
-    Args:
-        archive_path (AnyPathStrType): Archive path
-        regex (str): File regex (used by re) as it can be found in the getmembers() list
-        as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
-        file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
-
-    Returns:
-        Union[list, str]: Band path that can be read by rasterio
-
-    Example:
-        >>> arch_path = 'D:/path/to/zip.zip'
-        >>> file_regex = '.*dir.*file_name'  # Use .* for any character
-        >>> path = get_archived_tif_path(arch_path, file_regex)
-        'zip+file://D:/path/to/output.zip!dir/filename.tif'
-        >>> rasterio.open(path)
-        <open DatasetReader name='zip+file://D:/path/to/output.zip!dir/filename.tif' mode='r'>
-    """
-    if regex is None:
-        logs.deprecation_warning(
-            "'file_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("file_regex")
-
-    archive_path = AnyPath(archive_path)
-    if archive_path.suffix in [".tar", ".zip"]:
-        prefix = archive_path.suffix[-3:]
-    elif archive_path.suffix == ".tar.gz":
-        raise TypeError(
-            ".tar.gz files are too slow to be read from inside the archive. Please extract them instead."
-        )
-    else:
-        raise TypeError("Only .zip and .tar files can be read from inside its archive.")
-
-    # Search for file
-    archived_band_paths = get_archived_path(
-        archive_path, regex=regex, as_list=True, file_list=file_list
-    )
-
-    # Convert to rio path
-    if is_cloud_path(archive_path):
-        archived_band_paths = [
-            f"{prefix}+file+{archive_path}!{path}" for path in archived_band_paths
-        ]
-    else:
-        # archived_band_paths = [
-        #     f"{prefix}+file://{archive_path}!{path}" for path in archived_band_paths
-        # ]
-        archived_band_paths = [
-            f"/vsi{prefix}/{archive_path}/{path}" for path in archived_band_paths
-        ]
-
-    # Convert to str if needed
-    if not as_list:
-        archived_band_paths = archived_band_paths[0]
-
-    return archived_band_paths
-
-
 def get_filename(file_path: AnyPathStrType, other_exts: Union[list, str] = None) -> str:
     """
     Get file name (without extension) from file path, ie:
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 9b214b2..87856e9 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -23,9 +23,7 @@
 import os
 import re
 import shutil
-import tarfile
 import tempfile
-import zipfile
 from collections.abc import Generator
 from contextlib import contextmanager
 from typing import Any, Union
@@ -36,7 +34,7 @@
 from cloudpathlib.exceptions import AnyPathTypeError
 from shapely import Polygon, wkt
 
-from sertit import AnyPath, files, geometry, logs, misc, path, strings
+from sertit import AnyPath, archives, files, geometry, logs, misc, path, s3, strings
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -256,8 +254,11 @@ def get_aoi_wkt(aoi_path: AnyPathStrType, as_str: bool = True) -> Union[str, Pol
 
     if aoi_path.suffix == ".wkt":
         try:
-            with open(aoi_path) as aoi_f:
-                aoi = wkt.load(aoi_f)
+            if path.is_cloud_path(aoi_path):
+                aoi = wkt.load(s3.read(aoi_path))
+            else:
+                with open(aoi_path) as aoi_f:
+                    aoi = wkt.load(aoi_f)
         except Exception as ex:
             raise ValueError("AOI WKT cannot be read") from ex
     else:
@@ -471,13 +472,17 @@ def read(
         if "!" in str(vector_path):
             split_vect = str(vector_path).split("!")
             archive_regex = ".*{}".format(split_vect[1].replace(".", r"\."))
-            vector_path = AnyPath(split_vect[0])
+            try:
+                vector_path = AnyPath(split_vect[0], **vector_path.storage_options)
+            except Exception:
+                # Cloudpathlib
+                vector_path = AnyPath(split_vect[0])
 
         # Manage archive case
         if vector_path.suffix in [".tar", ".zip"]:
             prefix = vector_path.suffix[-3:]
             file_list = kwargs.pop(
-                "file_list", path.get_archived_file_list(vector_path)
+                "file_list", archives.get_archived_file_list(vector_path)
             )
 
             try:
@@ -710,16 +715,16 @@ def ogr2geojson(
 
     # archived vector_path are extracted in a tmp folder so no need to be downloaded
     if vector_path.suffix == ".zip":
-        with zipfile.ZipFile(vector_path, "r") as zip_ds:
+        with archives.open_zipfile(vector_path, "r") as zip_ds:
             vect_path = zip_ds.extract(arch_vect_path, out_dir)
     elif vector_path.suffix == ".tar":
-        with tarfile.open(vector_path, "r") as tar_ds:
+        with archives.open_tarfile(vector_path, "r") as tar_ds:
             tar_ds.extract(arch_vect_path, out_dir)
             vect_path = os.path.join(out_dir, arch_vect_path)
     else:
         # vector_path should be downloaded to work with 'ogr2ogr'
         if path.is_cloud_path(vector_path):
-            vector_path = AnyPath(vector_path).fspath
+            vector_path = s3.download(vector_path, out_dir)
         vect_path = vector_path
 
     vect_path_gj = os.path.join(
diff --git a/sertit/xml.py b/sertit/xml.py
index 9ddc44a..8d334ef 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -30,7 +30,7 @@
 )
 from lxml.html.builder import E
 
-from sertit import AnyPath, files, path
+from sertit import AnyPath, archives, logs, path, s3
 from sertit.logs import SU_NAME
 from sertit.misc import ListEnum
 from sertit.types import AnyPathStrType
@@ -61,7 +61,7 @@ def read(xml_path: AnyPathStrType) -> _Element:
                 # Slower but works with:
                 # {ValueError}Unicode strings with encoding declaration are not supported.
                 # Please use bytes input or XML fragments without declaration.
-                root = fromstring(xml_path.read_bytes())
+                root = fromstring(s3.read(xml_path).read())
         else:
             # pylint: disable=I1101:
             # Module 'lxml.etree' has no 'parse' member, but source is unavailable.
@@ -75,7 +75,10 @@ def read(xml_path: AnyPathStrType) -> _Element:
 
 
 def read_archive(
-    path: AnyPathStrType, regex: str = None, file_list: list = None
+    archive_path: AnyPathStrType,
+    regex: str = None,
+    file_list: list = None,
+    **kwargs,
 ) -> _Element:
     """
     Read an XML file from inside an archive (zip or tar)
@@ -87,25 +90,40 @@ def read_archive(
     - path to the archive plus a regex looking inside the archive. Duplicate behaviour to :py:func:`files.read_archived_xml`
 
     Args:
-        path (AnyPathStrType): Path to the XML file, stored inside an archive or path to the archive itself
+        archive_path (AnyPathStrType): Path to the XML file, stored inside an archive or path to the archive itself
         regex (str): Optional. If specified, the path should be the archive path and the regex should be the key to find the XML file inside the archive.
         file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
 
     Returns:
         _Element: XML Root
     """
+    if archive_path is None:
+        logs.deprecation_warning(
+            "'path' argument is deprecated, use 'archive_path' instead."
+        )
+        archive_path = kwargs.pop("path")
 
     try:
         if not regex:
-            path, basename = str(path).split("!")
+            archive_base_path, basename = str(archive_path).split("!")
             regex = basename
-            if path.startswith("zip://") or path.startswith("tar://"):
-                path = path[5:]
+            if archive_base_path.startswith("zip://") or archive_base_path.startswith(
+                "tar://"
+            ):
+                archive_base_path = archive_base_path[5:]
+
+            # For UPath
+            with contextlib.suppress(Exception):
+                archive_base_path = AnyPath(
+                    archive_base_path, **archive_path.storage_options
+                )
+        else:
+            archive_base_path = archive_path
 
-        return files.read_archived_xml(path, regex, file_list=file_list)
+        return archives.read_archived_xml(archive_base_path, regex, file_list=file_list)
 
     except XMLSyntaxError as exc:
-        raise ValueError(f"Invalid metadata XML for {path}!") from exc
+        raise ValueError(f"Invalid metadata XML for {archive_path}!") from exc
 
 
 def write(xml: _Element, path: str) -> None:

From fa06fafb1054a8c522b7a3bbe0fae42628c00bfc Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 14:57:26 +0100
Subject: [PATCH 05/18] Simplify pre-commit hooks

---
 .pre-commit-config.yaml | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 38aaf7b..1275d62 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,16 +10,12 @@ repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks.git
     rev: v5.0.0
     hooks:
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
       - id: check-json
       - id: check-yaml
         args: [ --allow-multiple-documents, --unsafe ]
       - id: check-xml
       - id: check-added-large-files
         args: [ '--maxkb=1600' ]
-      - id: debug-statements
-      - id: check-merge-conflict
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
@@ -30,3 +26,13 @@ repos:
         args: [ --fix ]
       # Run the formatter.
       - id: ruff-format
+
+
+
+
+
+
+
+
+
+

From 8b95aac87cd925628e6c3bb374335244e147ac5c Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 15:50:21 +0100
Subject: [PATCH 06/18] Make UPath work with all archive functions and dcmp

---
 CI/SCRIPTS/test_archives.py |   8 ++-
 sertit/archives.py          | 108 +++++++++++++++++++-----------------
 sertit/ci.py                |  47 +++++++++-------
 sertit/s3.py                |  17 +++++-
 4 files changed, 106 insertions(+), 74 deletions(-)

diff --git a/CI/SCRIPTS/test_archives.py b/CI/SCRIPTS/test_archives.py
index 3415618..ddb5d0f 100644
--- a/CI/SCRIPTS/test_archives.py
+++ b/CI/SCRIPTS/test_archives.py
@@ -8,6 +8,7 @@
 from sertit import archives, ci, files, path, s3, vectors
 
 
+@s3_env
 def test_archive(tmp_path):
     """Test extracting functions"""
     # Archives
@@ -31,6 +32,11 @@ def test_archive(tmp_path):
 
     # Extract
     extracted_dirs = archives.extract_files(arch, tmp_path, overwrite=True)
+
+    # Test
+    for ex_dir in extracted_dirs:
+        ci.assert_dir_equal(core_dir, ex_dir)
+
     archives.extract_files([zip2_file], tmp_path, overwrite=False)  # Already existing
 
     # Test
@@ -54,7 +60,7 @@ def test_archive(tmp_path):
     # Add to zip
     zip_out = zip2_file if path.is_cloud_path(zip2_file) else archive_base + ".zip"
     core_copy = files.copy(core_dir, os.path.join(tmp_path, "core2"))
-    zip_out = archives.add_to_zip(zip_out, core_copy)
+    zip_out = archives.add_to_zip(s3.download(zip_out, tmp_path), core_copy)
 
     # Extract
     unzip_out = os.path.join(tmp_path, "out")
diff --git a/sertit/archives.py b/sertit/archives.py
index 2115071..b739e5f 100644
--- a/sertit/archives.py
+++ b/sertit/archives.py
@@ -285,10 +285,14 @@ def archive(
     archive_path = AnyPath(archive_path)
     folder_path = AnyPath(folder_path)
 
+    # with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
+    #     for f in folder_path.glob("**"):
+    #         zipf.write(f, f.relative_to(folder_path.name))
+
     tmp_dir = None
     if path.is_cloud_path(folder_path):
         tmp_dir = tempfile.TemporaryDirectory()
-        folder_path = folder_path.download_to(tmp_dir.name)
+        folder_path = s3.download(folder_path, tmp_dir.name)
 
     # Shutil make_archive needs a path without extension
     archive_base = os.path.splitext(archive_path)[0]
@@ -304,7 +308,12 @@ def archive(
     if tmp_dir is not None:
         tmp_dir.cleanup()
 
-    return AnyPath(archive_fn)
+    try:
+        arch = AnyPath(archive_fn, folder_path.storage_options)
+    except Exception:
+        arch = AnyPath(archive_fn)
+
+    return arch
 
 
 def add_to_zip(
@@ -329,55 +338,54 @@ def add_to_zip(
     """
     zip_path = AnyPath(zip_path)
 
-    # If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
-    if path.is_cloud_path(zip_path):
-        zip_path = AnyPath(zip_path.fspath)
-
-    # Check if existing zipfile
-    if not zip_path.is_file():
-        raise FileNotFoundError(f"Non existing {zip_path}")
-
-    # Convert to list if needed
-    if not isinstance(dirs_to_add, list):
-        dirs_to_add = [dirs_to_add]
-
-    # Add all folders to the existing zip
-    # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
-    with open_zipfile(zip_path, "a") as zip_file:
-        progress_bar = tqdm(dirs_to_add)
-        for dir_to_add_path in progress_bar:
-            # Just to be sure, use str instead of Paths
-            if isinstance(dir_to_add_path, Path):
-                dir_to_add = str(dir_to_add_path)
-            elif path.is_cloud_path(dir_to_add_path):
-                dir_to_add = dir_to_add_path.fspath
-            else:
-                dir_to_add = dir_to_add_path
-
-            progress_bar.set_description(
-                f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Zips stored on the cloud cannot be updated (zipfile doesn't like cloud paths)
+        if path.is_cloud_path(zip_path):
+            raise NotImplementedError(
+                "Impossible (for now) to update a zip stored in the cloud!"
             )
-            tmp = tempfile.TemporaryDirectory()
-            if os.path.isfile(dir_to_add):
-                dir_to_add = extract_file(dir_to_add, tmp.name)
-
-            for root, _, files in os.walk(dir_to_add):
-                base_path = os.path.join(dir_to_add, "..")
-
-                # Write dir (in namelist at least)
-                zip_file.write(root, os.path.relpath(root, base_path))
-
-                # Write files
-                for file in files:
-                    zip_file.write(
-                        os.path.join(root, file),
-                        os.path.relpath(
-                            os.path.join(root, file), os.path.join(dir_to_add, "..")
-                        ),
-                    )
-
-            # Clean tmp
-            tmp.cleanup()
+
+        # Check if existing zipfile
+        if not zip_path.is_file():
+            raise FileNotFoundError(f"Non existing {zip_path}")
+
+        # Convert to list if needed
+        if not isinstance(dirs_to_add, list):
+            dirs_to_add = [dirs_to_add]
+
+        # Add all folders to the existing zip
+        # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
+        with open_zipfile(zip_path, "a") as zip_file:
+            progress_bar = tqdm(dirs_to_add)
+            for dir_to_add_path in progress_bar:
+                # Just to be sure, use str instead of Paths
+                if isinstance(dir_to_add_path, Path):
+                    dir_to_add = str(dir_to_add_path)
+                elif path.is_cloud_path(dir_to_add_path):
+                    dir_to_add = dir_to_add_path.fspath
+                else:
+                    dir_to_add = dir_to_add_path
+
+                progress_bar.set_description(
+                    f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
+                )
+                if os.path.isfile(dir_to_add):
+                    dir_to_add = extract_file(dir_to_add, tmp_dir)
+
+                for root, _, files in os.walk(dir_to_add):
+                    base_path = os.path.join(dir_to_add, "..")
+
+                    # Write dir (in namelist at least)
+                    zip_file.write(root, os.path.relpath(root, base_path))
+
+                    # Write files
+                    for file in files:
+                        zip_file.write(
+                            os.path.join(root, file),
+                            os.path.relpath(
+                                os.path.join(root, file), os.path.join(dir_to_add, "..")
+                            ),
+                        )
 
     return zip_path
 
diff --git a/sertit/ci.py b/sertit/ci.py
index 8b000ba..a0280e4 100644
--- a/sertit/ci.py
+++ b/sertit/ci.py
@@ -20,6 +20,7 @@
 import filecmp
 import logging
 import pprint
+import tempfile
 from doctest import Example
 from typing import Any, Union
 
@@ -30,7 +31,7 @@
 from shapely import force_2d, normalize
 from shapely.testing import assert_geometries_equal
 
-from sertit import AnyPath, files, s3, unistra
+from sertit import AnyPath, files, path, s3, unistra
 from sertit.logs import SU_NAME, deprecation_warning
 from sertit.types import AnyPathStrType, AnyXrDataStructure
 
@@ -381,27 +382,33 @@ def assert_dir_equal(path_1: AnyPathStrType, path_2: AnyPathStrType) -> None:
     assert path_1.is_dir(), f"{path_1} is not a directory!"
     assert path_2.is_dir(), f"{path_2} is not a directory!"
 
-    dcmp = filecmp.dircmp(path_1, path_2)
-    try:
-        assert (
-            dcmp.left_only == []
-        ), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
-        assert (
-            dcmp.right_only == []
-        ), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
-    except FileNotFoundError:
-        files_1 = [AnyPath(p).name for p in AnyPath(path_1).iterdir()]
-        files_2 = [AnyPath(p).name for p in AnyPath(path_2).iterdir()]
-
-        for f1 in files_1:
-            assert (
-                f1 in files_2
-            ), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"
+    with tempfile.TemporaryDirectory() as tmpdir:
+        if path.is_cloud_path(path_1):
+            path_1 = s3.download(path_1, tmpdir)
+        if path.is_cloud_path(path_2):
+            path_2 = s3.download(path_2, tmpdir)
 
-        for f2 in files_2:
+        dcmp = filecmp.dircmp(path_1, path_2)
+        try:
+            assert (
+                dcmp.left_only == []
+            ), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
             assert (
-                f2 in files_1
-            ), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"
+                dcmp.right_only == []
+            ), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
+        except FileNotFoundError:
+            files_1 = [p.name for p in path_1.iterdir()]
+            files_2 = [p.name for p in path_2.iterdir()]
+
+            for f1 in files_1:
+                assert (
+                    f1 in files_2
+                ), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"
+
+            for f2 in files_2:
+                assert (
+                    f2 in files_1
+                ), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"
 
 
 def assert_geom_equal(
diff --git a/sertit/s3.py b/sertit/s3.py
index aba8660..55cae8f 100644
--- a/sertit/s3.py
+++ b/sertit/s3.py
@@ -288,10 +288,21 @@ def download(src, dst):
             import shutil
 
             dst = AnyPath(dst)
-            downloaded_path = dst / src.name if dst.is_dir() else dst
+            if dst.is_dir() and src.name != dst.name:
+                downloaded_path = dst / src.name
+            else:
+                downloaded_path = dst
 
-            with src.open("rb") as f0, downloaded_path.open("wb") as f1:
-                shutil.copyfileobj(f0, f1)
+            if src.is_file():
+                with src.open("rb") as f0, downloaded_path.open("wb") as f1:
+                    shutil.copyfileobj(f0, f1)
+            else:
+                for f in src.glob("**"):
+                    dst_file = downloaded_path / f.name
+                    if f.is_file():
+                        dst_file.parent.mkdir(parents=True, exist_ok=True)
+                        with f.open("rb") as f0, dst_file.open("wb") as f1:
+                            shutil.copyfileobj(f0, f1)
 
         # cloudpathlib
         elif isinstance(src, CloudPath):

From 39190377a5e026cb2d722649f0ef9f31eff884c8 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 16:19:22 +0100
Subject: [PATCH 07/18] Make the functions work with cloudpathlib also

---
 CI/SCRIPTS/script_utils.py | 13 ++++++---
 CI/SCRIPTS/test_types.py   | 12 ++++++--
 CI/SCRIPTS/test_unistra.py |  5 +++-
 sertit/archives.py         |  4 +--
 sertit/ci.py               |  7 +++--
 sertit/path.py             | 22 +++++++++++----
 sertit/s3.py               | 58 +++++++++++++++++++++-----------------
 sertit/types.py            | 12 ++++++--
 sertit/vectors.py          |  6 ++--
 sertit/xml.py              |  4 +--
 10 files changed, 94 insertions(+), 49 deletions(-)

diff --git a/CI/SCRIPTS/script_utils.py b/CI/SCRIPTS/script_utils.py
index ba450df..583682a 100644
--- a/CI/SCRIPTS/script_utils.py
+++ b/CI/SCRIPTS/script_utils.py
@@ -38,13 +38,18 @@ class Polarization(ListEnum):
 
 def get_s3_ci_path():
     """Get S3 CI path"""
-    # unistra.define_s3_client()
 
     from sertit.unistra import UNISTRA_S3_ENDPOINT
 
-    return AnyPath(
-        "s3://sertit-sertit-utils-ci", endpoint_url=f"https://{UNISTRA_S3_ENDPOINT}"
-    )
+    try:
+        ci_path = AnyPath(
+            "s3://sertit-sertit-utils-ci", endpoint_url=f"https://{UNISTRA_S3_ENDPOINT}"
+        )
+    except TypeError:
+        unistra.define_s3_client()
+        ci_path = AnyPath("s3://sertit-sertit-utils-ci")
+
+    return ci_path
 
 
 def get_proj_path():
diff --git a/CI/SCRIPTS/test_types.py b/CI/SCRIPTS/test_types.py
index 01b246e..1daf305 100644
--- a/CI/SCRIPTS/test_types.py
+++ b/CI/SCRIPTS/test_types.py
@@ -2,12 +2,20 @@
 from typing import Union
 
 import numpy as np
-from cloudpathlib import CloudPath
-from upath import UPath
 
 from sertit import AnyPath
 from sertit.types import AnyPathType, is_iterable, make_iterable
 
+try:
+    from upath import UPath
+except ImportError:
+    UPath = None
+
+try:
+    from cloudpathlib import CloudPath
+except ImportError:
+    CloudPath = None
+
 
 def test_types():
     """Test some type aliases"""
diff --git a/CI/SCRIPTS/test_unistra.py b/CI/SCRIPTS/test_unistra.py
index 8910b34..a0738da 100644
--- a/CI/SCRIPTS/test_unistra.py
+++ b/CI/SCRIPTS/test_unistra.py
@@ -73,7 +73,10 @@ def test_unistra_s3():
         assert with_s3() == 1
 
         # Test get_geodatastore with s3
-        assert str(get_geodatastore()) == "s3://sertit-geodatastore/"
+        try:
+            assert str(get_geodatastore()) == "s3://sertit-geodatastore/"
+        except AssertionError:
+            assert str(get_geodatastore()) == "s3://sertit-geodatastore"
 
     # Test get_geodatastore without s3
     with tempenv.TemporaryEnvironment({s3.USE_S3_STORAGE: "0"}):
diff --git a/sertit/archives.py b/sertit/archives.py
index b739e5f..990dad2 100644
--- a/sertit/archives.py
+++ b/sertit/archives.py
@@ -309,8 +309,8 @@ def archive(
         tmp_dir.cleanup()
 
     try:
-        arch = AnyPath(archive_fn, folder_path.storage_options)
-    except Exception:
+        arch = AnyPath(archive_fn, storage_options=folder_path.storage_options)
+    except AttributeError:
         arch = AnyPath(archive_fn)
 
     return arch
diff --git a/sertit/ci.py b/sertit/ci.py
index a0280e4..d8d2956 100644
--- a/sertit/ci.py
+++ b/sertit/ci.py
@@ -382,11 +382,14 @@ def assert_dir_equal(path_1: AnyPathStrType, path_2: AnyPathStrType) -> None:
     assert path_1.is_dir(), f"{path_1} is not a directory!"
     assert path_2.is_dir(), f"{path_2} is not a directory!"
 
-    with tempfile.TemporaryDirectory() as tmpdir:
+    with (
+        tempfile.TemporaryDirectory() as tmpdir,
+        tempfile.TemporaryDirectory() as tmpdir2,
+    ):
         if path.is_cloud_path(path_1):
             path_1 = s3.download(path_1, tmpdir)
         if path.is_cloud_path(path_2):
-            path_2 = s3.download(path_2, tmpdir)
+            path_2 = s3.download(path_2, tmpdir2)
 
         dcmp = filecmp.dircmp(path_1, path_2)
         try:
diff --git a/sertit/path.py b/sertit/path.py
index 30451e1..504c526 100644
--- a/sertit/path.py
+++ b/sertit/path.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 """Tools for paths"""
 
+import contextlib
 import errno
 import logging
 import os
@@ -420,7 +421,7 @@ def is_cloud_path(path: AnyPathStrType):
             "gs",
             "gcs",
         ]
-    except ImportError:
+    except AttributeError:
         try:
             from cloudpathlib import CloudPath
 
@@ -431,17 +432,26 @@ def is_cloud_path(path: AnyPathStrType):
 
 def is_path(path: Any) -> bool:
     """
-    Determine whether the path corresponds to a file stored on the cloud or not.
+    Determine whether the path is really a path or not: either str, Path, UPath or CloudPath
 
     Args:
         path (AnyPathStrType): File path
 
     Returns:
-        bool: True if the file is store on the cloud.
+        bool: True if the file is a path
     """
     from pathlib import Path
 
-    from cloudpathlib import CloudPath
-    from upath import UPath
+    is_path = isinstance(path, (str, Path))
+
+    with contextlib.suppress(ImportError):
+        from upath import UPath
+
+        is_path = is_path or isinstance(path, UPath)
+
+    with contextlib.suppress(ImportError):
+        from cloudpathlib import CloudPath
+
+        is_path = is_path or isinstance(path, CloudPath)
 
-    return isinstance(path, (str, Path, CloudPath, UPath))
+    return is_path
diff --git a/sertit/s3.py b/sertit/s3.py
index 55cae8f..bc14535 100644
--- a/sertit/s3.py
+++ b/sertit/s3.py
@@ -17,6 +17,7 @@
 S3 tools
 """
 
+import contextlib
 import logging
 import os
 from contextlib import contextmanager
@@ -279,34 +280,39 @@ def download(src, dst):
     # By default, use the src path
     downloaded_path = src
 
-    if path.is_path(src):
-        from cloudpathlib import CloudPath
-        from upath import UPath
-
-        # Universal pathlib
-        if isinstance(src, UPath):
-            import shutil
-
-            dst = AnyPath(dst)
-            if dst.is_dir() and src.name != dst.name:
-                downloaded_path = dst / src.name
-            else:
-                downloaded_path = dst
-
-            if src.is_file():
-                with src.open("rb") as f0, downloaded_path.open("wb") as f1:
-                    shutil.copyfileobj(f0, f1)
-            else:
-                for f in src.glob("**"):
-                    dst_file = downloaded_path / f.name
-                    if f.is_file():
-                        dst_file.parent.mkdir(parents=True, exist_ok=True)
-                        with f.open("rb") as f0, dst_file.open("wb") as f1:
-                            shutil.copyfileobj(f0, f1)
+    # Cloud paths (handled with universal_pathlib or cloudpathlib)
+    if path.is_cloud_path(src):
+        import shutil
+
+        with contextlib.suppress(ImportError):
+            from upath import UPath
+
+            if isinstance(src, UPath):
+                dst = AnyPath(dst)
+                if dst.is_dir() and src.name != dst.name:
+                    downloaded_path = dst / src.name
+                else:
+                    downloaded_path = dst
+
+                if src.is_file():
+                    with src.open("rb") as f0, downloaded_path.open("wb") as f1:
+                        shutil.copyfileobj(f0, f1)
+                else:
+                    downloaded_path.parent.mkdir(parents=True, exist_ok=True)
+
+                    for f in src.glob("**"):
+                        dst_file = downloaded_path / f.name
+                        if f.is_file():
+                            dst_file.parent.mkdir(parents=True, exist_ok=True)
+                            with f.open("rb") as f0, dst_file.open("wb") as f1:
+                                shutil.copyfileobj(f0, f1)
 
         # cloudpathlib
-        elif isinstance(src, CloudPath):
-            downloaded_path = src.fspath if dst is None else src.download_to(dst)
+        with contextlib.suppress(ImportError):
+            from cloudpathlib import CloudPath
+
+            if isinstance(src, CloudPath):
+                downloaded_path = src.fspath if dst is None else src.download_to(dst)
 
     return downloaded_path
 
diff --git a/sertit/types.py b/sertit/types.py
index 433fe29..3cea7a9 100644
--- a/sertit/types.py
+++ b/sertit/types.py
@@ -5,10 +5,18 @@
 import geopandas as gpd
 import numpy as np
 import xarray as xr
-from cloudpathlib import CloudPath
 from rasterio.io import DatasetReader, DatasetWriter
 from shapely import MultiPolygon, Polygon
-from upath import UPath
+
+try:
+    from upath import UPath
+except ImportError:
+    UPath = None
+
+try:
+    from cloudpathlib import CloudPath
+except ImportError:
+    CloudPath = None
 
 AnyPathType = Union[CloudPath, Path, UPath]
 """Any Path Type (derived from Pathlib, Universal Pathlib and CloudpathLib)"""
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 87856e9..2f9df17 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -473,8 +473,10 @@ def read(
             split_vect = str(vector_path).split("!")
             archive_regex = ".*{}".format(split_vect[1].replace(".", r"\."))
             try:
-                vector_path = AnyPath(split_vect[0], **vector_path.storage_options)
-            except Exception:
+                vector_path = AnyPath(
+                    split_vect[0], storage_options=vector_path.storage_options
+                )
+            except AttributeError:
                 # Cloudpathlib
                 vector_path = AnyPath(split_vect[0])
 
diff --git a/sertit/xml.py b/sertit/xml.py
index 8d334ef..d52dfa0 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -113,9 +113,9 @@ def read_archive(
                 archive_base_path = archive_base_path[5:]
 
             # For UPath
-            with contextlib.suppress(Exception):
+            with contextlib.suppress(AttributeError):
                 archive_base_path = AnyPath(
-                    archive_base_path, **archive_path.storage_options
+                    archive_base_path, storage_options=archive_path.storage_options
                 )
         else:
             archive_base_path = archive_path

From fd953c2c82043bf0b003f1ae0fcaf9e6aeca10c4 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 16:50:00 +0100
Subject: [PATCH 08/18] **ENH: Use `universal_pathlib` instead of
 `cloudpathlib` (even if the code is still compatible with `cloudpathlib`)**

---
 CHANGES.md       | 4 ++++
 pyproject.toml   | 5 +++--
 requirements.txt | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index bb9dc5f..ec37141 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,9 @@
 # Release History
 
+## 2.0.0 (20xx-xx-xx)
+
+- **ENH: Use `universal_pathlib` instead of `cloudpathlib` (even if the code is still compatible with `cloudpathlib`)** ([#4](https://github.com/sertit/sertit-utils/issues/4))
+ 
 ## 1.44.x (20xx-xx-xx)
 
 - **ENH: Drop `isort`, `black` and `flake8` and use `ruff`**
diff --git a/pyproject.toml b/pyproject.toml
index 1faf5d5..aed03c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,8 +35,8 @@ dependencies = [
     "dill",
     "psutil",
     "geopandas>=0.14.4",
-    "cloudpathlib[all]>=0.12.1",
     "xarray>=2024.06.0",
+    "universal_pathlib>=0.2.6"
 ]
 
 dynamic = ["version"]
@@ -60,7 +60,8 @@ dask =  [
     "odc-geo>=0.4.6",
     "xarray-spatial>=0.3.6",
 ]
-full = ["sertit[colorlog,rasters_rio,rasters,dask]"]
+cloudpathlib = ["cloudpathlib[all]>=0.12.1"]
+full = ["sertit[colorlog,rasters_rio,rasters,dask,cloudpathlib]"]
 
 [project.urls]
 Bug_Tracker = "https://github.com/sertit/sertit-utils/issues"
diff --git a/requirements.txt b/requirements.txt
index d02da9c..c4682aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,6 +19,7 @@ lxml
 dill
 psutil
 geopandas>=0.14.4
+universal_pathlib>=0.2.6
 cloudpathlib[all]>=0.12.1
 xarray>=2024.06.0
 shapely >= 2.0.0

From 455716a9df927ae1092d7bc5f7ce3d7d49065e6d Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 16:58:34 +0100
Subject: [PATCH 09/18] BREAKING CHANGE: Remove all deprecations from
 `sertit==1.*` #3

---
 CHANGES.md                     |   6 +
 CI/SCRIPTS/test_files.py       |   5 -
 CI/SCRIPTS/test_rasters.py     |  52 ------
 CI/SCRIPTS/test_rasters_rio.py |  54 ------
 CI/SCRIPTS/test_vectors.py     |  10 --
 sertit/archives.py             |  20 +--
 sertit/arcpy.py                |  28 ---
 sertit/ci.py                   |  59 +------
 sertit/files.py                | 314 +--------------------------------
 sertit/rasters.py              |  46 -----
 sertit/rasters_rio.py          |  53 +-----
 sertit/vectors.py              |  42 +----
 sertit/xml.py                  |   8 +-
 13 files changed, 15 insertions(+), 682 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index ec37141..c82e4cb 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,6 +2,12 @@
 
 ## 2.0.0 (20xx-xx-xx)
 
+- **BREAKING CHANGE**: Remove all deprecations from `sertit==1.*` ([#3](https://github.com/sertit/sertit-utils/issues/3)): 
+   - Duplication between `path` and `files` modules
+   - Duplication between `ci`, `s3` and `unistra` modules
+   - Arguments in functions
+   - Renaming functions
+   - Others
 - **ENH: Use `universal_pathlib` instead of `cloudpathlib` (even if the code is still compatible with `cloudpathlib`)** ([#4](https://github.com/sertit/sertit-utils/issues/4))
  
 ## 1.44.x (20xx-xx-xx)
diff --git a/CI/SCRIPTS/test_files.py b/CI/SCRIPTS/test_files.py
index a0f9889..84539f7 100644
--- a/CI/SCRIPTS/test_files.py
+++ b/CI/SCRIPTS/test_files.py
@@ -20,7 +20,6 @@
 from datetime import date, datetime
 
 import numpy as np
-import pytest
 
 from CI.SCRIPTS.script_utils import Polarization
 from sertit import AnyPath, ci, files
@@ -93,10 +92,6 @@ def test_json():
         )  # Enum are stored following their value
         assert obj == test_dict
 
-        # Test deprecation
-        with pytest.deprecated_call():
-            files.save_json(json_file, test_dict)
-
 
 def test_pickle():
     """Test pickle functions"""
diff --git a/CI/SCRIPTS/test_rasters.py b/CI/SCRIPTS/test_rasters.py
index 0fe7d2b..d97c5ea 100644
--- a/CI/SCRIPTS/test_rasters.py
+++ b/CI/SCRIPTS/test_rasters.py
@@ -32,8 +32,6 @@
     INT8_NODATA,
     UINT8_NODATA,
     UINT16_NODATA,
-    any_raster_to_xr_ds,
-    get_nodata_value_from_dtype,
     get_nodata_value_from_xr,
 )
 from sertit.vectors import EPSG_4326
@@ -675,11 +673,6 @@ def test_write(dtype, nodata_val, tmp_path, xda):
         )
         _test_raster_after_write(test_path, dtype, nodata_val)
 
-    # test deprecation warning
-    test_deprecated_path = os.path.join(tmp_path, "test_depr.tif")
-    with pytest.deprecated_call():
-        rasters.write(xda, path=test_deprecated_path, dtype=dtype)
-
 
 def test_dim():
     """Test on BEAM-DIMAP function"""
@@ -900,51 +893,6 @@ def test_rasterize(tmp_path, raster_path):
     ci.assert_raster_almost_equal(raster_true_path, out_path, decimal=4)
 
 
-@s3_env
-def test_decorator_deprecation(raster_path):
-    from sertit.rasters import path_xarr_dst
-
-    @any_raster_to_xr_ds
-    def _ok_rasters(xds):
-        assert isinstance(xds, xr.DataArray)
-        return xds
-
-    @path_xarr_dst
-    def _depr_rasters(xds):
-        assert isinstance(xds, xr.DataArray)
-        return xds
-
-    # Not able to warn deprecation from inside the decorator
-    xr.testing.assert_equal(_ok_rasters(raster_path), _depr_rasters(raster_path))
-
-
-def test_get_nodata_deprecation():
-    """Test deprecation of get_nodata_value"""
-    # Test deprecation
-    for dtype in [
-        np.uint8,
-        np.int8,
-        np.uint16,
-        np.uint32,
-        np.int32,
-        np.int64,
-        np.uint64,
-        int,
-        "int",
-        np.int16,
-        np.float32,
-        np.float64,
-        float,
-        "float",
-    ]:
-        with pytest.deprecated_call():
-            from sertit.rasters import get_nodata_value
-
-            ci.assert_val(
-                get_nodata_value_from_dtype(dtype), get_nodata_value(dtype), dtype
-            )
-
-
 @s3_env
 @dask_env
 def test_get_notata_from_xr(raster_path):
diff --git a/CI/SCRIPTS/test_rasters_rio.py b/CI/SCRIPTS/test_rasters_rio.py
index 7eb921a..62e6664 100644
--- a/CI/SCRIPTS/test_rasters_rio.py
+++ b/CI/SCRIPTS/test_rasters_rio.py
@@ -26,7 +26,6 @@
 
 from CI.SCRIPTS.script_utils import KAPUT_KWARGS, rasters_path, s3_env
 from sertit import ci, rasters_rio, vectors
-from sertit.rasters_rio import any_raster_to_rio_ds, get_nodata_value_from_dtype
 from sertit.vectors import EPSG_4326
 
 ci.reduce_verbosity()
@@ -421,56 +420,3 @@ def _test_idx(idx_list):
     _test_idx([1])
     _test_idx([1, 2])
     _test_idx(1)
-
-
-@s3_env
-def test_decorator_deprecation(raster_path):
-    from sertit.rasters_rio import path_arr_dst
-
-    @any_raster_to_rio_ds
-    def _ok_rasters(ds):
-        return ds.read()
-
-    @path_arr_dst
-    def _depr_rasters(ds):
-        return ds.read()
-
-    # Not able to warn deprecation from inside the decorator
-    np.testing.assert_equal(_ok_rasters(raster_path), _depr_rasters(raster_path))
-
-
-def test_get_nodata_deprecation():
-    """Test deprecation of get_nodata_value"""
-    # Test deprecation
-    for dtype in [
-        np.uint8,
-        np.int8,
-        np.uint16,
-        np.uint32,
-        np.int32,
-        np.int64,
-        np.uint64,
-        int,
-        "int",
-        np.int16,
-        np.float32,
-        np.float64,
-        float,
-        "float",
-    ]:
-        with pytest.deprecated_call():
-            from sertit.rasters_rio import get_nodata_value
-
-            ci.assert_val(
-                get_nodata_value_from_dtype(dtype), get_nodata_value(dtype), dtype
-            )
-
-
-@s3_env
-def test_write_deprecated(tmp_path, raster_path):
-    test_deprecated_path = os.path.join(tmp_path, "test_depr.tif")
-    raster, mtd = rasters_rio.read(raster_path)
-
-    # test deprecation warning
-    with pytest.deprecated_call():
-        rasters_rio.write(raster, mtd, path=test_deprecated_path)
diff --git a/CI/SCRIPTS/test_vectors.py b/CI/SCRIPTS/test_vectors.py
index 5a79272..98c9dd2 100644
--- a/CI/SCRIPTS/test_vectors.py
+++ b/CI/SCRIPTS/test_vectors.py
@@ -21,7 +21,6 @@
 
 import geopandas as gpd
 import pytest
-from rasterio import CRS
 from shapely import wkt
 
 from CI.SCRIPTS.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
@@ -81,15 +80,6 @@ def test_vectors():
     aoi = vectors.read(kml_path, **KAPUT_KWARGS)
     _assert_attributes(aoi, kml_path)
 
-    with pytest.deprecated_call():
-        assert (
-            vectors.corresponding_utm_projection(aoi.centroid.x, aoi.centroid.y)
-            == "EPSG:32638"
-        )
-        assert CRS.from_string("EPSG:32638") == vectors.to_utm_crs(
-            aoi.centroid.x, aoi.centroid.y
-        )
-
     env = aoi.envelope[0]
 
     # Test kwargs (should be slightly not equal toi AOI to prove bbox does sth)
diff --git a/sertit/archives.py b/sertit/archives.py
index 990dad2..d8dc86a 100644
--- a/sertit/archives.py
+++ b/sertit/archives.py
@@ -12,7 +12,7 @@
 from lxml import etree, html
 from tqdm import tqdm
 
-from sertit import AnyPath, logs, path, s3
+from sertit import AnyPath, path, s3
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -222,12 +222,6 @@ def read_archived_xml(
         >>> read_archived_xml(arch_path, file_regex)
         <Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
     """
-    if regex is None:
-        logs.deprecation_warning(
-            "'xml_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("xml_regex")
-
     xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list)
 
     return etree.fromstring(xml_bytes)
@@ -455,12 +449,6 @@ def get_archived_path(
         >>> path = get_archived_path(arch_path, file_regex)
         'dir/filename.tif'
     """
-    if regex is None:
-        logs.deprecation_warning(
-            "'file_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("file_regex")
-
     # Get file list
     archive_path = AnyPath(archive_path)
 
@@ -525,12 +513,6 @@ def get_archived_rio_path(
         >>> rasterio.open(path)
         <open DatasetReader name='zip+file://D:/path/to/output.zip!dir/filename.tif' mode='r'>
     """
-    if regex is None:
-        logs.deprecation_warning(
-            "'file_regex' is deprecated, please use 'regex' instead."
-        )
-        regex = kwargs.pop("file_regex")
-
     archive_path = AnyPath(archive_path)
     if archive_path.suffix in [".tar", ".zip"]:
         prefix = archive_path.suffix[-3:]
diff --git a/sertit/arcpy.py b/sertit/arcpy.py
index a82f603..64cc9f9 100644
--- a/sertit/arcpy.py
+++ b/sertit/arcpy.py
@@ -1,8 +1,6 @@
 import logging
 import logging.handlers
 
-from sertit.logs import deprecation_warning
-
 # Arcpy types from inside a schema
 SHORT = "int32:4"
 """ 'Short' type for ArcGis GDB """
@@ -153,32 +151,6 @@ def emit(self, record):
         super(ArcPyLogHandler, self).emit(record)
 
 
-def feature_layer_to_path(feature_layer) -> str:
-    """
-    .. deprecated:: 1.36.0
-       Use :py:func:`gp_layer_to_path` instead.
-
-    Use :func:`gp_layer_to_path` instead.
-
-    Convert a feature layer to its source path.
-
-    Args:
-        feature_layer: Feature layer
-
-    Returns:
-        str: Path to the feature layer source
-
-    """
-    deprecation_warning("This function is deprecated. Use gp_layer_to_path instead.")
-    # Get path
-    if hasattr(feature_layer, "dataSource"):
-        path = feature_layer.dataSource
-    else:
-        path = str(feature_layer)
-
-    return path
-
-
 def gp_layer_to_path(feature_layer) -> str:
     """
     Convert a GP layer to its source path.
diff --git a/sertit/ci.py b/sertit/ci.py
index d8d2956..43b28e3 100644
--- a/sertit/ci.py
+++ b/sertit/ci.py
@@ -31,8 +31,8 @@
 from shapely import force_2d, normalize
 from shapely.testing import assert_geometries_equal
 
-from sertit import AnyPath, files, path, s3, unistra
-from sertit.logs import SU_NAME, deprecation_warning
+from sertit import AnyPath, files, path, s3
+from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyXrDataStructure
 
 LOGGER = logging.getLogger(SU_NAME)
@@ -43,61 +43,6 @@
 AWS_S3_ENDPOINT = s3.AWS_S3_ENDPOINT
 
 
-def s3_env(*args, **kwargs):
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.unistra` instead of :py:mod:`sertit.ci`
-    """
-    deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.unistra' instead of 'sertit.ci'"
-    )
-    return unistra.s3_env(*args, **kwargs)
-
-
-def define_s3_client():
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.unistra` instead of :py:mod:`sertit.ci`
-    """
-    deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.unistra' instead of 'sertit.ci'"
-    )
-    return unistra.define_s3_client()
-
-
-def get_db2_path():
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.unistra` instead of :py:mod:`sertit.ci`
-    """
-    deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.unistra' instead of 'sertit.ci'"
-    )
-    return unistra.get_db2_path()
-
-
-def get_db3_path():
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.unistra` instead of :py:mod:`sertit.ci`
-    """
-    deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.unistra' instead of 'sertit.ci'"
-    )
-    return unistra.get_db3_path()
-
-
-def get_db4_path():
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.unistra` instead of :py:mod:`sertit.ci`
-    """
-    deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.unistra' instead of 'sertit.ci'"
-    )
-    return unistra.get_db4_path()
-
-
 def assert_val(val_1: Any, val_2: Any, field: str) -> None:
     """
     Compare two values corresponding to a field
diff --git a/sertit/files.py b/sertit/files.py
index c2ec5a3..7be06e9 100644
--- a/sertit/files.py
+++ b/sertit/files.py
@@ -24,12 +24,12 @@
 from enum import Enum
 from json import JSONDecoder, JSONEncoder
 from pathlib import Path
-from typing import Any, Union
+from typing import Any
 
 import dill
 import numpy as np
 
-from sertit import AnyPath, logs, path, s3
+from sertit import AnyPath, path, s3
 from sertit.logs import SU_NAME
 from sertit.strings import DATE_FORMAT
 from sertit.types import AnyPathStrType, AnyPathType
@@ -37,180 +37,6 @@
 LOGGER = logging.getLogger(SU_NAME)
 
 
-def get_root_path() -> AnyPathType:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get the root path of the current disk:
-
-    - On Linux this returns :code:`/`
-    - On Windows this returns :code:`C:/` or whatever the current drive is
-
-    Example:
-        >>> get_root_path()
-        "/" on Linux
-        "C:/" on Windows (if you run this code from the C: drive)
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_root_path()
-
-
-def listdir_abspath(directory: AnyPathStrType) -> list:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get absolute path of all files in the given directory.
-
-    It is the same function than :code:`os.listdir` but returning absolute paths.
-
-    Args:
-        directory (AnyPathStrType): Relative or absolute path to the directory to be scanned
-
-    Returns:
-        str: Absolute path of all files in the given directory
-
-    Example:
-        >>> folder = "."
-        >>> listdir_abspath(folder)
-        ['D:/_SERTIT_UTILS/sertit-utils/sertit/files.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/logs.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/misc.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/network.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/rasters_rio.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/strings.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/vectors.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/version.py',
-        'D:/_SERTIT_UTILS/sertit-utils/sertit/__init__.py']
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.listdir_abspath(directory)
-
-
-def to_abspath(
-    raw_path: AnyPathStrType,
-    create: bool = True,
-    raise_file_not_found: bool = True,
-) -> AnyPathType:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Return the absolute path of the specified path and check if it exists
-
-    If not:
-
-    - If it is a file (aka has an extension), it raises an exception
-    - If it is a folder, it creates it
-
-    To be used with argparse to retrieve the absolute path of a file, like:
-
-    Args:
-        raw_path (AnyPathStrType): Path as a string (relative or absolute)
-        create (bool): Create directory if not existing
-
-    Returns:
-        AnyPathType: Absolute path
-
-    Example:
-        >>> parser = argparse.ArgumentParser()
-        >>> # Add config file path key
-        >>> parser.add_argument(
-        >>>     "--config",
-        >>>     help="Config file path (absolute or relative)",
-        >>>     type=to_abspath
-        >>> )
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.to_abspath(raw_path, create, raise_file_not_found)
-
-
-def real_rel_path(raw_path: AnyPathStrType, start: AnyPathStrType) -> AnyPathType:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Gives the real relative path from a starting folder.
-    (and not just adding :code:`../..` between the start and the target)
-
-    Args:
-        raw_path (AnyPathStrType): Path to make relative
-        start (AnyPathStrType): Start, the path being relative from this folder.
-
-    Returns:
-        Relative path
-
-    Example:
-        >>> path = r'D:/_SERTIT_UTILS/sertit-utils/sertit'
-        >>> start = os.path.join(".", "..", "..")
-        >>> real_rel_path(path, start)
-        'sertit-utils/sertit'
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.real_rel_path(raw_path, start)
-
-
-def get_filename(file_path: AnyPathStrType, other_exts: Union[list, str] = None) -> str:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get file name (without extension) from file path, i.e.:
-
-    Args:
-        file_path (AnyPathStrType): Absolute or relative file path (the file doesn't need to exist)
-        other_exts (Union[list, str]): Other double extensions to discard
-
-    Returns:
-        str: File name (without extension)
-
-    Example:
-        >>> file_path = 'D:/path/to/filename.zip'
-        >>> get_file_name(file_path)
-        'filename'
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_filename(file_path, other_exts)
-
-
-def get_ext(file_path: AnyPathStrType) -> str:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get file extension from file path.
-
-    .. WARNING::
-        Extension is given WITHOUT THE FIRST POINT
-
-    Args:
-        file_path (AnyPathStrType): Absolute or relative file path (the file doesn't need to exist)
-
-    Returns:
-        str: File name (without extension)
-
-    Example:
-        >>> file_path = 'D:/path/to/filename.zip'
-        >>> get_ext(file_path)
-        'zip'
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_ext(file_path)
-
-
 def remove(path: AnyPathStrType) -> None:
     """
     Deletes a file or a directory (recursively) using :code:`shutil.rmtree` or :code:`os.remove`.
@@ -318,54 +144,6 @@ def copy(src: AnyPathStrType, dst: AnyPathStrType) -> AnyPathType:
     return out
 
 
-def find_files(
-    names: Union[list, str],
-    root_paths: Union[list, AnyPathStrType],
-    max_nof_files: int = -1,
-    get_as_str: bool = False,
-) -> Union[list, str]:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Returns matching files recursively from a list of root paths.
-
-    Regex are allowed (using glob)
-
-    Args:
-        names (Union[list, str]): File names.
-        root_paths (Union[list, str]): Root paths
-        max_nof_files (int): Maximum number of files (set to -1 for unlimited)
-        get_as_str (bool): if only one file is found, it can be retrieved as a string instead of a list
-
-    Returns:
-        list: File name
-
-    Examples:
-        >>> root_path = 'D:/root'
-        >>> dir1_path = 'D:/root/dir1'
-        >>> dir2_path = 'D:/root/dir2'
-        >>>
-        >>> os.listdir(dir1_path)
-        ["haha.txt", "huhu.txt", "hoho.txt"]
-        >>> os.listdir(dir2_path)
-        ["huhu.txt", "hehe.txt"]
-        >>>
-        >>> find_files("huhu.txt", root_path)
-        ['D:/root/dir1/huhu.txt', 'D:/root/dir2/huhu.txt']
-        >>>
-        >>> find_files("huhu.txt", root_path, max_nof_files=1)
-        ['D:/root/dir1/huhu.txt']
-
-        >>> find_files("huhu.txt", root_path, max_nof_files=1, get_as_str=True)
-        found = 'D:/root/dir1/huhu.txt'
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.find_files(names, root_paths, max_nof_files, get_as_str)
-
-
 # subclass JSONDecoder
 class CustomDecoder(JSONDecoder):
     """Decoder for JSON with methods for datetimes"""
@@ -473,15 +251,6 @@ def save_json(json_dict: dict, output_json: AnyPathStrType, **kwargs) -> None:
         >>> json_dict = {"A": np.int64(1), "B": datetime.today(), "C": SomeEnum.some_name}
-        >>> save_json(output_json, json_dict)
+        >>> save_json(json_dict, output_json)
     """
-    if isinstance(output_json, dict):
-        # Old order. Swap the variables.
-        logs.deprecation_warning(
-            "The order of the function has changed. Please set json_dict in first!"
-        )
-        tmp = output_json
-        output_json = json_dict
-        json_dict = tmp
-
     kwargs["indent"] = kwargs.get("indent", 3)
     kwargs["cls"] = kwargs.get("cls", CustomEncoder)
 
@@ -528,66 +297,6 @@ def load_obj(path: AnyPathStrType) -> Any:
         return dill.load(file)
 
 
-# too many arguments
-# pylint: disable=R0913
-def get_file_in_dir(
-    directory: AnyPathStrType,
-    pattern_str: str,
-    extension: str = None,
-    filename_only: bool = False,
-    get_list: bool = False,
-    exact_name: bool = False,
-) -> Union[AnyPathType, list]:
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Get one or all matching files (pattern + extension) from inside a directory.
-
-    Note that the pattern is a regex with glob's convention, i.e. :code:`*pattern*`.
-
-    If :code:`exact_name` is :code:`False`, the searched pattern will be :code:`*{pattern}*.{extension}`,
-    else :code:`{pattern}.{extension}`.
-
-    Args:
-        directory (str): Directory where to find the files
-        pattern_str (str): Pattern wanted as a string, with glob's convention.
-        extension (str): Extension wanted, optional. With or without point. (:code:`yaml` or :code:`.yaml` accepted)
-        filename_only (bool): Get only the filename
-        get_list (bool): Get the whole list of matching files
-        exact_name (bool): Get the exact name (without adding :code:`*` before and after the given pattern)
-
-    Returns:
-        Union[AnyPathType, list]: File
-
-    Example:
-        >>> directory = 'D:/path/to/dir'
-        >>> os.listdir(directory)
-        ["haha.txt", "huhu1.txt", "huhu1.geojson", "hoho.txt"]
-        >>>
-        >>> get_file_in_dir(directory, "huhu")
-        'D:/path/to/dir/huhu1.geojson'
-        >>>
-        >>> get_file_in_dir(directory, "huhu", extension="txt")
-        'D:/path/to/dir/huhu1.txt'
-        >>>
-        >>> get_file_in_dir(directory, "huhu", get_list=True)
-        ['D:/path/to/dir/huhu1.txt', 'D:/path/to/dir/huhu1.geojson']
-        >>>
-        >>> get_file_in_dir(directory, "huhu", filename_only=True, get_list=True)
-        ['huhu1.txt', 'huhu1.geojson']
-        >>>
-        >>> get_file_in_dir(directory, "huhu", get_list=True, exact_name=True)
-        []
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.get_file_in_dir(
-        directory, pattern_str, extension, filename_only, get_list, exact_name
-    )
-
-
 # pylint: disable=E1121
 def hash_file_content(file_content: str, len_param: int = 5) -> str:
     """
@@ -610,22 +319,3 @@ def hash_file_content(file_content: str, len_param: int = 5) -> str:
     hasher = hashlib.shake_256()
     hasher.update(str.encode(file_content))
     return hasher.hexdigest(len_param)
-
-
-def is_writable(dir_path: AnyPathStrType):
-    """
-    .. deprecated:: 1.30.0
-       Import it from :py:mod:`sertit.path` instead of :py:mod:`sertit.files`
-
-    Determine whether the directory is writeable or not.
-
-    Args:
-        dir_path (AnyPathStrType): Directory path
-
-    Returns:
-        bool: True if the directory is writable
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Import it from 'sertit.path' instead of 'sertit.files'"
-    )
-    return path.is_writable(dir_path)
diff --git a/sertit/rasters.py b/sertit/rasters.py
index aa7dd35..ccbd2a4 100644
--- a/sertit/rasters.py
+++ b/sertit/rasters.py
@@ -126,25 +126,6 @@ def get_nodata_value_from_dtype(dtype) -> float:
     return rasters_rio.get_nodata_value_from_dtype(dtype)
 
 
-def get_nodata_value(dtype) -> float:
-    """
-    .. deprecated:: 1.41.0
-       Use :code:`get_nodata_value_from_dtype` instead.
-
-    Get default nodata value:
-
-    Args:
-        dtype: Dtype for the wanted nodata. Best if numpy's dtype.
-
-    Returns:
-        float: Nodata value
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Use 'get_nodata_value_from_dtype' instead."
-    )
-    return get_nodata_value_from_dtype(dtype)
-
-
 def any_raster_to_xr_ds(function: Callable) -> Callable:
     """
     Allows a function to ingest AnyRasterType and convert it into a xr.DataArray:
@@ -256,27 +237,6 @@ def wrapper(any_raster_type: AnyRasterType, *args, **kwargs) -> Any:
     return wrapper
 
 
-def path_xarr_dst(function: Callable) -> Callable:
-    """
-    .. deprecated:: 1.40.0
-       Use :py:func:`rasters.any_raster_to_xr_ds` instead.
-    """
-    logs.deprecation_warning(
-        "Deprecated 'path_xarr_dst' decorator. Please use 'any_raster_to_xr_ds' instead."
-    )
-    return any_raster_to_xr_ds(function)
-
-
-@any_raster_to_xr_ds
-def get_nodata_mask(xds: AnyXrDataStructure) -> np.ndarray:
-    """
-    .. deprecated:: 1.36.0
-       Use :py:func:`rasters.get_data_mask` instead.
-    """
-    logs.deprecation_warning("This function is deprecated. Use 'get_data_mask' instead")
-    return get_data_mask(xds)
-
-
 @any_raster_to_xr_ds
 def get_data_mask(xds: AnyXrDataStructure) -> np.ndarray:
     """
@@ -1104,12 +1064,6 @@ def write(
         >>> # Rewrite it
         >>> write(xds, raster_out)
     """
-    if output_path is None:
-        logs.deprecation_warning(
-            "'path' is deprecated in 'rasters.write'. Use 'output_path' instead."
-        )
-        output_path = kwargs.pop("path")
-
     # Prune empty kwargs to avoid throwing GDAL warnings/errors
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
 
diff --git a/sertit/rasters_rio.py b/sertit/rasters_rio.py
index c01971b..9663590 100644
--- a/sertit/rasters_rio.py
+++ b/sertit/rasters_rio.py
@@ -43,7 +43,7 @@
         "Please install 'rasterio' to use the 'rasters_rio' package."
     ) from ex
 
-from sertit import AnyPath, geometry, logs, misc, path, s3, strings, vectors, xml
+from sertit import AnyPath, geometry, misc, path, s3, strings, vectors, xml
 from sertit.logs import SU_NAME
 from sertit.types import AnyNumpyArray, AnyPathStrType, AnyPathType, AnyRasterType
 
@@ -112,25 +112,6 @@ def get_nodata_value_from_dtype(dtype) -> float:
     return nodata
 
 
-def get_nodata_value(dtype) -> float:
-    """
-    .. deprecated:: 1.41.0
-       Use :code:`get_nodata_value_from_dtype` instead.
-
-    Get default nodata value:
-
-    Args:
-        dtype: Dtype for the wanted nodata. Best if numpy's dtype.
-
-    Returns:
-        float: Nodata value
-    """
-    logs.deprecation_warning(
-        "This function is deprecated. Use 'get_nodata_value_from_dtype' instead."
-    )
-    return get_nodata_value_from_dtype(dtype)
-
-
 def bigtiff_value(arr: Any) -> str:
     """
     Returns :code:`YES` if array is larger than 4 GB, :code:`IF_NEEDED` otherwise.
@@ -250,17 +231,6 @@ def wrapper(any_raster_type: AnyRasterType, *args, **kwargs) -> Any:
     return wrapper
 
 
-def path_arr_dst(function: Callable) -> Callable:
-    """
-    .. deprecated:: 1.40.0
-       Use :py:func:`rasters.any_raster_to_rio_ds` instead.
-    """
-    logs.deprecation_warning(
-        "Deprecated 'path_arr_dst' decorator. Please use 'any_raster_to_rio_ds' instead."
-    )
-    return any_raster_to_rio_ds(function)
-
-
 @any_raster_to_rio_ds
 def get_new_shape(
     ds: AnyRasterType,
@@ -424,19 +394,6 @@ def update_meta(arr: AnyNumpyArray, meta: dict) -> dict:
     return out_meta
 
 
-def get_nodata_mask(
-    array: AnyNumpyArray,
-    has_nodata: bool,
-    default_nodata: int = 0,
-) -> np.ndarray:
-    """
-    .. deprecated:: 1.36.0
-       Use :py:func:`rasters_rio.get_data_mask` instead.
-    """
-    logs.deprecation_warning("This function is deprecated. Use 'get_data_mask' instead")
-    return get_data_mask(array, has_nodata, default_nodata)
-
-
 def get_data_mask(
     array: AnyNumpyArray,
     has_nodata: bool,
@@ -540,7 +497,7 @@ def rasterize(
 
     if not np.can_cast(np.array(nodata, dtype=ds.dtypes[0]), dtype):
         old_nodata = nodata
-        nodata = get_nodata_value(dtype)
+        nodata = get_nodata_value_from_dtype(dtype)
 
         # Only throw a warning if the value is really different  (we don't care about 255.0 being replaced by 255)
         if old_nodata - nodata != 0.0:
@@ -1090,12 +1047,6 @@ def write(
         >>> # Rewrite it on disk
         >>> write(raster, meta, raster_out)
     """
-    if output_path is None:
-        logs.deprecation_warning(
-            "'path' is deprecated in 'rasters_rio.write'. Use 'output_path' instead."
-        )
-        output_path = kwargs.pop("path")
-
     raster_out = raster.copy()
 
     # Prune empty kwargs to avoid throwing GDAL warnings/errors
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 2f9df17..10ab4f7 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -34,7 +34,7 @@
 from cloudpathlib.exceptions import AnyPathTypeError
 from shapely import Polygon, wkt
 
-from sertit import AnyPath, archives, files, geometry, logs, misc, path, s3, strings
+from sertit import AnyPath, archives, files, geometry, misc, path, s3, strings
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -78,9 +78,6 @@ def is_geopandas_1_0():
 
 def to_utm_crs(lon: float, lat: float) -> "CRS":  # noqa: F821
     """
-    .. deprecated:: 1.29.1
-       Use `estimate_utm_crs <https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.estimate_utm_crs.html>`_ instead, which directly returs a CRS instead of a string.
-
     Find the EPSG code of the UTM CRS from a lon/lat in WGS84.
 
     Args:
@@ -116,43 +113,6 @@ def to_utm_crs(lon: float, lat: float) -> "CRS":  # noqa: F821
     return gpd.GeoDataFrame(geometry=point, crs=EPSG_4326).estimate_utm_crs()
 
 
-def corresponding_utm_projection(lon: float, lat: float) -> str:
-    """
-    .. deprecated:: 1.29.1
-       Use `estimate_utm_crs <https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.estimate_utm_crs.html>`_ instead, which directly returs a CRS instead of a string.
-
-    Find the EPSG code of the UTM CRS from a lon/lat in WGS84.
-
-    Args:
-        lon (float): Longitude (WGS84, epsg:4326)
-        lat (float): Latitude (WGS84, epsg:4326)
-
-    Returns:
-        CRS: UTM CRS
-
-    Example:
-        >>> to_utm_crs(lon=7.8, lat=48.6)  # Strasbourg
-        <Derived Projected CRS: EPSG:32632>
-        Name: WGS 84 / UTM zone 32N
-        Axis Info [cartesian]:
-        - E[east]: Easting (metre)
-        - N[north]: Northing (metre)
-        Area of Use:
-        - bounds: (6.0, 0.0, 12.0, 84.0)
-        Coordinate Operation:
-        - name: UTM zone 32N
-        - method: Transverse Mercator
-        Datum: World Geodetic System 1984 ensemble
-        - Ellipsoid: WGS 84
-        - Prime Meridian: Greenwich
-
-    """
-    logs.deprecation_warning(
-        "Deprecated, use 'to_utm_crs' instead, which directly returs a CRS instead of a string."
-    )
-    return to_utm_crs(lon, lat).to_string()
-
-
 def get_geodf(geom: Union[Polygon, list, gpd.GeoSeries], crs: str) -> gpd.GeoDataFrame:
     """
     Get a GeoDataFrame from a geometry and a crs
diff --git a/sertit/xml.py b/sertit/xml.py
index d52dfa0..0bae6c9 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -30,7 +30,7 @@
 )
 from lxml.html.builder import E
 
-from sertit import AnyPath, archives, logs, path, s3
+from sertit import AnyPath, archives, path, s3
 from sertit.logs import SU_NAME
 from sertit.misc import ListEnum
 from sertit.types import AnyPathStrType
@@ -97,12 +97,6 @@ def read_archive(
     Returns:
         _Element: XML Root
     """
-    if archive_path is None:
-        logs.deprecation_warning(
-            "'path' argument is deprecated, use 'archive_path' instead."
-        )
-        archive_path = kwargs.pop("path")
-
     try:
         if not regex:
             archive_base_path, basename = str(archive_path).split("!")

From 829ef817445765505d647956a20b437a39e058ca Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Fri, 13 Dec 2024 17:11:26 +0100
Subject: [PATCH 10/18] Fix storage option handling

---
 sertit/archives.py | 2 +-
 sertit/vectors.py  | 4 +---
 sertit/xml.py      | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/sertit/archives.py b/sertit/archives.py
index d8dc86a..db5d540 100644
--- a/sertit/archives.py
+++ b/sertit/archives.py
@@ -303,7 +303,7 @@ def archive(
         tmp_dir.cleanup()
 
     try:
-        arch = AnyPath(archive_fn, storage_options=folder_path.storage_options)
+        arch = AnyPath(archive_fn, **folder_path.storage_options)
     except AttributeError:
         arch = AnyPath(archive_fn)
 
diff --git a/sertit/vectors.py b/sertit/vectors.py
index 10ab4f7..1113bb9 100644
--- a/sertit/vectors.py
+++ b/sertit/vectors.py
@@ -433,9 +433,7 @@ def read(
             split_vect = str(vector_path).split("!")
             archive_regex = ".*{}".format(split_vect[1].replace(".", r"\."))
             try:
-                vector_path = AnyPath(
-                    split_vect[0], storage_options=vector_path.storage_options
-                )
+                vector_path = AnyPath(split_vect[0], **vector_path.storage_options)
             except AttributeError:
                 # Cloudpathlib
                 vector_path = AnyPath(split_vect[0])
diff --git a/sertit/xml.py b/sertit/xml.py
index 0bae6c9..befa80f 100644
--- a/sertit/xml.py
+++ b/sertit/xml.py
@@ -109,7 +109,7 @@ def read_archive(
             # For UPath
             with contextlib.suppress(AttributeError):
                 archive_base_path = AnyPath(
-                    archive_base_path, storage_options=archive_path.storage_options
+                    archive_base_path, **archive_path.storage_options
                 )
         else:
             archive_base_path = archive_path

From b6e8509f799fd54619711e37dc72ec405fc7bc27 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 12:30:07 +0000
Subject: [PATCH 11/18] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ci/test_archives.py | 2 +-
 ci/test_ci.py       | 2 +-
 ci/test_files.py    | 2 +-
 ci/test_path.py     | 2 +-
 ci/test_vectors.py  | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ci/test_archives.py b/ci/test_archives.py
index ddb5d0f..d6e6801 100644
--- a/ci/test_archives.py
+++ b/ci/test_archives.py
@@ -2,9 +2,9 @@
 import shutil
 
 import pytest
+from CI.SCRIPTS.script_utils import files_path, s3_env
 from lxml import etree, html
 
-from CI.SCRIPTS.script_utils import files_path, s3_env
 from sertit import archives, ci, files, path, s3, vectors
 
 
diff --git a/ci/test_ci.py b/ci/test_ci.py
index be911cc..14370c6 100644
--- a/ci/test_ci.py
+++ b/ci/test_ci.py
@@ -19,9 +19,9 @@
 import tempfile
 
 import pytest
+from CI.SCRIPTS.script_utils import files_path, rasters_path, s3_env, vectors_path
 from lxml import etree
 
-from CI.SCRIPTS.script_utils import files_path, rasters_path, s3_env, vectors_path
 from sertit import ci, path, rasters, rasters_rio, s3, vectors
 
 ci.reduce_verbosity()
diff --git a/ci/test_files.py b/ci/test_files.py
index 84539f7..b5233c2 100644
--- a/ci/test_files.py
+++ b/ci/test_files.py
@@ -20,8 +20,8 @@
 from datetime import date, datetime
 
 import numpy as np
-
 from CI.SCRIPTS.script_utils import Polarization
+
 from sertit import AnyPath, ci, files
 
 ci.reduce_verbosity()
diff --git a/ci/test_path.py b/ci/test_path.py
index d173bc3..bb94f47 100644
--- a/ci/test_path.py
+++ b/ci/test_path.py
@@ -19,8 +19,8 @@
 import tempfile
 
 import pytest
-
 from CI.SCRIPTS.script_utils import get_s3_ci_path
+
 from sertit import AnyPath, ci, misc, path
 
 ci.reduce_verbosity()
diff --git a/ci/test_vectors.py b/ci/test_vectors.py
index 98c9dd2..750435d 100644
--- a/ci/test_vectors.py
+++ b/ci/test_vectors.py
@@ -21,9 +21,9 @@
 
 import geopandas as gpd
 import pytest
+from CI.SCRIPTS.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
 from shapely import wkt
 
-from CI.SCRIPTS.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
 from sertit import archives, ci, files, path, vectors
 from sertit.vectors import EPSG_4326, DataSourceError
 

From c9e3f3c93f5437f53c21e25cea8455315325376b Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 13:32:20 +0100
Subject: [PATCH 12/18] FIX: Allow str as paths in `ci.assert_files_equal`

---
 CHANGES.md    | 1 +
 ci/test_ci.py | 1 +
 sertit/ci.py  | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 3e0d1f1..f6bbfee 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -18,6 +18,7 @@
 - FIX: Fix deprecation warning for `get_nodata_value_from_dtype` in `rasters_rio`
 - FIX: Force blocksize to 128 when writing small COGs on disk (in order to have multiple overview levels)
 - FIX: Use `np.tan` in `rasters.slope`
+- FIX: Allow str as paths in `ci.assert_files_equal`
 - OPTIM: Compute the spatial index by default in `vectors.read` (set `vectors.read(..., compute_sindex=False)` if you don't want to compute them)
 - CI: Rename CI folder and remove unnecessary intermediate folder
 
diff --git a/ci/test_ci.py b/ci/test_ci.py
index 14370c6..0e2125d 100644
--- a/ci/test_ci.py
+++ b/ci/test_ci.py
@@ -73,6 +73,7 @@ def test_assert_files():
     false_path = files_path().joinpath("false.html")
 
     ci.assert_files_equal(ok_path, ok_path)
+    ci.assert_files_equal(str(ok_path), str(ok_path))
     with pytest.raises(AssertionError):
         ci.assert_files_equal(ok_path, false_path)
 
diff --git a/sertit/ci.py b/sertit/ci.py
index 43b28e3..9e07937 100644
--- a/sertit/ci.py
+++ b/sertit/ci.py
@@ -86,7 +86,7 @@ def assert_files_equal(file_1: AnyPathStrType, file_2: AnyPathStrType):
         file_1 (str): Path to file 1
         file_2 (str): Path to file 2
     """
-    with file_1.open("r") as f1, file_2.open("r") as f2:
+    with AnyPath(file_1).open("r") as f1, AnyPath(file_2).open("r") as f2:
         assert files.hash_file_content(f1.read()) == files.hash_file_content(f2.read())
 
 

From 2be7216620b3dd27a46ce31086ce726f48459dd5 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 14:09:26 +0100
Subject: [PATCH 13/18] Fix files after CI folder renaming

---
 .gitignore     | 6 +++---
 .gitlab-ci.yml | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5eb2f90..b054ac0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,9 +24,9 @@ dist
 dask-worker-space/*
 
 # Data in CI
-CI/*.tif
-CI/*.zip
-CI/*.vrt
+ci/*.tif
+ci/*.zip
+ci/*.vrt
 
 # Docs & Notebooks
 docs/_build/*
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index cf8aeb1..aa372cb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -27,7 +27,7 @@ pytest:
     - pip install --ignore-installed PyYAML
     - pip install -e .[full]
   script:
-    - python -m pytest -v --durations=0 --cov-report term --cov-report xml:cov.xml --cov=sertit ci/on_push --cov-config=.coveragerc --log-cli-level DEBUG --capture=sys
+    - python -m pytest -v --durations=0 --cov-report term --cov-report xml:cov.xml --cov=sertit ci --cov-config=.coveragerc --log-cli-level DEBUG --capture=sys
   coverage: '/TOTAL\s+\d+\s+\d+\s+(\d+%)/'
   tags:
     - sertit
@@ -50,7 +50,7 @@ pytest_s3:
     - pip install --ignore-installed PyYAML
     - pip install -e .[full]
   script:
-    - python -m pytest -v --durations=0 --cov-report term --cov-report xml:cov.xml --cov=sertit ci/on_push --cov-config=.coveragerc --log-cli-level DEBUG --capture=sys
+    - python -m pytest -v --durations=0 --cov-report term --cov-report xml:cov.xml --cov=sertit ci --cov-config=.coveragerc --log-cli-level DEBUG --capture=sys
   coverage: '/TOTAL\s+\d+\s+\d+\s+(\d+%)/'
   tags:
     - sertit

From 28b5f25719e0d69a18af83f1d9b67a17e4d9cabb Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 14:27:46 +0100
Subject: [PATCH 14/18] Remove useless function in CI

---
 ci/script_utils.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/ci/script_utils.py b/ci/script_utils.py
index 583682a..33f0b05 100644
--- a/ci/script_utils.py
+++ b/ci/script_utils.py
@@ -52,21 +52,12 @@ def get_s3_ci_path():
     return ci_path
 
 
-def get_proj_path():
-    """Get project path"""
-    if int(os.getenv(CI_SERTIT_S3, 1)) and sys.platform != "win32":
-        return get_s3_ci_path()
-    else:
-        # ON DISK
-        return AnyPath(unistra.get_db3_path())
-
-
 def get_ci_data_path():
     """Get CI DATA path"""
     if int(os.getenv(CI_SERTIT_S3, 1)) and sys.platform != "win32":
-        return get_proj_path().joinpath("DATA")
+        return get_s3_ci_path() / "DATA"
     else:
-        return get_proj_path().joinpath("CI", "sertit_utils", "DATA")
+        return AnyPath(unistra.get_db3_path()) / "CI" / "sertit_utils" / "DATA"
 
 
 def dask_env(function):

From fcc353abb5b752267f4ff55f53f94535cadbf8c7 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 14:29:55 +0100
Subject: [PATCH 15/18] FIX: Better alignment between `rasters.read` function
 and `rasters.any_raster_to_xr_ds` decorator

---
 CHANGES.md         |  1 +
 ci/test_rasters.py |  2 +-
 sertit/rasters.py  | 55 ++++++++++++++--------------------------------
 3 files changed, 18 insertions(+), 40 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index f6bbfee..7ac82ef 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -19,6 +19,7 @@
 - FIX: Force blocksize to 128 when writing small COGs on disk (in order to have multiple overview levels)
 - FIX: Use `np.tan` in `rasters.slope`
 - FIX: Allow str as paths in `ci.assert_files_equal`
+- FIX: Better alignement between `rasters.read` function and `rasters.any_raster_to_xr_ds` decorator
 - OPTIM: Compute the spatial index by default in `vectors.read` (set `vectors.read(..., compute_sindex=False)` if you don't want to compute them)
 - CI: Rename CI folder and remove unnecessary intermediate folder
 
diff --git a/ci/test_rasters.py b/ci/test_rasters.py
index 70145e0..99cb7ac 100644
--- a/ci/test_rasters.py
+++ b/ci/test_rasters.py
@@ -102,7 +102,7 @@ def ds_name(raster_path):
 @pytest.fixture
 def ds_dtype(raster_path):
     with rasterio.open(str(raster_path)) as ds:
-        return ds.meta["dtype"]
+        return getattr(np, ds.meta["dtype"])
 
 
 @pytest.fixture
diff --git a/sertit/rasters.py b/sertit/rasters.py
index ec338e4..c2a71e8 100644
--- a/sertit/rasters.py
+++ b/sertit/rasters.py
@@ -33,7 +33,7 @@
 try:
     import rasterio
     import rioxarray
-    from rasterio import MemoryFile, features
+    from rasterio import features
     from rasterio.enums import Resampling
     from rioxarray.exceptions import MissingCRS
 except ModuleNotFoundError as ex:
@@ -172,8 +172,8 @@ def wrapper(any_raster_type: AnyRasterType, *args, **kwargs) -> Any:
         if any_raster_type is None:
             raise ValueError("'any_raster_type' shouldn't be None!")
 
-        default_chunks = True if dask.get_client() is not None else None
-
+        default_chunks = "auto" if dask.get_client() is not None else None
+        masked = kwargs.get("masked", True)
         # By default, try with the input fct
         try:
             out = function(any_raster_type, *args, **kwargs)
@@ -197,41 +197,12 @@ def wrapper(any_raster_type: AnyRasterType, *args, **kwargs) -> Any:
                 except Exception as ex:
                     raise TypeError("Function not available for xarray.Dataset") from ex
 
-            elif isinstance(any_raster_type, tuple):
-                arr, meta = any_raster_type
-                with (
-                    MemoryFile() as memfile,
-                    memfile.open(
-                        **meta, BIGTIFF=rasters_rio.bigtiff_value(any_raster_type)
-                    ) as ds,
-                ):
-                    ds.write(arr.data)
-
-                    with rioxarray.open_rasterio(
-                        any_raster_type,
-                        masked=True,
-                        default_name=ds.name,
-                        chunks=kwargs.pop("chunks", default_chunks),
-                    ) as xds:
-                        out = function(xds, *args, **kwargs)
             else:
-                # Get the path from the input
-                if path.is_path(any_raster_type):
-                    name = str(any_raster_type)
-                    any_raster_type = str(any_raster_type)
-                else:
-                    # For rasterio datasets, '.name' gives the path
-                    name = any_raster_type.name
-
-                # Convert path or rasterio.dataset to xr.dataset
-                with rioxarray.open_rasterio(
-                    any_raster_type,
-                    masked=True,
-                    default_name=name,
-                    chunks=kwargs.pop("chunks", default_chunks),
-                ) as xds:
-                    out = function(xds, *args, **kwargs)
-
+                out = function(
+                    read(any_raster_type, chunks=default_chunks, masked=masked),
+                    *args,
+                    **kwargs,
+                )
         return out
 
     return wrapper
@@ -948,13 +919,19 @@ def read(
         rioxarray.set_options(export_grid_mapping=False),
         rioxarray.open_rasterio(
             ds,
-            lock=False,
             default_name=path.get_filename(ds.name),
             chunks=chunks,
+            masked=masked,
             **kwargs,
         ) as xda,
     ):
-        orig_dtype = xda.dtype
+        orig_dtype = xda.encoding.get(
+            "rasterio_dtype", xda.encoding.get("dtype", xda.dtype)
+        )
+
+        if isinstance(orig_dtype, str):
+            with contextlib.suppress(AttributeError):
+                orig_dtype = getattr(np, orig_dtype)
 
         # Windows
         if window is not None:

From 69bf6d133854c202b5fb053e8f3c1ce35d994d78 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 14:49:30 +0100
Subject: [PATCH 16/18] FIX: Fix `rasters.sieve` function with `xr.apply_ufunc`

---
 CHANGES.md         | 1 +
 ci/test_rasters.py | 6 +++++-
 sertit/rasters.py  | 9 +++++----
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 7ac82ef..7527a5f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -20,6 +20,7 @@
 - FIX: Use `np.tan` in `rasters.slope`
 - FIX: Allow str as paths in `ci.assert_files_equal`
 - FIX: Better alignement between `rasters.read` function and `rasters.any_raster_to_xr_ds` decorator
+- FIX: Fix `rasters.sieve` function with `xr.apply_ufunc`
 - OPTIM: Compute the spatial index by default in `vectors.read` (set `vectors.read(..., compute_sindex=False)` if you don't want to compute them)
 - CI: Rename CI folder and remove unnecessary intermediate folder
 
diff --git a/ci/test_rasters.py b/ci/test_rasters.py
index 99cb7ac..74680a3 100644
--- a/ci/test_rasters.py
+++ b/ci/test_rasters.py
@@ -314,7 +314,7 @@ def test_crop(tmp_path, xda, xds, xda_dask, mask):
 
 @s3_env
 @dask_env
-def test_sieve(tmp_path, xda, xds, xda_dask):
+def test_sieve(tmp_path, raster_path, xda, xds, xda_dask):
     """Test sieve function"""
     # DataArray
     xda_sieved = os.path.join(tmp_path, "test_sieved_xda.tif")
@@ -348,6 +348,10 @@ def test_sieve(tmp_path, xda, xds, xda_dask):
     ci.assert_raster_equal(xda_sieved, raster_sieved_path)
     ci.assert_raster_equal(xds_sieved, raster_sieved_path)
 
+    # From path
+    sieve_xda_path = rasters.sieve(raster_path, sieve_thresh=20, connectivity=4)
+    np.testing.assert_array_equal(sieve_xda, sieve_xda_path)
+
 
 @s3_env
 @dask_env
diff --git a/sertit/rasters.py b/sertit/rasters.py
index c2a71e8..52ca67c 100644
--- a/sertit/rasters.py
+++ b/sertit/rasters.py
@@ -1334,14 +1334,15 @@ def sieve(
 
     assert connectivity in [4, 8]
 
-    # Use this trick to make the sieve work
-    mask = np.where(np.isnan(xds.data), 0, 1).astype(np.uint8)
-    data = xds.data.astype(np.uint8)
+    mask = xr.where(np.isnan(xds), 0, 1).astype(np.uint8).data
+    data = xds.astype(np.uint8).data
 
     # Sieve
     try:
         sieved_arr = xr.apply_ufunc(
-            features.sieve, data, sieve_thresh, connectivity, mask
+            features.sieve,
+            data,
+            kwargs={"size": sieve_thresh, "connectivity": connectivity, "mask": mask},
         )
     except ValueError:
         sieved_arr = features.sieve(

From c47581a51645a24f64870867623e4d750ea7c651 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 15:16:37 +0100
Subject: [PATCH 17/18] Fix CI

---
 ci/test_archives.py | 2 +-
 ci/test_ci.py       | 9 ++++++---
 ci/test_files.py    | 2 +-
 ci/test_path.py     | 2 +-
 ci/test_vectors.py  | 2 +-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/ci/test_archives.py b/ci/test_archives.py
index d6e6801..8283d6d 100644
--- a/ci/test_archives.py
+++ b/ci/test_archives.py
@@ -2,9 +2,9 @@
 import shutil
 
 import pytest
-from CI.SCRIPTS.script_utils import files_path, s3_env
 from lxml import etree, html
 
+from ci.script_utils import files_path, s3_env
 from sertit import archives, ci, files, path, s3, vectors
 
 
diff --git a/ci/test_ci.py b/ci/test_ci.py
index 0e2125d..3625bfb 100644
--- a/ci/test_ci.py
+++ b/ci/test_ci.py
@@ -19,9 +19,9 @@
 import tempfile
 
 import pytest
-from CI.SCRIPTS.script_utils import files_path, rasters_path, s3_env, vectors_path
 from lxml import etree
 
+from ci.script_utils import files_path, rasters_path, s3_env, vectors_path
 from sertit import ci, path, rasters, rasters_rio, s3, vectors
 
 ci.reduce_verbosity()
@@ -67,13 +67,16 @@ def test_assert_dir():
 
 
 @s3_env
-def test_assert_files():
+def test_assert_files(tmp_path):
     """Test CI functions"""
     ok_path = files_path().joinpath("productPreview.html")
     false_path = files_path().joinpath("false.html")
 
     ci.assert_files_equal(ok_path, ok_path)
-    ci.assert_files_equal(str(ok_path), str(ok_path))
+    if path.is_cloud_path(ok_path):
+        str_ok_path = str(s3.download(ok_path, tmp_path))
+
+    ci.assert_files_equal(str_ok_path, str_ok_path)
     with pytest.raises(AssertionError):
         ci.assert_files_equal(ok_path, false_path)
 
diff --git a/ci/test_files.py b/ci/test_files.py
index b5233c2..d8de011 100644
--- a/ci/test_files.py
+++ b/ci/test_files.py
@@ -20,8 +20,8 @@
 from datetime import date, datetime
 
 import numpy as np
-from CI.SCRIPTS.script_utils import Polarization
 
+from ci.script_utils import Polarization
 from sertit import AnyPath, ci, files
 
 ci.reduce_verbosity()
diff --git a/ci/test_path.py b/ci/test_path.py
index bb94f47..6da5458 100644
--- a/ci/test_path.py
+++ b/ci/test_path.py
@@ -19,8 +19,8 @@
 import tempfile
 
 import pytest
-from CI.SCRIPTS.script_utils import get_s3_ci_path
 
+from ci.script_utils import get_s3_ci_path
 from sertit import AnyPath, ci, misc, path
 
 ci.reduce_verbosity()
diff --git a/ci/test_vectors.py b/ci/test_vectors.py
index 750435d..3aea31d 100644
--- a/ci/test_vectors.py
+++ b/ci/test_vectors.py
@@ -21,9 +21,9 @@
 
 import geopandas as gpd
 import pytest
-from CI.SCRIPTS.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
 from shapely import wkt
 
+from ci.script_utils import KAPUT_KWARGS, files_path, s3_env, vectors_path
 from sertit import archives, ci, files, path, vectors
 from sertit.vectors import EPSG_4326, DataSourceError
 

From e54eb497fb0aa3c7fee362d25cdc64a8b2b980ae Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Mon, 23 Dec 2024 16:08:37 +0100
Subject: [PATCH 18/18] Fix CI

---
 ci/test_ci.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ci/test_ci.py b/ci/test_ci.py
index 3625bfb..3a836d6 100644
--- a/ci/test_ci.py
+++ b/ci/test_ci.py
@@ -75,6 +75,8 @@ def test_assert_files(tmp_path):
     ci.assert_files_equal(ok_path, ok_path)
     if path.is_cloud_path(ok_path):
         str_ok_path = str(s3.download(ok_path, tmp_path))
+    else:
+        str_ok_path = ok_path
 
     ci.assert_files_equal(str_ok_path, str_ok_path)
     with pytest.raises(AssertionError):