From d38b96435c1202f8bcac2ce1408f828c1dba7065 Mon Sep 17 00:00:00 2001
From: BRAUN REMI <remi.braun@unistra.fr>
Date: Wed, 11 Dec 2024 12:19:01 +0100
Subject: [PATCH] DEPR: Deprecate `xml_regex` and `file_regex` arguments from
 above-mentioned functions in favor of a harmonized `regex` argument.

---
 CHANGES.md      |  1 +
 sertit/files.py | 12 +++++++++---
 sertit/path.py  | 36 +++++++++++++++++++++++-------------
 3 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index 78f07b8..10608f8 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -6,6 +6,7 @@
 - OPTIM: Don't download files stored on cloud when applying `ci.assert_files_equal` on them
 - OPTIM: Offer the ability to give the archived file list directly to `path.get_archived_file_list` and `files.read_archived_file`, as this operation is expensive when done with large archives stored on the cloud (and thus better done only once).
   Propagated into `path.get_archived_path`, `path.get_archived_rio_path`, `vectors.read`, `xml.read_archive`, files.read_archived_xml` and `files.read_archived_html`
+- DEPR: Deprecate `xml_regex` and `file_regex` arguments from above-mentioned functions in favor of a harmonized `regex` argument.
 
 ## 1.44.0 (2024-12-09)
 
diff --git a/sertit/files.py b/sertit/files.py
index 8bfa735..918a173 100644
--- a/sertit/files.py
+++ b/sertit/files.py
@@ -433,7 +433,7 @@ def read_archived_file(
 
 
 def read_archived_xml(
-    archive_path: AnyPathStrType, xml_regex: str, file_list: list = None
+    archive_path: AnyPathStrType, regex: str = None, file_list: list = None, **kwargs
 ) -> etree._Element:
     """
     Read archived XML from :code:`zip` or :code:`tar` archives.
@@ -442,7 +442,7 @@ def read_archived_xml(
 
     Args:
         archive_path (AnyPathStrType): Archive path
-        xml_regex (str): XML regex (used by re) as it can be found in the getmembers() list
+        regex (str): XML regex (used by re) as it can be found in the getmembers() list
         file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
 
     Returns:
@@ -454,7 +454,13 @@ def read_archived_xml(
         >>> read_archived_xml(arch_path, file_regex)
         <Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
     """
-    xml_bytes = read_archived_file(archive_path, xml_regex, file_list=file_list)
+    if regex is None:
+        logs.deprecation_warning(
+            "'xml_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("xml_regex")
+
+    xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list)
 
     return etree.fromstring(xml_bytes)
 
diff --git a/sertit/path.py b/sertit/path.py
index 1f4754a..94a8106 100644
--- a/sertit/path.py
+++ b/sertit/path.py
@@ -26,7 +26,7 @@
 import zipfile
 from typing import Any, Union
 
-from sertit import AnyPath
+from sertit import AnyPath, logs
 from sertit.logs import SU_NAME
 from sertit.types import AnyPathStrType, AnyPathType
 
@@ -185,10 +185,11 @@ def get_archived_file_list(archive_path: AnyPathStrType) -> list:
 
 def get_archived_path(
     archive_path: AnyPathStrType,
-    file_regex: str,
+    regex: str,
     as_list: bool = False,
     case_sensitive: bool = False,
     file_list: list = None,
+    **kwargs,
 ) -> Union[list, AnyPathType]:
     """
     Get archived file path from inside the archive.
@@ -200,7 +201,7 @@ def get_archived_path(
 
     Args:
         archive_path (AnyPathStrType): Archive path
-        file_regex (str): File regex (used by re) as it can be found in the getmembers() list
+        regex (str): File regex (used by re) as it can be found in the getmembers() list
         as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
         case_sensitive (bool): If true, the regex is case-sensitive.
         file_list (list): List of files to get archived from. Optional, if not given it will be re-computed.
@@ -214,6 +215,12 @@ def get_archived_path(
         >>> path = get_archived_path(arch_path, file_regex)
         'dir/filename.tif'
     """
+    if regex is None:
+        logs.deprecation_warning(
+            "'file_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("file_regex")
+
     # Get file list
     archive_path = AnyPath(archive_path)
 
@@ -222,15 +229,11 @@ def get_archived_path(
         file_list = get_archived_file_list(archive_path)
 
     # Search for file
-    regex = (
-        re.compile(file_regex)
-        if case_sensitive
-        else re.compile(file_regex, re.IGNORECASE)
-    )
-    archived_band_paths = list(filter(regex.match, file_list))
+    re_rgx = re.compile(regex) if case_sensitive else re.compile(regex, re.IGNORECASE)
+    archived_band_paths = list(filter(re_rgx.match, file_list))
     if not archived_band_paths:
         raise FileNotFoundError(
-            f"Impossible to find file {file_regex} in {get_filename(archive_path)}"
+            f"Impossible to find file {regex} in {get_filename(archive_path)}"
         )
 
     # Convert to str if needed
@@ -242,9 +245,10 @@ def get_archived_path(
 
 def get_archived_rio_path(
     archive_path: AnyPathStrType,
-    file_regex: str,
+    regex: str,
     as_list: bool = False,
     file_list: list = None,
+    **kwargs,
 ) -> Union[list, AnyPathType]:
     """
     Get archived file path from inside the archive, to be read with rasterio:
@@ -266,7 +270,7 @@ def get_archived_rio_path(
 
     Args:
         archive_path (AnyPathStrType): Archive path
-        file_regex (str): File regex (used by re) as it can be found in the getmembers() list
+        regex (str): File regex (used by re) as it can be found in the getmembers() list
         as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
         file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
 
@@ -281,6 +285,12 @@ def get_archived_rio_path(
         >>> rasterio.open(path)
         <open DatasetReader name='zip+file://D:/path/to/output.zip!dir/filename.tif' mode='r'>
     """
+    if regex is None:
+        logs.deprecation_warning(
+            "'file_regex' is deprecated, please use 'regex' instead."
+        )
+        regex = kwargs.pop("file_regex")
+
     archive_path = AnyPath(archive_path)
     if archive_path.suffix in [".tar", ".zip"]:
         prefix = archive_path.suffix[-3:]
@@ -293,7 +303,7 @@ def get_archived_rio_path(
 
     # Search for file
     archived_band_paths = get_archived_path(
-        archive_path, file_regex, as_list=True, file_list=file_list
+        archive_path, regex=regex, as_list=True, file_list=file_list
     )
 
     # Convert to rio path