From d38b96435c1202f8bcac2ce1408f828c1dba7065 Mon Sep 17 00:00:00 2001 From: BRAUN REMI Date: Wed, 11 Dec 2024 12:19:01 +0100 Subject: [PATCH] DEPR: Deprecate `xml_regex` and `file_regex` arguments from above-mentioned functions in favor of a harmonized `regex` argument. --- CHANGES.md | 1 + sertit/files.py | 12 +++++++++--- sertit/path.py | 36 +++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 78f07b8..10608f8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ - OPTIM: Don't download files stored on cloud when applying `ci.assert_files_equal` on them - OPTIM: Offer the ability to give the archived file list directly to `path.get_archived_file_list` and `files.read_archived_file`, as this operation is expensive when done with large archives stored on the cloud (and thus better done only once). Propagated into `path.get_archived_path`, `path.get_archived_rio_path`, `vectors.read`, `xml.read_archive`, files.read_archived_xml` and `files.read_archived_html` +- DEPR: Deprecate `xml_regex` and `file_regex` arguments from above-mentioned functions in favor of a harmonized `regex` argument. ## 1.44.0 (2024-12-09) diff --git a/sertit/files.py b/sertit/files.py index 8bfa735..918a173 100644 --- a/sertit/files.py +++ b/sertit/files.py @@ -433,7 +433,7 @@ def read_archived_file( def read_archived_xml( - archive_path: AnyPathStrType, xml_regex: str, file_list: list = None + archive_path: AnyPathStrType, regex: str = None, file_list: list = None, **kwargs ) -> etree._Element: """ Read archived XML from :code:`zip` or :code:`tar` archives. @@ -442,7 +442,7 @@ def read_archived_xml( Args: archive_path (AnyPathStrType): Archive path - xml_regex (str): XML regex (used by re) as it can be found in the getmembers() list + regex (str): XML regex (used by re) as it can be found in the getmembers() list file_list (list): List of files contained in the archive. 
Optional, if not given it will be re-computed. Returns: @@ -454,7 +454,13 @@ def read_archived_xml( >>> read_archived_xml(arch_path, file_regex) """ - xml_bytes = read_archived_file(archive_path, xml_regex, file_list=file_list) + if regex is None: + logs.deprecation_warning( + "'xml_regex' is deprecated, please use 'regex' instead." + ) + regex = kwargs.pop("xml_regex") + + xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list) return etree.fromstring(xml_bytes) diff --git a/sertit/path.py b/sertit/path.py index 1f4754a..94a8106 100644 --- a/sertit/path.py +++ b/sertit/path.py @@ -26,7 +26,7 @@ import zipfile from typing import Any, Union -from sertit import AnyPath +from sertit import AnyPath, logs from sertit.logs import SU_NAME from sertit.types import AnyPathStrType, AnyPathType @@ -185,10 +185,11 @@ def get_archived_file_list(archive_path: AnyPathStrType) -> list: def get_archived_path( archive_path: AnyPathStrType, - file_regex: str, + regex: str, as_list: bool = False, case_sensitive: bool = False, file_list: list = None, + **kwargs, ) -> Union[list, AnyPathType]: """ Get archived file path from inside the archive. @@ -200,7 +201,7 @@ def get_archived_path( Args: archive_path (AnyPathStrType): Archive path - file_regex (str): File regex (used by re) as it can be found in the getmembers() list + regex (str): File regex (used by re) as it can be found in the getmembers() list as_list (bool): If true, returns a list (including all found files). If false, returns only the first match case_sensitive (bool): If true, the regex is case-sensitive. file_list (list): List of files to get archived from. Optional, if not given it will be re-computed. @@ -214,6 +215,12 @@ def get_archived_path( >>> path = get_archived_path(arch_path, file_regex) 'dir/filename.tif' """ + if regex is None: + logs.deprecation_warning( + "'file_regex' is deprecated, please use 'regex' instead." 
+ ) + regex = kwargs.pop("file_regex") + # Get file list archive_path = AnyPath(archive_path) @@ -222,15 +229,11 @@ def get_archived_path( file_list = get_archived_file_list(archive_path) # Search for file - regex = ( - re.compile(file_regex) - if case_sensitive - else re.compile(file_regex, re.IGNORECASE) - ) - archived_band_paths = list(filter(regex.match, file_list)) + re_rgx = re.compile(regex) if case_sensitive else re.compile(regex, re.IGNORECASE) + archived_band_paths = list(filter(re_rgx.match, file_list)) if not archived_band_paths: raise FileNotFoundError( - f"Impossible to find file {file_regex} in {get_filename(archive_path)}" + f"Impossible to find file {regex} in {get_filename(archive_path)}" ) # Convert to str if needed @@ -242,9 +245,10 @@ def get_archived_path( def get_archived_rio_path( archive_path: AnyPathStrType, - file_regex: str, + regex: str, as_list: bool = False, file_list: list = None, + **kwargs, ) -> Union[list, AnyPathType]: """ Get archived file path from inside the archive, to be read with rasterio: @@ -266,7 +270,7 @@ def get_archived_rio_path( Args: archive_path (AnyPathStrType): Archive path - file_regex (str): File regex (used by re) as it can be found in the getmembers() list + regex (str): File regex (used by re) as it can be found in the getmembers() list as_list (bool): If true, returns a list (including all found files). If false, returns only the first match file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed. @@ -281,6 +285,12 @@ def get_archived_rio_path( >>> rasterio.open(path) """ + if regex is None: + logs.deprecation_warning( + "'file_regex' is deprecated, please use 'regex' instead." 
+ ) + regex = kwargs.pop("file_regex") + archive_path = AnyPath(archive_path) if archive_path.suffix in [".tar", ".zip"]: prefix = archive_path.suffix[-3:] @@ -293,7 +303,7 @@ def get_archived_rio_path( # Search for file archived_band_paths = get_archived_path( - archive_path, file_regex, as_list=True, file_list=file_list + archive_path, regex=regex, as_list=True, file_list=file_list ) # Convert to rio path