Skip to content

Commit

Permalink
DEPR: Deprecate xml_regex and file_regex arguments from above-men…
Browse files Browse the repository at this point in the history
…tioned functions in favor of a harmonized `regex` argument.
  • Loading branch information
remi-braun committed Dec 11, 2024
1 parent 2dbae25 commit d38b964
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- OPTIM: Don't download files stored on cloud when applying `ci.assert_files_equal` on them
- OPTIM: Offer the ability to give the archived file list directly to `path.get_archived_file_list` and `files.read_archived_file`, as this operation is expensive when done with large archives stored on the cloud (and thus better done only once).
Propagated into `path.get_archived_path`, `path.get_archived_rio_path`, `vectors.read`, `xml.read_archive`, `files.read_archived_xml` and `files.read_archived_html`
- DEPR: Deprecate `xml_regex` and `file_regex` arguments from above-mentioned functions in favor of a harmonized `regex` argument.

## 1.44.0 (2024-12-09)

Expand Down
12 changes: 9 additions & 3 deletions sertit/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def read_archived_file(


def read_archived_xml(
archive_path: AnyPathStrType, xml_regex: str, file_list: list = None
archive_path: AnyPathStrType, regex: str = None, file_list: list = None, **kwargs
) -> etree._Element:
"""
Read archived XML from :code:`zip` or :code:`tar` archives.
Expand All @@ -442,7 +442,7 @@ def read_archived_xml(
Args:
archive_path (AnyPathStrType): Archive path
xml_regex (str): XML regex (used by re) as it can be found in the getmembers() list
regex (str): XML regex (used by re) as it can be found in the getmembers() list
file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
Returns:
Expand All @@ -454,7 +454,13 @@ def read_archived_xml(
>>> read_archived_xml(arch_path, file_regex)
<Element LANDSAT_METADATA_FILE at 0x1c90007f8c8>
"""
xml_bytes = read_archived_file(archive_path, xml_regex, file_list=file_list)
if regex is None:
logs.deprecation_warning(
"'xml_regex' is deprecated, please use 'regex' instead."
)
regex = kwargs.pop("xml_regex")

xml_bytes = read_archived_file(archive_path, regex=regex, file_list=file_list)

return etree.fromstring(xml_bytes)

Expand Down
36 changes: 23 additions & 13 deletions sertit/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import zipfile
from typing import Any, Union

from sertit import AnyPath
from sertit import AnyPath, logs
from sertit.logs import SU_NAME
from sertit.types import AnyPathStrType, AnyPathType

Expand Down Expand Up @@ -185,10 +185,11 @@ def get_archived_file_list(archive_path: AnyPathStrType) -> list:

def get_archived_path(
archive_path: AnyPathStrType,
file_regex: str,
regex: str,
as_list: bool = False,
case_sensitive: bool = False,
file_list: list = None,
**kwargs,
) -> Union[list, AnyPathType]:
"""
Get archived file path from inside the archive.
Expand All @@ -200,7 +201,7 @@ def get_archived_path(
Args:
archive_path (AnyPathStrType): Archive path
file_regex (str): File regex (used by re) as it can be found in the getmembers() list
regex (str): File regex (used by re) as it can be found in the getmembers() list
as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
case_sensitive (bool): If true, the regex is case-sensitive.
file_list (list): List of files to get archived from. Optional, if not given it will be re-computed.
Expand All @@ -214,6 +215,12 @@ def get_archived_path(
>>> path = get_archived_path(arch_path, file_regex)
'dir/filename.tif'
"""
if regex is None:
logs.deprecation_warning(
"'file_regex' is deprecated, please use 'regex' instead."
)
regex = kwargs.pop("file_regex")

# Get file list
archive_path = AnyPath(archive_path)

Expand All @@ -222,15 +229,11 @@ def get_archived_path(
file_list = get_archived_file_list(archive_path)

# Search for file
regex = (
re.compile(file_regex)
if case_sensitive
else re.compile(file_regex, re.IGNORECASE)
)
archived_band_paths = list(filter(regex.match, file_list))
re_rgx = re.compile(regex) if case_sensitive else re.compile(regex, re.IGNORECASE)
archived_band_paths = list(filter(re_rgx.match, file_list))
if not archived_band_paths:
raise FileNotFoundError(
f"Impossible to find file {file_regex} in {get_filename(archive_path)}"
f"Impossible to find file {regex} in {get_filename(archive_path)}"
)

# Convert to str if needed
Expand All @@ -242,9 +245,10 @@ def get_archived_path(

def get_archived_rio_path(
archive_path: AnyPathStrType,
file_regex: str,
regex: str,
as_list: bool = False,
file_list: list = None,
**kwargs,
) -> Union[list, AnyPathType]:
"""
Get archived file path from inside the archive, to be read with rasterio:
Expand All @@ -266,7 +270,7 @@ def get_archived_rio_path(
Args:
archive_path (AnyPathStrType): Archive path
file_regex (str): File regex (used by re) as it can be found in the getmembers() list
regex (str): File regex (used by re) as it can be found in the getmembers() list
as_list (bool): If true, returns a list (including all found files). If false, returns only the first match
file_list (list): List of files contained in the archive. Optional, if not given it will be re-computed.
Expand All @@ -281,6 +285,12 @@ def get_archived_rio_path(
>>> rasterio.open(path)
<open DatasetReader name='zip+file://D:/path/to/output.zip!dir/filename.tif' mode='r'>
"""
if regex is None:
logs.deprecation_warning(
"'file_regex' is deprecated, please use 'regex' instead."
)
regex = kwargs.pop("file_regex")

archive_path = AnyPath(archive_path)
if archive_path.suffix in [".tar", ".zip"]:
prefix = archive_path.suffix[-3:]
Expand All @@ -293,7 +303,7 @@ def get_archived_rio_path(

# Search for file
archived_band_paths = get_archived_path(
archive_path, file_regex, as_list=True, file_list=file_list
archive_path, regex=regex, as_list=True, file_list=file_list
)

# Convert to rio path
Expand Down

0 comments on commit d38b964

Please sign in to comment.