diff --git a/oceanstream/L0_unprocessed_data/__init__.py b/oceanstream/L0_unprocessed_data/__init__.py index f886f7d..d7e1aaa 100644 --- a/oceanstream/L0_unprocessed_data/__init__.py +++ b/oceanstream/L0_unprocessed_data/__init__.py @@ -1,5 +1,5 @@ from .ensure_time_continuity import check_reversed_time, fix_time_reversions -from .raw_reader import ( +from .raw_handler import ( concatenate_files, convert_raw_files, file_finder, diff --git a/oceanstream/L0_unprocessed_data/raw_reader.py b/oceanstream/L0_unprocessed_data/raw_handler.py similarity index 89% rename from oceanstream/L0_unprocessed_data/raw_reader.py rename to oceanstream/L0_unprocessed_data/raw_handler.py index bae6b53..60e8112 100644 --- a/oceanstream/L0_unprocessed_data/raw_reader.py +++ b/oceanstream/L0_unprocessed_data/raw_handler.py @@ -1,6 +1,6 @@ """ -raw_reader.py -------------- +raw_handler.py +-------------- Module for reading, verifying, and converting echo sounder raw data files. This module provides functionalities to: @@ -91,19 +91,24 @@ def file_finder(paths: Union[str, List[str]], file_type: str = "raw") -> List[st def file_integrity_checking( file_path: str, + use_swap: bool = False, ) -> Dict[str, Union[str, datetime, bool]]: # noqa: E501 """ Checks the integrity of a given echo sounder file. This function verifies if the provided echo sounder file is readable by echopype and extracts - essential metadata such as the campaign ID, date of measurement, - and sonar model. The function - supports raw, netCDF, and zarr file formats. + essential metadata such as the campaign ID, date of measurement, sonar model + and `use_swap` option (relevant only for raw files). + The function supports raw, netCDF, and zarr file formats. Parameters: - file_path (str): Absolute path to the echo sounder file. + - use_swap (bool, optional): Parameter specific to the echopype library `open_raw` function. 
Defaults to False.\ + If True, variables with a large memory footprint will be written to a temporary zarr store at \ + ``~/.echopype/temp_output/parsed2zarr_temp_files``.\ + Relevant only for raw files. Returns: @@ -115,6 +120,8 @@ def file_integrity_checking( extracted from the file name. 'sonar_model': Type of sonar that produced the file. 'file_integrity': Boolean indicating if the file is readable by echopype. + 'use_swap': Applicable only for raw files.\ + A Boolean indicating whether the option was used when reading raw files or not. Raises: @@ -130,6 +137,7 @@ def file_integrity_checking( 'date': datetime.datetime(2023, 5, 9, 10, 6, 45), 'sonar_model': 'EK60', - 'file_integrity': True + 'file_integrity': True, + 'use_swap': False } """ return_dict = {} @@ -159,7 +167,7 @@ def file_integrity_checking( if ".raw" in file_path: for s_m in SUPPORTED_SONAR_MODELS: try: - ed = ep.open_raw(file_path, sonar_model=s_m) # type: ignore + ed = ep.open_raw(file_path, sonar_model=s_m, use_swap=use_swap) # type: ignore file_integrity = True break except Exception: @@ -183,6 +191,8 @@ def file_integrity_checking( return_dict["date"] = date return_dict["sonar_model"] = ed.sonar_model return_dict["file_integrity"] = file_integrity + if ".raw" in file_path: + return_dict["use_swap"] = use_swap return return_dict @@ -210,7 +220,7 @@ def read_raw_files( """ ret_list = [] for f_i in file_dicts: - opened_file = _read_file(f_i["file_path"], f_i["sonar_model"]) + opened_file = _read_file(f_i["file_path"], f_i["sonar_model"], f_i["use_swap"]) ret_list.append(opened_file) return ret_list @@ -240,7 +250,9 @@ def read_processed_files(file_paths: List[str]) -> List[ep.echodata.EchoData]: return ret_list -def _read_file(file_path: str, sonar_model: str = "EK80") -> ep.echodata.EchoData: +def _read_file( + file_path: str, sonar_model: str = "EK80", use_swap: bool = False +) -> ep.echodata.EchoData: """ Reads an echo sounder file and returns the corresponding Dataset. 
@@ -248,13 +260,17 @@ def _read_file(file_path: str, sonar_model: str = "EK80") -> ep.echodata.EchoDat This function determines the type of the file (raw, netCDF, or zarr) based on its extension and opens it using echopype. - For raw files, the sonar model must be specified. + The sonar_model and use_swap parameters are relevant only for raw files. Parameters: - file_path (str): Absolute path to the echo sounder file. - sonar_model (str, optional): Type of sonar model. Defaults to "EK80".\ Relevant only for raw files. + - use_swap (bool, optional): Parameter specific to the echopype library `open_raw` function. Defaults to False.\ + If True, variables with a large memory footprint will be written to a temporary zarr store at \ + ``~/.echopype/temp_output/parsed2zarr_temp_files``.\ + Relevant only for raw files. Returns: @@ -267,7 +283,7 @@ def _read_file(file_path: str, sonar_model: str = "EK80") -> ep.echodata.EchoDat """ file_name = os.path.split(file_path)[-1] if ".raw" in file_name: - ed = ep.open_raw(file_path, sonar_model=sonar_model) # type: ignore + ed = ep.open_raw(file_path, sonar_model=sonar_model, use_swap=use_swap) # type: ignore elif ".nc" in file_name or ".zarr" in file_name: ed = ep.open_converted(file_path) # create an EchoData object else: @@ -306,7 +322,7 @@ def convert_raw_files( """ ret_list = [] for f_i in file_dicts: - opened_file = _read_file(f_i["file_path"], f_i["sonar_model"]) + opened_file = _read_file(f_i["file_path"], f_i["sonar_model"], f_i["use_swap"]) _write_file(opened_file, save_path, save_file_type) file_name = os.path.split(f_i["file_path"])[-1] file_type = save_file_type diff --git a/tests/test_raw_reader.py b/tests/test_raw_handler.py similarity index 89% rename from tests/test_raw_reader.py rename to tests/test_raw_handler.py index ee08ffb..391cd1b 100644 --- a/tests/test_raw_reader.py +++ b/tests/test_raw_handler.py @@ -1,10 +1,8 @@ import os -from ftplib import FTP import pytest -from 
oceanstream.L0_unprocessed_data.raw_reader import ( - concatenate_files, +from oceanstream.L0_unprocessed_data.raw_handler import ( convert_raw_files, file_finder, file_integrity_checking, @@ -14,12 +12,11 @@ ) from tests.conftest import TEST_DATA_FOLDER + def test_file_finder(ftp_data): # Test with a valid directory path containing files found_files = file_finder(ftp_data) - assert ( - len(found_files) > 0 - ) # Assuming there's at least one file in the FTP directory + assert len(found_files) > 0 # Assuming there's at least one file in the FTP directory assert all([os.path.isfile(f) for f in found_files]) # Test with a list of valid file paths @@ -90,9 +87,7 @@ def test_read_processed_files(ftp_data): # Test with a list of valid processed file paths found_files = file_finder(ftp_data, "raw") file_dicts = [file_integrity_checking(f) for f in found_files[:3]] - file_paths = convert_raw_files( - file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="nc" - ) + file_paths = convert_raw_files(file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="nc") datasets = read_processed_files(file_paths) assert len(datasets) == 3 @@ -107,9 +102,7 @@ def test_convert_raw_files(ftp_data): # Test conversion of raw files to netCDF found_files = file_finder(ftp_data, "raw") file_dicts = [file_integrity_checking(f) for f in found_files[:3]] - converted_files = convert_raw_files( - file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="nc" - ) + converted_files = convert_raw_files(file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="nc") for file in converted_files: assert os.path.exists(file) assert file.endswith(".nc") @@ -126,9 +119,7 @@ def test_convert_raw_files(ftp_data): with pytest.raises( Exception ): # Assuming the function raises an exception for unsupported file types - convert_raw_files( - file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="unsupported" - ) + convert_raw_files(file_dicts, save_path=TEST_DATA_FOLDER, save_file_type="unsupported") # Test with an 
empty save path converted_files = convert_raw_files(file_dicts, save_file_type="nc") @@ -158,4 +149,3 @@ def test_split_files(ftp_data): # Test with an empty list with pytest.raises(Exception): grouped_files = split_files([]) -