pin zarr and add check unique ping time duplicates and tests
ctuguinay committed Jan 28, 2025
1 parent 1228601 commit 2ac44c2
Showing 4 changed files with 103 additions and 5 deletions.
13 changes: 11 additions & 2 deletions echopype/convert/set_groups_ek80.py
@@ -8,6 +8,7 @@
from ..utils.coding import set_time_encodings
from ..utils.log import _init_logger
from .set_groups_base import SetGroupsBase
from .utils.ek_duplicates import check_unique_ping_time_duplicates

logger = _init_logger(__name__)

@@ -1145,8 +1146,16 @@ def set_beam(self) -> List[xr.Dataset]:

        ds_data = self._attach_vars_to_ds_data(ds_data, ch, rs_size=ds_data.range_sample.size)

        # Drop any duplicate ping times
        ds_data = ds_data.drop_duplicates(dim="ping_time")
        # Access the 'ping_time' coordinate as a NumPy array
        ping_times = ds_data["ping_time"].values

        # Check if ping time duplicates exist
        if len(ping_times) > len(np.unique(ping_times)):
            # Warn if the duplicated ping times do not all carry identical data
            check_unique_ping_time_duplicates(ds_data, logger)

            # Drop duplicates
            ds_data = ds_data.drop_duplicates(dim="ping_time")

        if ch in self.sorted_channel["complex"]:
            ds_complex.append(ds_data)
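
As a quick illustration of the check-then-drop pattern added above, here is a minimal standalone sketch; the toy dataset, variable name, values, and timestamps are invented for the example:

import numpy as np
import xarray as xr

# Toy beam dataset with one repeated ping_time (illustrative values only)
ping_time = np.array(
    ["2021-09-13T13:06:12", "2021-09-13T13:06:12", "2021-09-13T13:06:13"],
    dtype="datetime64[ns]",
)
ds_data = xr.Dataset(
    {"backscatter_r": ("ping_time", [1.0, 1.0, 2.0])},
    coords={"ping_time": ping_time},
)

ping_times = ds_data["ping_time"].values
if len(ping_times) > len(np.unique(ping_times)):
    # In set_beam this is where check_unique_ping_time_duplicates(ds_data, logger) is called
    ds_data = ds_data.drop_duplicates(dim="ping_time")  # keeps the first occurrence

assert ds_data.sizes["ping_time"] == 2
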
44 changes: 44 additions & 0 deletions echopype/convert/utils/ek_duplicates.py
@@ -0,0 +1,44 @@
import logging

import xarray as xr


def check_unique_ping_time_duplicates(ds_data: xr.Dataset, logger: logging.Logger) -> None:
    """
    Raises a warning if the data stored at duplicate ping times is not identical across those duplicates.

    Parameters
    ----------
    ds_data : xr.Dataset
        Single-frequency beam dataset being processed in the `SetGroupsEK80.set_beam` method.
    logger : logging.Logger
        Warning logger initialized in the `set_groups_ek80` module.
    """
    # Group the dataset by the "ping_time" coordinate
    groups = ds_data.groupby("ping_time")

    # Loop through each ping_time group
    for ping_time_val, group in groups:
        # Extract all data variable names to check
        data_vars = list(group.data_vars)

        # Use the first duplicate ping time index as a reference
        ref_duplicate_ping_time_index = 0

        # Iterate over each data variable in the group
        for var in data_vars:
            # Extract data array corresponding to the iterated variable
            data_array = group[var]

            # Use the slice corresponding to the reference index as the reference slice
            ref_slice = data_array.isel({"ping_time": ref_duplicate_ping_time_index})

            # Iterate over the remaining entries
            for i in range(1, data_array.sizes["ping_time"]):
                if not ref_slice.equals(data_array.isel({"ping_time": i})):
                    logger.warning(
                        f"Duplicate slices in variable '{var}' corresponding to "
                        f"ping_time {ping_time_val} differ in data. Data will be lost since we "
                        "will be dropping all duplicate ping times."
                    )
                    break
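
A minimal usage sketch of this helper outside of `set_beam`; the logger name and toy dataset below are invented for the example, and the duplicated ping carries different backscatter values, so the helper should emit its warning:

import logging

import numpy as np
import xarray as xr

from echopype.convert.utils.ek_duplicates import check_unique_ping_time_duplicates

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("ek_duplicates_demo")  # hypothetical logger name for the example

# Two pings share the same ping_time but carry different backscatter values
ping_time = np.array(["2021-09-13T13:06:12"] * 2, dtype="datetime64[ns]")
ds = xr.Dataset(
    {"backscatter_r": (("ping_time", "range_sample"), np.array([[1.0, 2.0], [1.0, 9.9]]))},
    coords={"ping_time": ping_time, "range_sample": [0, 1]},
)

# Logs the "Duplicate slices in variable 'backscatter_r' ... differ in data" warning
check_unique_ping_time_duplicates(ds, logger)
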
49 changes: 47 additions & 2 deletions echopype/tests/convert/test_convert_ek80.py
@@ -4,11 +4,14 @@
import numpy as np
import pandas as pd
from scipy.io import loadmat
import xarray as xr

from echopype import open_raw, open_converted
from echopype.testing import TEST_DATA_FOLDER
from echopype.convert.parse_ek80 import ParseEK80
from echopype.convert.set_groups_ek80 import WIDE_BAND_TRANS, PULSE_COMPRESS, FILTER_IMAG, FILTER_REAL, DECIMATION
from echopype.utils import log
from echopype.convert.utils.ek_duplicates import check_unique_ping_time_duplicates


@pytest.fixture
@@ -514,17 +517,59 @@ def test_parse_missing_sound_velocity_profile():


@pytest.mark.unit
def test_duplicate_ping_times():
def test_duplicate_ping_times(caplog):
    """
    Tests that RAW file with duplicate ping times can be parsed.
    Tests that a RAW file with duplicate ping times can be parsed and that no data-loss warning is raised.
    """
    # Turn on logger verbosity
    log.verbose(override=False)

    # Open RAW
    ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80")

    # Check that there are no ping time duplicates in Beam group
    assert ed["Sonar/Beam_group1"].equals(
        ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time")
    )

    # Check that no warning is logged since the data for all duplicate pings is identical
    not_expected_warning = ("Data will be lost since we will be dropping all duplicate ping times.")
    assert not any(not_expected_warning in record.message for record in caplog.records)

    # Turn off logger verbosity
    log.verbose(override=True)


@pytest.mark.unit
def test_check_unique_ping_time_duplicates(caplog):
    """
    Checks that `check_unique_ping_time_duplicates` raises a warning when duplicate ping times do not share the same data.
    """
    # Initialize logger
    logger = log._init_logger(__name__)

    # Turn on logger verbosity
    log.verbose(override=False)

    # Open duplicate ping time beam dataset

Check failure on line 554 in echopype/tests/convert/test_convert_ek80.py (GitHub Actions / 3.11--ubuntu-latest): test_check_unique_ping_time_duplicates failed with FileNotFoundError: No such file or directory: '/home/runner/work/echopype/echopype/echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr'
    ds_data = xr.open_zarr("echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr")

    # Modify a single entry to ensure that there exist duplicate ping times that do not share the same backscatter data
    ds_data["backscatter_r"][0,0,0] = 0

    # Check for ping time duplicates
    check_unique_ping_time_duplicates(ds_data, logger)

    # Turn off logger verbosity
    log.verbose(override=True)

    # Check if the expected warning is logged
    expected_warning = (
        "Duplicate slices in variable 'backscatter_r' corresponding to ping_time "
        f"{str(ds_data['ping_time'].values[0])} differ in data. Data will be lost since "
        "we will be dropping all duplicate ping times."
    )
    assert any(expected_warning in record.message for record in caplog.records)


@pytest.mark.unit
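
The new test expects a small Zarr store of a beam dataset that still contains duplicate ping times at echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr (the CI annotation above reports it missing on the runner). A hypothetical way to build such a fixture from the RAW file used in test_duplicate_ping_times, not necessarily how the committed fixture was produced:

import xarray as xr

from echopype import open_raw

# Hypothetical fixture construction: any small beam dataset whose first
# ping_time is repeated would satisfy the test above.
ed = open_raw(
    "echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw",
    sonar_model="EK80",
)
beam_ds = ed["Sonar/Beam_group1"]

# Repeat the first ping so the stored dataset contains duplicate ping times
dup_ds = xr.concat([beam_ds.isel(ping_time=[0]), beam_ds], dim="ping_time")
dup_ds.to_zarr(
    "echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr",
    mode="w",
)
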
2 changes: 1 addition & 1 deletion requirements.txt
@@ -9,7 +9,7 @@ pytz
scipy
xarray
pandas
zarr
zarr>=2,<3
fsspec
s3fs
requests
