From d0810ee4933f202e7f539cc5ca0f20ae072a0a4f Mon Sep 17 00:00:00 2001
From: David Nicholson
Date: Tue, 10 Sep 2024 14:31:48 -0400
Subject: [PATCH] Rename BioSoundSegBench dataset -> CMACBench and refactor, fix #776

- Move class into a sub-package, with separate modules for helper
  functions and default transforms
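After this change, the public import surface looks like the minimal
sketch below (the splits json filename and dataset directory are
hypothetical placeholders, for illustration only):

    from vak.datasets import CMACBench, SplitsMetadata, DATASETS

    # the registry entry is renamed along with the class
    assert DATASETS["CMACBench"] is CMACBench

    # SplitsMetadata now lives in vak.datasets.cmacbench.helper,
    # but is still re-exported from vak.datasets
    splits_metadata = SplitsMetadata.from_paths(
        json_path="CMACBench/splits/example.splits.json",  # hypothetical path
        dataset_path="CMACBench",  # hypothetical path
    )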
---
 src/vak/datasets/__init__.py             |  11 +-
 src/vak/datasets/cmacbench/__init__.py   |   0
 .../cmacbench.py}                        | 357 +-----
 src/vak/datasets/cmacbench/helper.py     | 156 ++++
 src/vak/datasets/cmacbench/transforms.py | 205 ++++
 5 files changed, 382 insertions(+), 347 deletions(-)
 create mode 100644 src/vak/datasets/cmacbench/__init__.py
 rename src/vak/datasets/{biosoundsegbench.py => cmacbench/cmacbench.py} (53%)
 create mode 100644 src/vak/datasets/cmacbench/helper.py
 create mode 100644 src/vak/datasets/cmacbench/transforms.py

diff --git a/src/vak/datasets/__init__.py b/src/vak/datasets/__init__.py
index aa2f3be75..c91cdc2f8 100644
--- a/src/vak/datasets/__init__.py
+++ b/src/vak/datasets/__init__.py
@@ -1,13 +1,14 @@
-from . import biosoundsegbench
-from .biosoundsegbench import BioSoundSegBench, SplitsMetadata
+from . import cmacbench
+from .cmacbench.cmacbench import CMACBench
+from .cmacbench.helper import SplitsMetadata
 from .get import get
 
 __all__ = [
-    "biosoundsegbench",
-    "BioSoundSegBench",
+    "cmacbench",
+    "CMACBench",
     "get",
     "SplitsMetadata",
 ]
 
 # TODO: make this a proper registry
-DATASETS = {"BioSoundSegBench": BioSoundSegBench}
+DATASETS = {"CMACBench": CMACBench}
diff --git a/src/vak/datasets/cmacbench/__init__.py b/src/vak/datasets/cmacbench/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/vak/datasets/biosoundsegbench.py b/src/vak/datasets/cmacbench/cmacbench.py
similarity index 53%
rename from src/vak/datasets/biosoundsegbench.py
rename to src/vak/datasets/cmacbench/cmacbench.py
index f4b5369a8..9b04aeeee 100644
--- a/src/vak/datasets/biosoundsegbench.py
+++ b/src/vak/datasets/cmacbench/cmacbench.py
@@ -1,5 +1,4 @@
-"""Class representing BioSoundSegBench dataset."""
-
+"""Class representing CMACBench dataset."""
 from __future__ import annotations
 
 import json
@@ -12,10 +11,14 @@ import torchvision.transforms
 from attrs import define
 
-from .. import common, datapipes, transforms
+from ... import common, datapipes, transforms
+
+from .helper import metadata_from_splits_json_path, SplitsMetadata
+from .transforms import TrainItemTransform, InferItemTransform
+
 
 if TYPE_CHECKING:
-    from ..transforms import FramesStandardizer
+    from ...transforms import FramesStandardizer
 
 
 VALID_TARGET_TYPES = (
@@ -33,345 +36,15 @@ BOUNDARY_FRAME_LABELS_PATH_COL_NAME = "boundary_frame_labels_path"
 
 
-@define
-class SampleIDVectorPaths:
-    train: pathlib.Path
-    val: pathlib.Path
-    test: pathlib.Path
-
-
-@define
-class IndsInSampleVectorPaths:
-    train: pathlib.Path
-    val: pathlib.Path
-    test: pathlib.Path
-
-
-@define
-class SplitsMetadata:
-    """Class that represents metadata about dataset splits
-    in the BioSoundSegBench dataset, loaded from a json file"""
-
-    splits_csv_path: pathlib.Path
-    sample_id_vector_paths: SampleIDVectorPaths
-    inds_in_sample_vector_paths: IndsInSampleVectorPaths
-
-    @classmethod
-    def from_paths(cls, json_path, dataset_path):
-        json_path = pathlib.Path(json_path)
-        with json_path.open("r") as fp:
-            splits_json = json.load(fp)
-
-        dataset_path = pathlib.Path(dataset_path)
-        if not dataset_path.exists() or not dataset_path.is_dir():
-            raise NotADirectoryError(
-                f"`dataset_path` not found or not a directory: {dataset_path}"
-            )
-
-        splits_csv_path = pathlib.Path(
-            dataset_path / splits_json["splits_csv_path"]
-        )
-        if not splits_csv_path.exists():
-            raise FileNotFoundError(
-                f"`splits_csv_path` not found: {splits_csv_path}"
-            )
-
-        sample_id_vector_paths = {
-            split: dataset_path / path
-            for split, path in splits_json["sample_id_vec_path"].items()
-        }
-        for split, vec_path in sample_id_vector_paths.items():
-            if not vec_path.exists():
-                raise FileNotFoundError(
-                    f"`sample_id_vector_path` for split '{split}' not found: {vec_path}"
-                )
-        sample_id_vector_paths = SampleIDVectorPaths(**sample_id_vector_paths)
-
-        inds_in_sample_vector_paths = {
-            split: dataset_path / path
-            for split, path in splits_json["inds_in_sample_vec_path"].items()
-        }
-        for split, vec_path in inds_in_sample_vector_paths.items():
-            if not vec_path.exists():
-                raise FileNotFoundError(
-                    f"`inds_in_sample_vec_path` for split '{split}' not found: {vec_path}"
-                )
-        inds_in_sample_vector_paths = IndsInSampleVectorPaths(
-            **inds_in_sample_vector_paths
-        )
-
-        return cls(
-            splits_csv_path,
-            sample_id_vector_paths,
-            inds_in_sample_vector_paths,
-        )
-
-
-@define
-class TrainingReplicateMetadata:
-    """Class representing metadata for a
-    pre-defined training replicate
-    in the BioSoundSegBench dataset.
+class CMACBench(torch.utils.data.Dataset):
+    """Class representing the CMACBench dataset.
+ + Notes + ----- + For more information about this dataset, please see + https://github.com/vocalpy/CMACBench """ - biosound_group: str - id: str | None - frame_dur: float - unit: str - data_source: str | None - train_dur: float - replicate_num: int - - -def metadata_from_splits_json_path( - splits_json_path: pathlib.Path, datset_path: pathlib.Path -) -> TrainingReplicateMetadata: - name = splits_json_path.name - try: - ( - biosound_group, - unit, - id_, - frame_dur_1st_half, - frame_dur_2nd_half, - data_source, - train_dur_1st_half, - train_dur_2nd_half, - replicate_num, - _, - _, - ) = name.split(".") - # Human-Speech doesn't have ID or data source in filename - # so it will raise a ValueError - except ValueError: - name = splits_json_path.name - ( - biosound_group, - unit, - frame_dur_1st_half, - frame_dur_2nd_half, - train_dur_1st_half, - train_dur_2nd_half, - replicate_num, - _, - _, - ) = name.split(".") - id_ = None - data_source = None - if id_ is not None: - id_ = id_.split("-")[-1] - frame_dur = float( - frame_dur_1st_half.split("-")[-1] - + "." - + frame_dur_2nd_half.split("-")[0] - ) - train_dur = float( - train_dur_1st_half.split("-")[-1] - + "." - + train_dur_2nd_half.split("-")[0] - ) - replicate_num = int(replicate_num.split("-")[-1]) - return TrainingReplicateMetadata( - biosound_group, - id_, - frame_dur, - unit, - data_source, - train_dur, - replicate_num, - ) - - -class TrainItemTransform: - """Default transform used when training frame classification models - with :class:`BioSoundSegBench` dataset.""" - - def __init__( - self, - frames_standardizer: FramesStandardizer | None = None, - ): - from ..transforms import FramesStandardizer # avoid circular import - - if frames_standardizer is not None: - if isinstance(frames_standardizer, FramesStandardizer): - frames_transform = [frames_standardizer] - else: - raise TypeError( - f"invalid type for frames_standardizer: {type(frames_standardizer)}. " - "Should be an instance of vak.transforms.StandardizeSpect" - ) - else: - frames_transform = [] - # add as an attribute on self so that high-level functions can save this class as needed - self.frames_standardizer = frames_standardizer - - frames_transform.extend( - [ - transforms.ToFloatTensor(), - transforms.AddChannel(), - ] - ) - self.frames_transform = torchvision.transforms.Compose( - frames_transform - ) - self.frame_labels_transform = transforms.ToLongTensor() - - def __call__( - self, - frames: torch.Tensor, - multi_frame_labels: torch.Tensor | None = None, - binary_frame_labels: torch.Tensor | None = None, - boundary_frame_labels: torch.Tensor | None = None, - ) -> dict: - frames = self.frames_transform(frames) - item = { - "frames": frames, - } - if multi_frame_labels is not None: - item["multi_frame_labels"] = self.frame_labels_transform( - multi_frame_labels - ) - - if binary_frame_labels is not None: - item["binary_frame_labels"] = self.frame_labels_transform( - binary_frame_labels - ) - - if boundary_frame_labels is not None: - item["boundary_frame_labels"] = self.frame_labels_transform( - boundary_frame_labels - ) - - return item - - -class InferItemTransform: - """Default transform used when running inference on classification models - with :class:`BioSoundSegBench` dataset, for evaluation or to generate new predictions. - - Returned item includes frames reshaped into a stack of windows, - with padded added to make reshaping possible. - Any `frame_labels` are not padded and reshaped, - but are converted to :class:`torch.LongTensor`. 
-    If return_padding_mask is True, item includes 'padding_mask' that
-    can be used to crop off any predictions made on the padding.
-
-    Attributes
-    ----------
-    frames_standardizer : vak.transforms.FramesStandardizer
-        instance that has already been fit to dataset, using fit_df method.
-        Default is None, in which case no standardization transform is applied.
-    window_size : int
-        width of window in number of elements. Argument to PadToWindow transform.
-    frames_padval : float
-        Value to pad frames with. Added to end of array, the "right side".
-        Argument to PadToWindow transform. Default is 0.0.
-    frame_labels_padval : int
-        Value to pad frame labels vector with. Added to the end of the array.
-        Argument to PadToWindow transform. Default is -1.
-        Used with ``ignore_index`` argument of :mod:`torch.nn.CrossEntropyLoss`.
-    return_padding_mask : bool
-        if True, the dictionary returned by ItemTransform classes will include
-        a boolean vector to use for cropping back down to size before padding.
-        padding_mask has size equal to width of padded array, i.e. original size
-        plus padding at the end, and has values of 1 where
-        columns in padded are from the original array,
-        and values of 0 where columns were added for padding.
-    """
-
-    def __init__(
-        self,
-        window_size,
-        frames_standardizer=None,
-        frames_padval=0.0,
-        frame_labels_padval=-1,
-        return_padding_mask=True,
-        channel_dim=1,
-    ):
-        from ..transforms import FramesStandardizer  # avoid circular import
-
-        self.window_size = window_size
-        self.frames_padval = frames_padval
-        self.frame_labels_padval = frame_labels_padval
-        self.return_padding_mask = return_padding_mask
-        self.channel_dim = channel_dim
-
-        if frames_standardizer is not None:
-            if not isinstance(frames_standardizer, FramesStandardizer):
-                raise TypeError(
-                    f"Invalid type for frames_standardizer: {type(frames_standardizer)}. "
" - "Should be an instance of vak.transforms.FramesStandardizer" - ) - # add as an attribute on self to use inside __call__ - # *and* so that high-level functions can save this class as needed - self.frames_standardizer = frames_standardizer - - self.pad_to_window = transforms.PadToWindow( - window_size, frames_padval, return_padding_mask=return_padding_mask - ) - - self.frames_transform_after_pad = torchvision.transforms.Compose( - [ - transforms.ViewAsWindowBatch(window_size), - transforms.ToFloatTensor(), - # below, add channel at first dimension because windows become batch - transforms.AddChannel(channel_dim=channel_dim), - ] - ) - - self.frame_labels_padval = frame_labels_padval - self.frame_labels_transform = transforms.ToLongTensor() - - def __call__( - self, - frames: torch.Tensor, - multi_frame_labels: torch.Tensor | None = None, - binary_frame_labels: torch.Tensor | None = None, - boundary_frame_labels: torch.Tensor | None = None, - frames_path=None, - ) -> dict: - if self.frames_standardizer: - frames = self.frames_standardizer(frames) - - if self.pad_to_window.return_padding_mask: - frames, padding_mask = self.pad_to_window(frames) - else: - frames = self.pad_to_window(frames) - padding_mask = None - frames = self.frames_transform_after_pad(frames) - - item = { - "frames": frames, - } - - if multi_frame_labels is not None: - item["multi_frame_labels"] = self.frame_labels_transform( - multi_frame_labels - ) - - if binary_frame_labels is not None: - item["binary_frame_labels"] = self.frame_labels_transform( - binary_frame_labels - ) - - if boundary_frame_labels is not None: - item["boundary_frame_labels"] = self.frame_labels_transform( - boundary_frame_labels - ) - - if padding_mask is not None: - item["padding_mask"] = padding_mask - - if frames_path is not None: - # make sure frames_path is a str, not a pathlib.Path - item["frames_path"] = str(frames_path) - - return item - - -class BioSoundSegBench: - """Class representing BioSoundSegBench dataset.""" - def __init__( self, dataset_path: str | pathlib.Path, @@ -575,7 +248,7 @@ def shape(self): elif ( self.split in ("val", "test", "predict") and len(input_shape) == 4 ): - # discard windows dimension from shape -- + # discard windows dimension from shape, # it's sample dependent and not what we want return input_shape[1:] diff --git a/src/vak/datasets/cmacbench/helper.py b/src/vak/datasets/cmacbench/helper.py new file mode 100644 index 000000000..bbcd2b71b --- /dev/null +++ b/src/vak/datasets/cmacbench/helper.py @@ -0,0 +1,156 @@ +"""Helper functions used with CMACBench dataset.""" +from __future__ import annotations + +import json +import pathlib + +from attrs import define + + +@define +class SampleIDVectorPaths: + train: pathlib.Path + val: pathlib.Path + test: pathlib.Path + + +@define +class IndsInSampleVectorPaths: + train: pathlib.Path + val: pathlib.Path + test: pathlib.Path + + +@define +class SplitsMetadata: + """Class that represents metadata about dataset splits + in the BioSoundSegBench dataset, loaded from a json file""" + + splits_csv_path: pathlib.Path + sample_id_vector_paths: SampleIDVectorPaths + inds_in_sample_vector_paths: IndsInSampleVectorPaths + + @classmethod + def from_paths(cls, json_path, dataset_path): + json_path = pathlib.Path(json_path) + with json_path.open("r") as fp: + splits_json = json.load(fp) + + dataset_path = pathlib.Path(dataset_path) + if not dataset_path.exists() or not dataset_path.is_dir(): + raise NotADirectoryError( + f"`dataset_path` not found or not a directory: {dataset_path}" + 
+
+        splits_csv_path = pathlib.Path(
+            dataset_path / splits_json["splits_csv_path"]
+        )
+        if not splits_csv_path.exists():
+            raise FileNotFoundError(
+                f"`splits_csv_path` not found: {splits_csv_path}"
+            )
+
+        sample_id_vector_paths = {
+            split: dataset_path / path
+            for split, path in splits_json["sample_id_vec_path"].items()
+        }
+        for split, vec_path in sample_id_vector_paths.items():
+            if not vec_path.exists():
+                raise FileNotFoundError(
+                    f"`sample_id_vector_path` for split '{split}' not found: {vec_path}"
+                )
+        sample_id_vector_paths = SampleIDVectorPaths(**sample_id_vector_paths)
+
+        inds_in_sample_vector_paths = {
+            split: dataset_path / path
+            for split, path in splits_json["inds_in_sample_vec_path"].items()
+        }
+        for split, vec_path in inds_in_sample_vector_paths.items():
+            if not vec_path.exists():
+                raise FileNotFoundError(
+                    f"`inds_in_sample_vec_path` for split '{split}' not found: {vec_path}"
+                )
+        inds_in_sample_vector_paths = IndsInSampleVectorPaths(
+            **inds_in_sample_vector_paths
+        )
+
+        return cls(
+            splits_csv_path,
+            sample_id_vector_paths,
+            inds_in_sample_vector_paths,
+        )
+
+
+@define
+class TrainingReplicateMetadata:
+    """Class representing metadata for a
+    pre-defined training replicate
+    in the CMACBench dataset.
+    """
+
+    biosound_group: str
+    id: str | None
+    frame_dur: float
+    unit: str
+    data_source: str | None
+    train_dur: float
+    replicate_num: int
+
+
+def metadata_from_splits_json_path(
+    splits_json_path: pathlib.Path, dataset_path: pathlib.Path
+) -> TrainingReplicateMetadata:
+    name = splits_json_path.name
+    try:
+        (
+            biosound_group,
+            unit,
+            id_,
+            frame_dur_1st_half,
+            frame_dur_2nd_half,
+            data_source,
+            train_dur_1st_half,
+            train_dur_2nd_half,
+            replicate_num,
+            _,
+            _,
+        ) = name.split(".")
+    # Human-Speech doesn't have ID or data source in filename
+    # so it will raise a ValueError
+    except ValueError:
+        name = splits_json_path.name
+        (
+            biosound_group,
+            unit,
+            frame_dur_1st_half,
+            frame_dur_2nd_half,
+            train_dur_1st_half,
+            train_dur_2nd_half,
+            replicate_num,
+            _,
+            _,
+        ) = name.split(".")
+        id_ = None
+        data_source = None
+    if id_ is not None:
+        id_ = id_.split("-")[-1]
+    frame_dur = float(
+        frame_dur_1st_half.split("-")[-1]
+        + "."
+        + frame_dur_2nd_half.split("-")[0]
+    )
+    train_dur = float(
+        train_dur_1st_half.split("-")[-1]
+        + "."
+        + train_dur_2nd_half.split("-")[0]
+    )
+    replicate_num = int(replicate_num.split("-")[-1])
+    return TrainingReplicateMetadata(
+        biosound_group,
+        id_,
+        frame_dur,
+        unit,
+        data_source,
+        train_dur,
+        replicate_num,
+    )
\ No newline at end of file
diff --git a/src/vak/datasets/cmacbench/transforms.py b/src/vak/datasets/cmacbench/transforms.py
new file mode 100644
index 000000000..96b26b0ce
--- /dev/null
+++ b/src/vak/datasets/cmacbench/transforms.py
@@ -0,0 +1,205 @@
+"""Default transforms for CMACBench dataset."""
+
+from __future__ import annotations
+
+import json
+import pathlib
+from typing import TYPE_CHECKING, Callable, Literal
+
+import numpy as np
+import pandas as pd
+import torch
+import torchvision.transforms
+from attrs import define
+
+from ... import common, datapipes, transforms
+
+if TYPE_CHECKING:
+    from ...transforms import FramesStandardizer
+
+
+class TrainItemTransform:
+    """Default transform used when training frame classification models
+    with :class:`CMACBench` dataset."""
+
+    def __init__(
+        self,
+        frames_standardizer: FramesStandardizer | None = None,
+    ):
+        from ...transforms import FramesStandardizer  # avoid circular import
+
+        if frames_standardizer is not None:
+            if isinstance(frames_standardizer, FramesStandardizer):
+                frames_transform = [frames_standardizer]
+            else:
+                raise TypeError(
+                    f"Invalid type for frames_standardizer: {type(frames_standardizer)}. "
+                    "Should be an instance of vak.transforms.FramesStandardizer"
+                )
+        else:
+            frames_transform = []
+        # add as an attribute on self so that high-level functions can save this class as needed
+        self.frames_standardizer = frames_standardizer
+
+        frames_transform.extend(
+            [
+                transforms.ToFloatTensor(),
+                transforms.AddChannel(),
+            ]
+        )
+        self.frames_transform = torchvision.transforms.Compose(
+            frames_transform
+        )
+        self.frame_labels_transform = transforms.ToLongTensor()
+
+    def __call__(
+        self,
+        frames: torch.Tensor,
+        multi_frame_labels: torch.Tensor | None = None,
+        binary_frame_labels: torch.Tensor | None = None,
+        boundary_frame_labels: torch.Tensor | None = None,
+    ) -> dict:
+        frames = self.frames_transform(frames)
+        item = {
+            "frames": frames,
+        }
+        if multi_frame_labels is not None:
+            item["multi_frame_labels"] = self.frame_labels_transform(
+                multi_frame_labels
+            )
+
+        if binary_frame_labels is not None:
+            item["binary_frame_labels"] = self.frame_labels_transform(
+                binary_frame_labels
+            )
+
+        if boundary_frame_labels is not None:
+            item["boundary_frame_labels"] = self.frame_labels_transform(
+                boundary_frame_labels
+            )
+
+        return item
+
+
+class InferItemTransform:
+    """Default transform used when running inference on classification models
+    with :class:`CMACBench` dataset, for evaluation or to generate new predictions.
+
+    Returned item includes frames reshaped into a stack of windows,
+    with padding added to make reshaping possible.
+    Any `frame_labels` are not padded and reshaped,
+    but are converted to :class:`torch.LongTensor`.
+    If return_padding_mask is True, item includes 'padding_mask' that
+    can be used to crop off any predictions made on the padding.
+
+    Attributes
+    ----------
+    frames_standardizer : vak.transforms.FramesStandardizer
+        Instance that has already been fit to dataset, using its fit_df method.
+        Default is None, in which case no standardization transform is applied.
+    window_size : int
+        Width of window in number of elements. Argument to PadToWindow transform.
+    frames_padval : float
+        Value to pad frames with. Added to end of array, the "right side".
+        Argument to PadToWindow transform. Default is 0.0.
+    frame_labels_padval : int
+        Value to pad frame labels vector with. Added to the end of the array.
+        Argument to PadToWindow transform. Default is -1.
+        Used with ``ignore_index`` argument of :class:`torch.nn.CrossEntropyLoss`.
+    return_padding_mask : bool
+        If True, the dictionary returned by ItemTransform classes will include
+        a boolean vector to use for cropping back down to size before padding.
+        padding_mask has size equal to width of padded array, i.e. original size
+        plus padding at the end, and has values of 1 where
+        columns in padded are from the original array,
+        and values of 0 where columns were added for padding.
+ """ + + def __init__( + self, + window_size, + frames_standardizer=None, + frames_padval=0.0, + frame_labels_padval=-1, + return_padding_mask=True, + channel_dim=1, + ): + from ...transforms import FramesStandardizer # avoid circular import + + self.window_size = window_size + self.frames_padval = frames_padval + self.frame_labels_padval = frame_labels_padval + self.return_padding_mask = return_padding_mask + self.channel_dim = channel_dim + + if frames_standardizer is not None: + if not isinstance(frames_standardizer, FramesStandardizer): + raise TypeError( + f"Invalid type for frames_standardizer: {type(frames_standardizer)}. " + "Should be an instance of vak.transforms.FramesStandardizer" + ) + # add as an attribute on self to use inside __call__ + # *and* so that high-level functions can save this class as needed + self.frames_standardizer = frames_standardizer + + self.pad_to_window = transforms.PadToWindow( + window_size, frames_padval, return_padding_mask=return_padding_mask + ) + + self.frames_transform_after_pad = torchvision.transforms.Compose( + [ + transforms.ViewAsWindowBatch(window_size), + transforms.ToFloatTensor(), + # below, add channel at first dimension because windows become batch + transforms.AddChannel(channel_dim=channel_dim), + ] + ) + + self.frame_labels_padval = frame_labels_padval + self.frame_labels_transform = transforms.ToLongTensor() + + def __call__( + self, + frames: torch.Tensor, + multi_frame_labels: torch.Tensor | None = None, + binary_frame_labels: torch.Tensor | None = None, + boundary_frame_labels: torch.Tensor | None = None, + frames_path=None, + ) -> dict: + if self.frames_standardizer: + frames = self.frames_standardizer(frames) + + if self.pad_to_window.return_padding_mask: + frames, padding_mask = self.pad_to_window(frames) + else: + frames = self.pad_to_window(frames) + padding_mask = None + frames = self.frames_transform_after_pad(frames) + + item = { + "frames": frames, + } + + if multi_frame_labels is not None: + item["multi_frame_labels"] = self.frame_labels_transform( + multi_frame_labels + ) + + if binary_frame_labels is not None: + item["binary_frame_labels"] = self.frame_labels_transform( + binary_frame_labels + ) + + if boundary_frame_labels is not None: + item["boundary_frame_labels"] = self.frame_labels_transform( + boundary_frame_labels + ) + + if padding_mask is not None: + item["padding_mask"] = padding_mask + + if frames_path is not None: + # make sure frames_path is a str, not a pathlib.Path + item["frames_path"] = str(frames_path) + + return item \ No newline at end of file
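
Usage sketch for the default transforms split out into transforms.py
(array shapes, the window size, and the random data below are
hypothetical, chosen only for illustration):

    import numpy as np

    from vak.datasets.cmacbench.transforms import (
        InferItemTransform,
        TrainItemTransform,
    )

    # a spectrogram-like array: (frequency bins, time frames)
    frames = np.random.rand(128, 1000)
    boundary_frame_labels = np.zeros(1000, dtype=int)

    # training: frames -> float tensor with channel dim; labels -> long tensor
    train_item = TrainItemTransform()(
        frames, boundary_frame_labels=boundary_frame_labels
    )

    # inference: frames are padded, then reshaped to a batch of windows;
    # "padding_mask" lets callers crop off predictions made on the padding
    infer_item = InferItemTransform(window_size=176)(
        frames, boundary_frame_labels=boundary_frame_labels
    )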