diff --git a/.gitignore b/.gitignore index a290e06..b61c06e 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,8 @@ cython_debug/ # tracks the version _version.py + +# rogue directories from example notebooks running in local space +checkpoints/ +loss_figures/ +loss_logs/ \ No newline at end of file diff --git a/docs/customising_training.rst b/docs/customising_training.rst index 9a80418..5a455c4 100644 --- a/docs/customising_training.rst +++ b/docs/customising_training.rst @@ -11,6 +11,7 @@ We will cover the following topics: * Number of epochs * Checkpoint suffix modification * Number of workers in PyTorch DataLoader +* Train/test and cross-validation splitting yourself Early stopping -------------- @@ -248,3 +249,65 @@ You can change the number of workers in the PyTorch DataLoader using the ``num_w fusion_model=example_model, ) + + +----- + +Train/test and cross-validation splitting yourself +--------------------------------------------------- + +By default, fusilli randomly splits your data into train/test sets or cross-validation folds, based on the test size or the number of folds you specify in the :func:`~.fusilli.data.prepare_fusion_data` function. + +You can remove the randomness by specifying the test-set indices, or the train and test indices of each cross-validation fold, yourself through optional arguments to :func:`~.fusilli.data.prepare_fusion_data`. + + +For train/test splitting, the argument ``test_indices`` should be a list of indices for the test set; all remaining data points are used for training. To make the test set the first 6 data points in the overall dataset, follow the example below: + +.. 
code-block:: python + + from fusilli.data import prepare_fusion_data + from fusilli.train import train_and_save_models + + test_indices = [0, 1, 2, 3, 4, 5] + + datamodule = prepare_fusion_data( + prediction_task="binary", + fusion_model=example_model, + data_paths=data_paths, + output_paths=output_path, + test_indices=test_indices, + ) + +To specify your own cross-validation folds, the argument ``own_kfold_indices`` should be a list containing one ``(train_indices, test_indices)`` tuple per fold, and its length must match ``num_folds``. + +If you want non-random cross-validation folds, you can either write the folds out by hand, like so for 3 folds over 12 data points: + +.. code-block:: python + + own_kfold_indices = [ + ([ 4, 5, 6, 7, 8, 9, 10, 11], [0, 1, 2, 3]), # first fold + ([ 0, 1, 2, 3, 8, 9, 10, 11], [4, 5, 6, 7]), # second fold + ([ 0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11]) # third fold + ] + +Or, to do this automatically, use the Scikit-Learn `KFold functionality <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html>`_ to generate the folds outside of the fusilli functions, like so: + +.. code-block:: python + + from sklearn.model_selection import KFold + + num_folds = 5 + + own_kfold_indices = [(train_index, test_index) for train_index, test_index in KFold(n_splits=num_folds).split(range(len(dataset)))] + + + datamodule = prepare_fusion_data( + kfold=True, + prediction_task="binary", + fusion_model=example_model, + data_paths=data_paths, + output_paths=output_path, + own_kfold_indices=own_kfold_indices, + num_folds=num_folds, + ) + diff --git a/fusilli/data.py b/fusilli/data.py index 2a2a505..b6c54ef 100644 --- a/fusilli/data.py +++ b/fusilli/data.py @@ -422,6 +422,9 @@ class TrainTestDataModule(pl.LightningDataModule): Early stopping callback class. num_workers : int Number of workers for the dataloader (default 0). + test_indices : list + List of indices to use for testing (default None). If None, the test indices are + randomly selected using the test_size parameter. kwargs : dict Dictionary of extra arguments for the subspace method class. 
""" @@ -434,7 +437,6 @@ def __init__( prediction_task, batch_size, test_size, - num_folds, # not needed for train/test split multiclass_dimensions, subspace_method=None, image_downsample_size=None, @@ -443,6 +445,7 @@ def __init__( extra_log_string_dict=None, own_early_stopping_callback=None, num_workers=0, + test_indices=None, kwargs=None, ): """ @@ -461,8 +464,6 @@ def __init__( Batch size (default 8). test_size : float Fraction of data to use for testing (default 0.2). - num_folds : int - Total number of folds. Not needed for this class for train/test split but it's here to be consistent with KFoldDataModule. multiclass_dimensions : int Number of classes for multiclass prediction (default None). subspace_method : class @@ -482,6 +483,9 @@ def __init__( Early stopping callback class (default None). num_workers : int Number of workers for the dataloader (default 0). + test_indices : list + List of indices to use for testing (default None). If None, the test indices are + randomly selected using the test_size parameter. kwargs : dict Dictionary of extra arguments for the subspace method class. 
""" @@ -515,6 +519,7 @@ def __init__( self.max_epochs = max_epochs self.own_early_stopping_callback = own_early_stopping_callback self.num_workers = num_workers + self.test_indices = test_indices self.kwargs = kwargs def prepare_data(self): @@ -555,9 +560,18 @@ def setup( """ # split the dataset into train and test sets - [self.train_dataset, self.test_dataset] = torch.utils.data.random_split( - self.dataset, [1 - self.test_size, self.test_size] - ) + if self.test_indices is None: + [self.train_dataset, self.test_dataset] = torch.utils.data.random_split( + self.dataset, [1 - self.test_size, self.test_size] + ) + else: + self.test_dataset = torch.utils.data.Subset( + self.dataset, self.test_indices + ) + + self.train_dataset = torch.utils.data.Subset( + self.dataset, list(set(range(len(self.dataset))) - set(self.test_indices)) + ) if self.subspace_method is not None: # if subspace method is specified if ( @@ -705,6 +719,10 @@ class KFoldDataModule(pl.LightningDataModule): Early stopping callback class. num_workers : int Number of workers for the dataloader (default 0). + own_kfold_indices : list + List of indices to use for k-fold cross validation (default None). If None, the k-fold + indices are randomly selected. Structure is a list of tuples of (train_indices, + test_indices). Must be the same length as num_folds. kwargs : dict Dictionary of extra arguments for the subspace method class. """ @@ -717,7 +735,6 @@ def __init__( prediction_task, batch_size, num_folds, - test_size, # not needed for k-fold multiclass_dimensions, subspace_method=None, image_downsample_size=None, @@ -726,6 +743,7 @@ def __init__( extra_log_string_dict=None, own_early_stopping_callback=None, num_workers=0, + own_kfold_indices=None, kwargs=None, ): """ @@ -765,6 +783,10 @@ def __init__( Early stopping callback class (default None). num_workers : int Number of workers for the dataloader (default 0). 
+ own_kfold_indices : list + List of indices to use for k-fold cross validation (default None). If None, the k-fold + indices are randomly selected. Structure is a list of tuples of (train_indices, + test_indices). Must be the same length as num_folds. kwargs : dict Dictionary of extra arguments for the subspace method class. """ @@ -805,6 +827,7 @@ def __init__( self.max_epochs = max_epochs self.own_early_stopping_callback = own_early_stopping_callback self.num_workers = num_workers + self.own_kfold_indices = own_kfold_indices self.kwargs = kwargs def prepare_data(self): @@ -832,15 +855,18 @@ def kfold_split(self): folds : list List of tuples of (train_dataset, test_dataset) """ - - # split the dataset into k folds - kf = KFold(n_splits=self.num_folds, shuffle=True) - # get the indices of the dataset indices = list(range(len(self.dataset))) + # split the dataset into k folds + if self.own_kfold_indices is None: + kf = KFold(n_splits=self.num_folds, shuffle=True) + split_kf = kf.split(indices) + else: + split_kf = self.own_kfold_indices + folds = [] - for train_indices, val_indices in kf.split(indices): + for train_indices, val_indices in split_kf: # split the dataset into train and test sets for each fold train_dataset = torch.utils.data.Subset(self.dataset, train_indices) test_dataset = torch.utils.data.Subset(self.dataset, val_indices) @@ -1046,7 +1072,9 @@ class TrainTestGraphDataModule: List of indices for testing. Created in setup(). graph_data : graph data structure Graph data structure. Created in setup(). - + own_test_indices : list + List of indices to use for testing (default None). If None, the test indices are + randomly selected using the test_size parameter. """ def __init__( @@ -1058,6 +1086,7 @@ def __init__( image_downsample_size=None, layer_mods=None, extra_log_string_dict=None, + own_test_indices=None, ): """ Parameters @@ -1079,6 +1108,9 @@ def __init__( (default None) extra_log_string_dict : dict Dictionary of extra strings to add to the log. 
+ own_test_indices : list + List of indices to use for testing (default None). If None, the test indices are + randomly selected using the test_size parameter. """ @@ -1107,6 +1139,7 @@ def __init__( self.test_size = test_size self.graph_creation_method = graph_creation_method self.layer_mods = layer_mods + self.own_test_indices = own_test_indices def prepare_data(self): """ @@ -1133,11 +1166,17 @@ def setup(self): None """ # get random train and test idxs - [train_dataset, test_dataset] = torch.utils.data.random_split( - self.dataset, [1 - self.test_size, self.test_size] - ) - self.train_idxs = train_dataset.indices - self.test_idxs = test_dataset.indices + if self.own_test_indices is None: + [train_dataset, test_dataset] = torch.utils.data.random_split( + self.dataset, [1 - self.test_size, self.test_size] + ) + self.train_idxs = train_dataset.indices + self.test_idxs = test_dataset.indices + else: + self.test_idxs = self.own_test_indices + self.train_idxs = list( + set(range(len(self.dataset))) - set(self.test_idxs) + ) # get the graph data structure self.graph_maker_instance = self.graph_creation_method(self.dataset) @@ -1216,6 +1255,7 @@ def __init__( image_downsample_size=None, layer_mods=None, extra_log_string_dict=None, + own_kfold_indices=None, ): """ Parameters @@ -1236,6 +1276,10 @@ def __init__( (default None) extra_log_string_dict : dict Dictionary of extra strings to add to the log. + own_kfold_indices : list + List of indices to use for k-fold cross validation (default None). If None, the k-fold + indices are randomly selected. Structure is a list of tuples of (train_indices, + test_indices). Must be the same length as num_folds. 
""" super().__init__() self.num_folds = num_folds # total number of folds @@ -1261,6 +1305,7 @@ def __init__( self.modality_type = self.fusion_model.modality_type self.graph_creation_method = graph_creation_method self.layer_mods = layer_mods + self.own_kfold_indices = own_kfold_indices def prepare_data(self): """ @@ -1279,21 +1324,28 @@ def kfold_split(self): Returns ------ folds : list - List of tuples of (graph_data, train_idxs, test_idxs) + List of tuples of (train_dataset, test_dataset) """ - # splits the dataset into k folds - kf = KFold(n_splits=self.num_folds, shuffle=True) - indices = list(range(len(self.dataset))) # get the indices of the dataset + # get the indices of the dataset + indices = list(range(len(self.dataset))) + + # split the dataset into k folds + if self.own_kfold_indices is None: + kf = KFold(n_splits=self.num_folds, shuffle=True) + split_kf = kf.split(indices) + else: + split_kf = self.own_kfold_indices folds = [] - for train_indices, val_indices in kf.split(indices): + for train_indices, val_indices in split_kf: # split the dataset into train and test sets for each fold train_dataset = torch.utils.data.Subset(self.dataset, train_indices) test_dataset = torch.utils.data.Subset(self.dataset, val_indices) - folds.append( - (train_dataset, test_dataset) - ) # list of tuples of (train_dataset, test_dataset) - return folds + + # append the train and test datasets to the folds list + folds.append((train_dataset, test_dataset)) + + return folds # list of tuples of (train_dataset, test_dataset) def setup(self): """ @@ -1378,6 +1430,8 @@ def prepare_fusion_data( extra_log_string_dict=None, own_early_stopping_callback=None, num_workers=0, + test_indices=None, + own_kfold_indices=None, **kwargs, ): """ @@ -1425,6 +1479,10 @@ def prepare_fusion_data( Early stopping callback class (default None). num_workers : int Number of workers for the dataloader (default 0). + test_indices : list or None + List of indices to use for testing (default None). 
If None, then random split is used. + own_kfold_indices : list or None + List of indices to use for k-fold cross validation (default None). If None, then random split is used. **kwargs : dict Extra keyword arguments. Usable for extra arguments for the subspace method MCVAE's early stopping callback: "mcvae_patience" and "mcvae_tolerance". @@ -1461,6 +1519,7 @@ def prepare_fusion_data( image_downsample_size=image_downsample_size, layer_mods=layer_mods, extra_log_string_dict=extra_log_string_dict, + # here is where the kfold split will go ) else: graph_data_module = TrainTestGraphDataModule( @@ -1471,6 +1530,7 @@ def prepare_fusion_data( image_downsample_size=image_downsample_size, layer_mods=layer_mods, extra_log_string_dict=extra_log_string_dict, + own_test_indices=test_indices, ) graph_data_module.prepare_data() @@ -1499,28 +1559,43 @@ def prepare_fusion_data( else: # another other than graph fusion if kfold: - datamodule_func = KFoldDataModule + data_module = KFoldDataModule( + fusion_model, + sources=data_sources, + output_paths=output_paths, + prediction_task=prediction_task, + batch_size=batch_size, + num_folds=num_folds, + multiclass_dimensions=multiclass_dimensions, + subspace_method=fusion_model.subspace_method, + image_downsample_size=image_downsample_size, + layer_mods=layer_mods, + max_epochs=max_epochs, + extra_log_string_dict=extra_log_string_dict, + own_early_stopping_callback=own_early_stopping_callback, + num_workers=num_workers, + own_kfold_indices=own_kfold_indices, + kwargs=kwargs, + ) else: - datamodule_func = TrainTestDataModule - - data_module = datamodule_func( - fusion_model, - sources=data_sources, - output_paths=output_paths, - prediction_task=prediction_task, - batch_size=batch_size, - test_size=test_size, - num_folds=num_folds, - multiclass_dimensions=multiclass_dimensions, - subspace_method=fusion_model.subspace_method, - image_downsample_size=image_downsample_size, - layer_mods=layer_mods, - max_epochs=max_epochs, - 
extra_log_string_dict=extra_log_string_dict, - own_early_stopping_callback=own_early_stopping_callback, - num_workers=num_workers, - kwargs=kwargs, - ) + data_module = TrainTestDataModule( + fusion_model, + sources=data_sources, + output_paths=output_paths, + prediction_task=prediction_task, + batch_size=batch_size, + test_size=test_size, + multiclass_dimensions=multiclass_dimensions, + subspace_method=fusion_model.subspace_method, + image_downsample_size=image_downsample_size, + layer_mods=layer_mods, + max_epochs=max_epochs, + extra_log_string_dict=extra_log_string_dict, + own_early_stopping_callback=own_early_stopping_callback, + num_workers=num_workers, + test_indices=test_indices, + kwargs=kwargs, + ) data_module.prepare_data() data_module.setup(checkpoint_path=checkpoint_path) diff --git a/tests/test_data/test_KFoldDataModule.py b/tests/test_data/test_KFoldDataModule.py index bd2b2c8..1ef7d2e 100644 --- a/tests/test_data/test_KFoldDataModule.py +++ b/tests/test_data/test_KFoldDataModule.py @@ -1,8 +1,9 @@ import pytest import torch from fusilli.data import KFoldDataModule -from .test_TrainTestDataModule import create_test_files, MockSubspaceMethod +from .test_TrainTestDataModule import create_test_files, MockSubspaceMethod, create_test_files_more_features from unittest.mock import patch, Mock +from sklearn.model_selection import KFold @pytest.fixture @@ -31,7 +32,6 @@ def create_kfold_data_module(create_test_files): multiclass_dimensions=params["multiclass_dims"], num_folds=params["num_k"], batch_size=batch_size, - test_size=0.2 ) return data_module @@ -66,6 +66,42 @@ def test_kfold_split(create_kfold_data_module): assert len(folds) == 5 # Check if the correct number of folds is generated +def test_kfold_split_own_indices(create_test_files_more_features): + tabular1_csv = create_test_files_more_features["tabular1_csv"] + tabular2_csv = create_test_files_more_features["tabular2_csv"] + image_torch_file_2d = 
create_test_files_more_features["image_torch_file_2d"] + + prediction_task = "binary" + multiclass_dimensions = None + + sources = [tabular1_csv, tabular2_csv, image_torch_file_2d] + + # specifying own kfold indices using a non random split + own_folds = [(train_index, test_index) for train_index, test_index in KFold(n_splits=5).split(range(36))] + + example_fusion_model = Mock() + example_fusion_model.modality_type = "tabular_image" + + datamodule = KFoldDataModule( + fusion_model=example_fusion_model, + sources=sources, + output_paths={}, + prediction_task=prediction_task, + multiclass_dimensions=multiclass_dimensions, + num_folds=5, + own_kfold_indices=own_folds, + batch_size=9, + ) + + datamodule.prepare_data() + folds = datamodule.kfold_split() # returns list of tuples of datasets + + assert len(folds) == 5 # Check if the correct number of folds is generated + + # check if the correct number of samples is in each fold + assert len(folds[0][0]) == len(own_folds[0][0]) + + def test_train_dataloader(create_kfold_data_module): data_module = create_kfold_data_module data_module.prepare_data() diff --git a/tests/test_data/test_KFoldGraphDataModule.py b/tests/test_data/test_KFoldGraphDataModule.py index 28b97e8..cc6d36f 100644 --- a/tests/test_data/test_KFoldGraphDataModule.py +++ b/tests/test_data/test_KFoldGraphDataModule.py @@ -1,10 +1,11 @@ import pytest from fusilli.data import KFoldGraphDataModule -from .test_TrainTestDataModule import create_test_files +from .test_TrainTestDataModule import create_test_files, create_test_files_more_features import torch_geometric import numpy as np from unittest.mock import patch, Mock from pytest_mock import mocker +from sklearn.model_selection import KFold class MockGraphMakerModule: @@ -54,6 +55,39 @@ def test_kfold_split(create_graph_data_module): assert len(fold) == 2 +def test_kfold_split_own_indices(create_test_files_more_features): + tabular1_csv = create_test_files_more_features["tabular1_csv"] + tabular2_csv = 
create_test_files_more_features["tabular2_csv"] + image_torch_file_2d = create_test_files_more_features["image_torch_file_2d"] + + prediction_task = "binary" + multiclass_dimensions = None + + sources = [tabular1_csv, tabular2_csv, image_torch_file_2d] + + # specifying own kfold indices using a non random split + own_folds = [(train_index, test_index) for train_index, test_index in KFold(n_splits=5).split(range(36))] + + example_fusion_model = Mock() + example_fusion_model.modality_type = "tabular_image" + + datamodule = KFoldGraphDataModule( + num_folds=5, + fusion_model=example_fusion_model, + sources=sources, + graph_creation_method=MockGraphMakerModule, + own_kfold_indices=own_folds, + ) + + datamodule.prepare_data() + folds = datamodule.kfold_split() # returns list of tuples of datasets + + assert len(folds) == 5 # Check if the correct number of folds is generated + + # check if the correct number of samples is in each fold + assert len(folds[0][0]) == len(own_folds[0][0]) + + def test_setup(create_graph_data_module, mocker): datamodule = create_graph_data_module mocker.patch.object( diff --git a/tests/test_data/test_TrainTestDataModule.py b/tests/test_data/test_TrainTestDataModule.py index e0b2289..4901bf6 100644 --- a/tests/test_data/test_TrainTestDataModule.py +++ b/tests/test_data/test_TrainTestDataModule.py @@ -153,8 +153,7 @@ def test_train_dataloader(create_test_files): prediction_task=prediction_task, batch_size=batch_size, test_size=test_size, - multiclass_dimensions=None, - num_folds=None) + multiclass_dimensions=None, ) datamodule.prepare_data() datamodule.setup() @@ -190,8 +189,7 @@ def test_val_dataloader(create_test_files): prediction_task=prediction_task, batch_size=batch_size, test_size=test_size, - multiclass_dimensions=multiclass_dimensions, - num_folds=None) + multiclass_dimensions=multiclass_dimensions, ) datamodule.prepare_data() datamodule.setup() @@ -244,7 +242,6 @@ def test_setup_calls_subspace_method(create_test_files): 
batch_size=batch_size, test_size=test_size, multiclass_dimensions=multiclass_dimensions, - num_folds=None, subspace_method=mock_subspace_method) datamodule.prepare_data() datamodule.setup() @@ -255,6 +252,48 @@ def test_setup_calls_subspace_method(create_test_files): ) +# Testing that the test indices are correctly input and used instead of a random split +def test_owntestindices(create_test_files_more_features): + tabular1_csv = create_test_files_more_features["tabular1_csv"] + tabular2_csv = create_test_files_more_features["tabular2_csv"] + image_torch_file_2d = create_test_files_more_features["image_torch_file_2d"] + + test_size = 0.2 + prediction_task = "binary" + multiclass_dimensions = None + + sources = [tabular1_csv, tabular2_csv, image_torch_file_2d] + batch_size = 23 + + example_fusion_model = Mock() + example_fusion_model.modality_type = "tabular_image" + + # make test indices people 25 to 36 + test_indices = list(range(25, 36)) + + datamodule = TrainTestDataModule(fusion_model=example_fusion_model, + sources=sources, + output_paths=None, + prediction_task=prediction_task, + batch_size=batch_size, + test_size=test_size, + multiclass_dimensions=multiclass_dimensions, + test_indices=test_indices) + datamodule.prepare_data() + datamodule.setup() + + # check that the test indices are correctly input + assert datamodule.test_indices == test_indices + # look at the test dataset + test_dataset = datamodule.test_dataset + # check that the test dataset has the correct number of people + assert len(test_dataset) == len(test_indices) + # check train dataset + train_dataset = datamodule.train_dataset + # check that the train dataset has the correct number of people + assert len(train_dataset) == 25 + + # Run pytest if __name__ == "__main__": pytest.main() diff --git a/tests/test_data/test_TrainTestGraphDataModule.py b/tests/test_data/test_TrainTestGraphDataModule.py index e5c3711..49be25a 100644 --- a/tests/test_data/test_TrainTestGraphDataModule.py +++ 
b/tests/test_data/test_TrainTestGraphDataModule.py @@ -1,6 +1,6 @@ import pytest from fusilli.data import TrainTestGraphDataModule -from .test_TrainTestDataModule import create_test_files +from .test_TrainTestDataModule import create_test_files, create_test_files_more_features from pytest import approx from pytest_mock import mocker @@ -28,8 +28,6 @@ def create_graph_data_module(create_test_files): sources = [tabular1_csv, tabular2_csv, image_torch_file_2d] batch_size = 23 - # modality_type = "tabular_tabular" - class example_fusion_model: modality_type = "tabular_tabular" @@ -98,5 +96,45 @@ def test_get_lightning_module(create_graph_data_module): assert lightning_module is not None +# Testing the TrainTestGraphDataModule class for the case where the user specifies their own test indices +def test_owntestindices(create_test_files_more_features): + params = { + "test_size": 0.3, + "pred_type": "binary", + "multiclass_dims": None, + } + + tabular1_csv = create_test_files_more_features["tabular1_csv"] + tabular2_csv = create_test_files_more_features["tabular2_csv"] + image_torch_file_2d = create_test_files_more_features["image_torch_file_2d"] + + sources = [tabular1_csv, tabular2_csv, image_torch_file_2d] + batch_size = 23 + + # make test indices people 25 to 36 + test_indices = list(range(25, 36)) + + class example_fusion_model: + modality_type = "tabular_tabular" + + def __init__(self): + pass + + data_module = TrainTestGraphDataModule( + fusion_model=example_fusion_model, + sources=sources, + graph_creation_method=MockGraphMakerModule, + test_size=params["test_size"], + own_test_indices=test_indices, + ) + + data_module.prepare_data() + data_module.setup() + lightning_module = data_module.get_lightning_module() + + # check that the test indices are the same as the ones we specified + assert data_module.test_idxs == test_indices + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_models/test_subspace_and_graph_methods.py 
b/tests/test_models/test_subspace_and_graph_methods.py index 5bc9a37..a30a514 100644 --- a/tests/test_models/test_subspace_and_graph_methods.py +++ b/tests/test_models/test_subspace_and_graph_methods.py @@ -58,7 +58,6 @@ def sample_datamodule(create_test_files): prediction_task="binary", batch_size=8, test_size=0.3, - num_folds=None, multiclass_dimensions=None, ) @@ -96,7 +95,6 @@ def sample_tabimg_datamodule(create_test_files): prediction_task="binary", batch_size=8, test_size=0.3, - num_folds=None, multiclass_dimensions=None, ) diff --git a/tests/test_modifications/test_subspace_modifications.py b/tests/test_modifications/test_subspace_modifications.py index f5f8e16..9deade7 100644 --- a/tests/test_modifications/test_subspace_modifications.py +++ b/tests/test_modifications/test_subspace_modifications.py @@ -731,8 +731,7 @@ def model_instance_denoising_autoencoder_subspace_method_2D(create_test_files): prediction_task="binary", batch_size=batch_size, test_size=0.2, - multiclass_dimensions=None, - num_folds=None) + multiclass_dimensions=None, ) dm.prepare_data() dm.setup() @@ -757,8 +756,7 @@ def model_instance_denoising_autoencoder_subspace_method_3D(create_test_files): prediction_task="binary", batch_size=batch_size, test_size=0.2, - multiclass_dimensions=None, - num_folds=None) + multiclass_dimensions=None, ) dm.prepare_data() dm.setup() @@ -783,7 +781,7 @@ def model_instance_concat_img_latent_tab_subspace_method_2D(create_test_files): prediction_task="binary", batch_size=batch_size, test_size=0.2, - multiclass_dimensions=None, num_folds=None) + multiclass_dimensions=None, ) dm.prepare_data() dm.setup() @@ -808,7 +806,7 @@ def model_instance_concat_img_latent_tab_subspace_method_3D(create_test_files): prediction_task="binary", batch_size=batch_size, test_size=0.2, - multiclass_dimensions=None, num_folds=None) + multiclass_dimensions=None, ) datamodule.prepare_data() datamodule.setup()