From 1afb9d2520789db9fae9717f8c1f889974bd1260 Mon Sep 17 00:00:00 2001 From: Jonas Date: Thu, 19 Dec 2024 13:30:00 +0100 Subject: [PATCH] reduce waiting time and use environment variable for label path --- .../splitlearning_titanic/client/forward.py | 2 +- examples/splitlearning_titanic/readme.rst | 45 ++++++++++++++----- .../combiner/aggregators/splitlearningagg.py | 2 +- fedn/network/combiner/roundhandler.py | 2 +- fedn/network/controller/control.py | 4 +- .../helpers/plugins/splitlearninghelper.py | 37 ++++++--------- 6 files changed, 51 insertions(+), 41 deletions(-) diff --git a/examples/splitlearning_titanic/client/forward.py b/examples/splitlearning_titanic/client/forward.py index 37ec6b53..2f1d90fe 100644 --- a/examples/splitlearning_titanic/client/forward.py +++ b/examples/splitlearning_titanic/client/forward.py @@ -56,7 +56,7 @@ def forward_pass(client_id, out_embedding_path, data_path=None): # save embeddings locally if not os.path.exists(f"{abs_path}/embeddings"): - os.makedirs(f"{abs_path}/embeddings", exist_ok=True) + os.makedirs(f"{abs_path}/embeddings") np.savez(f"{abs_path}/embeddings/embeddings_{client_id}.npz", embedding) diff --git a/examples/splitlearning_titanic/readme.rst b/examples/splitlearning_titanic/readme.rst index 56600427..6ab01387 100644 --- a/examples/splitlearning_titanic/readme.rst +++ b/examples/splitlearning_titanic/readme.rst @@ -19,7 +19,7 @@ Also install the following libraries into your virtual environment: pip install pandas torch scikit-learn -Create a client.yaml file in the examples/splitlearning_titanic folder and add the following content: +Make sure a client.yaml file is available in the examples/splitlearning_titanic folder with the following content: .. code-block:: bash network_id: fedn-network @@ -34,7 +34,7 @@ Data Preparation Make sure the titanic dataset is downloaded in the splitlearning_titanic/data folder with the files "labels.csv", "train.csv" and "test.csv". 
Download the dataset from https://www.kaggle.com/competitions/titanic/data if necessary. -We split the dataset (vertical split) into 2 clients. For this, locate yourself in the examples/splitlearning_titanic folder and run: +We split the dataset (vertical split) into 2 clients. For this, locate yourself in the examples/splitlearning_titanic/client folder and run: .. code-block:: bash @@ -43,7 +43,7 @@ We split the dataset (vertical split) into 2 clients. For this, locate yourself Compute Package --------------- -Then, to create the compute package, run: +Then, locate yourself in the examples/splitlearning_titanic folder. To create the compute package, run: .. code-block:: bash @@ -51,33 +51,54 @@ Then, to create the compute package, run: Note: For split learning, we do not need a seed model as for horizontal federated learning. -Environment Variables +Local Setup with FEDn --------------------- -Now, set up the necessary environment variables: +Currently, Split Learning can only be run locally. To start all necessary services, run the following commands in different terminals: + +To start mongo and minio: + +.. code-block:: bash + + docker compose up -d mongo minio + + +We need to set some environment variables in order to let the system know where to find the data and labels. +In another terminal, set the compute package directory to the examples/splitlearning_titanic folder (the directory where package.tgz is located) and start the controller. .. code-block:: bash export FEDN_COMPUTE_PACKAGE_DIR=/path/to/fedn/examples/splitlearning_titanic + fedn controller start + +Now, we start the combiner. First, we set the labels path to the labels.pt file in the client folder. + +.. code-block:: bash + export FEDN_LABELS_PATH=/path/to/labels.pt + fedn combiner start -In 2 different client terminals, set the data path to the client data folder generated by the data.py script. +Open 2 new client terminals. Both clients should have access to their respective vertical dataset. 
+This is why we set the datapath to the different data folders which are generated by the data.py script. +To start the first client, run: .. code-block:: bash export FEDN_DATA_PATH=./data/clients/1/titanic.pt + fedn client start --api-url http://localhost --api-port 8092 -in client.yaml --local-package + +and to start the second client, run: + +.. code-block:: bash + export FEDN_DATA_PATH=./data/clients/2/titanic.pt + fedn client start --api-url http://localhost --api-port 8092 -in client.yaml --local-package + -Local Setup with FEDn ---------------------- -Currently, Split Learning can only be run locally. To start all necessary services, run the following commands in different terminals: -To start mongo and minio -.. code-block:: bash - docker compose up -d mongo minio Start the controller diff --git a/fedn/network/combiner/aggregators/splitlearningagg.py b/fedn/network/combiner/aggregators/splitlearningagg.py index 194f940d..47b4c892 100644 --- a/fedn/network/combiner/aggregators/splitlearningagg.py +++ b/fedn/network/combiner/aggregators/splitlearningagg.py @@ -16,7 +16,7 @@ class ServerModel(nn.Module): def __init__(self): super(ServerModel, self).__init__() - self.fc1 = nn.Linear(12, 6) # TODO: make this dynamic + self.fc1 = nn.Linear(12, 6) self.fc2 = nn.Linear(6, 1) def forward(self, x): diff --git a/fedn/network/combiner/roundhandler.py b/fedn/network/combiner/roundhandler.py index ebf38da7..2c9f7482 100644 --- a/fedn/network/combiner/roundhandler.py +++ b/fedn/network/combiner/roundhandler.py @@ -282,7 +282,7 @@ def _backward_pass(self, config: dict, clients: list): self.server.request_backward_pass(session_id=config["session_id"], gradient_id=config["model_id"], config=config, clients=clients) - time.sleep(3) # TODO: this is an easy hack for now. There needs to be some waiting time for the backward pass to complete. + time.sleep(1) # TODO: this is an easy hack for now. There needs to be some waiting time for the backward pass to complete. 
# the above mechanism cannot be used, as the backward pass is not returning any model updates (update_handler.waitforit checks for aggregation on the queue) return meta diff --git a/fedn/network/controller/control.py b/fedn/network/controller/control.py index 2ffb8693..5928d565 100644 --- a/fedn/network/controller/control.py +++ b/fedn/network/controller/control.py @@ -547,13 +547,13 @@ def check_combiners_done_reporting(): "round_id": round_id, "task": "backward", "session_id": session_id, - "model_id": model_id + "model_id": model_id }) participating_combiners = [(combiner, backward_config) for combiner, _ in participating_combiners] _ = self.request_model_updates(participating_combiners) - time.sleep(3) # TODO: this is an easy hack for now. There needs to be some waiting time for the backward pass to complete. + time.sleep(1) # TODO: this is an easy hack for now. There needs to be some waiting time for the backward pass to complete. # the above mechanism cannot be used, as the backward pass is not producing any model updates (unlike the forward pass) logger.info("CONTROLLER: Backward pass completed.") diff --git a/fedn/utils/helpers/plugins/splitlearninghelper.py b/fedn/utils/helpers/plugins/splitlearninghelper.py index 693d9527..b33c854e 100644 --- a/fedn/utils/helpers/plugins/splitlearninghelper.py +++ b/fedn/utils/helpers/plugins/splitlearninghelper.py @@ -1,6 +1,6 @@ import os import tempfile -from io import BytesIO +import time import numpy as np import torch @@ -34,15 +34,13 @@ def save(self, data_dict, path=None, file_type="npz"): # Ensure all values are numpy arrays processed_dict = {str(k): np.array(v) for k, v in data_dict.items()} - + # Use with statement to ensure proper file closure with open(path, "wb") as f: np.savez_compressed(f, **processed_dict) - + # Small delay to ensure file is fully written - import time time.sleep(0.1) - return path def load(self, path): @@ -61,27 +59,18 @@ def load(self, path): except Exception as e: logger.error(f"Error 
in splitlearninghelper: loading data from {path}: {str(e)}") raise - - def load_targets(self, data_path=None): - """Load target labels for split learning. - - Args: - data_path (str, optional): Path to the labels file. Defaults to None. - - Returns: - torch.Tensor: The target labels - """ - if data_path is None: - # Try to get path from environment variable first - # data_path = os.environ.get("FEDN_LABELS_PATH") - data_path = "/Users/jonas/Documents/fedn/examples/splitlearning_titanic/client/data/clients/labels.pt" - logger.info("label path is {}".format(data_path)) - # if data_path is None: - # raise ValueError("FEDN_LABELS_PATH environment variable is not set. Set via export FEDN_LABELS_PATH='path/to/labels.pt'") - + + def load_targets(self): + """Load target labels for split learning.""" + try: + data_path = os.environ.get("FEDN_LABELS_PATH") + except Exception as e: + logger.error("FEDN_LABELS_PATH environment variable is not set. Set via export FEDN_LABELS_PATH='path/to/labels.pt'") + raise + try: data = torch.load(data_path, weights_only=True) - targets = data["y_train"] #.float() + targets = data["y_train"] return targets.reshape(-1, 1) # Reshape to match model output shape except Exception as e: logger.error(f"Error loading labels from {data_path}: {str(e)}")