diff --git a/.github/workflows/branch-name-check.yaml b/.github/workflows/branch-name-check.yaml
index 67ff4d04c..41f431cc1 100644
--- a/.github/workflows/branch-name-check.yaml
+++ b/.github/workflows/branch-name-check.yaml
@@ -7,7 +7,7 @@ on:
      - master

env:
-  BRANCH_REGEX: '^((feature|github|hotfix|bugfix|fix|bug|docs|refactor)\/.+)|(release\/v((([0-9]+)\.([0-9]+)\.([0-9]+)(?:-([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?))$'
+  BRANCH_REGEX: '^((feature|github|dependabot|hotfix|bugfix|fix|bug|docs|refactor)\/.+)|(release\/v((([0-9]+)\.([0-9]+)\.([0-9]+)(?:-([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?))$'

jobs:
  branch-name-check:
diff --git a/.github/workflows/build-containers.yaml b/.github/workflows/build-containers.yaml
index 8dd2f561a..89648764e 100644
--- a/.github/workflows/build-containers.yaml
+++ b/.github/workflows/build-containers.yaml
@@ -1,6 +1,7 @@
name: "build containers"

on:
+  workflow_dispatch:
  push:
    branches:
      - master
diff --git a/.github/workflows/code-checks.yaml b/.github/workflows/code-checks.yaml
index dbe378c63..8c48a3015 100644
--- a/.github/workflows/code-checks.yaml
+++ b/.github/workflows/code-checks.yaml
@@ -25,6 +25,7 @@ jobs:
          --exclude-dir='docs'
          --exclude-dir='flower-client'
          --exclude='tests.py'
+          --exclude='controller_cmd.py'
          --exclude='README.rst'
          '^[ \t]+(import|from) ' -I .
diff --git a/.github/workflows/pr-title-check.yaml b/.github/workflows/pr-title-check.yaml
new file mode 100644
index 000000000..f0311634f
--- /dev/null
+++ b/.github/workflows/pr-title-check.yaml
@@ -0,0 +1,35 @@
+name: PR Title Check
+
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+
+jobs:
+  title-check:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+
+      - name: Check if PR is internal
+        id: check_internal
+        run: |
+          if [[ "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
+            echo "internal=true" >> $GITHUB_ENV
+          else
+            echo "internal=false" >> $GITHUB_ENV
+          fi
+
+      - name: Run title check script
+        if: env.internal == 'true'
+        run: |
+          pr_title="${{ github.event.pull_request.title }}"
+          pattern="^(Feature|Fix|Bug|Bugfix|Docs|Refactor|Chore|Github)\/SK-[0-9]+ \| .+"
+          if [[ ! "$pr_title" =~ $pattern ]]; then
+            echo "Error: PR title does not follow the required pattern."
+            echo "Please ensure the title follows the pattern: 'Feature|Fix|Bug|Bugfix|Docs|Refactor|Chore|Github/SK-<number> | <description>'"
+            exit 1
+          else
+            echo "PR title is valid."
+          fi
\ No newline at end of file
diff --git a/.github/workflows/push-to-pypi.yaml b/.github/workflows/push-to-pypi.yaml
index 1b59835ad..1e184c17f 100644
--- a/.github/workflows/push-to-pypi.yaml
+++ b/.github/workflows/push-to-pypi.yaml
@@ -1,8 +1,9 @@
name: Publish Python distribution to PyPI

on:
+  workflow_dispatch:
  release:
-    types: [created]
+    types: published

jobs:
  build-and-publish:
diff --git a/README.rst b/README.rst
index a13d7463f..c0fbc2836 100644
--- a/README.rst
+++ b/README.rst
@@ -9,50 +9,50 @@
.. |pic3| image:: https://readthedocs.org/projects/fedn/badge/?version=latest&style=flat
    :target: https://fedn.readthedocs.io

-FEDn
--------
+FEDn: An enterprise-ready federated learning framework
+-------------------------------------------------------

-FEDn empowers its users to create federated learning applications that seamlessly transition from local proofs-of-concept to secure distributed deployments.
+Our goal is to provide a federated learning framework that is secure, scalable and easy to use. We believe that minimal code change should be needed to progress from early proof-of-concepts to production. This is reflected in our core design:

-Leverage a flexible pseudo-local sandbox to rapidly transition your existing ML project to a federated setting. Test and scale in real-world scenarios using FEDn Studio - a fully managed, secure deployment of all server-side components (SaaS).
+- **Minimal server-side complexity for the end-user**. Running a proper distributed FL deployment is hard. With FEDn Studio we seek to handle all server-side complexity and provide a UI, REST API and a Python interface to help users manage FL experiments and track metrics in real time.

-We develop the FEDn framework following these core design principles:
+- **Secure by design.** FL clients do not need to open any ingress ports. Industry-standard communication protocols (gRPC) and token-based authentication and RBAC (JSON Web Tokens) provide flexible integration in a range of production environments.

-- **Seamless transition from proof-of-concepts to real-world FL**. FEDn has been designed to make the journey from R&D to real-world deployments as smooth as possibe. Develop your federated learning use case in a pseudo-local environment, then deploy it to FEDn Studio (cloud or on-premise) for real-world scenarios. No code change is required to go from development and testing to production.
+- **ML-framework agnostic**. A black-box client-side architecture lets data scientists interface with their framework of choice.

-- **Designed for scalability and resilience.** FEDn enables model aggregation through multiple aggregation servers sharing the workload. A hierarchical architecture makes the framework well suited borh for cross-silo and cross-device use-cases. FEDn seamlessly recover from failures in all critical components, and manages intermittent client-connections, ensuring robust deployment in production environments.
+- **Cloud native.** By following cloud native design principles, we ensure a wide range of deployment options including private cloud and on-premise infrastructure.

-- **Secure by design.** FL clients do not need to open any ingress ports, facilitating distributed deployments across a wide variety of settings. Additionally, FEDn utilizes secure, industry-standard communication protocols and supports token-based authentication and RBAC for FL clients (JWT), providing flexible integration in production environments.
+- **Scalability and resilience.** Multiple aggregation servers (combiners) can share the workload. FEDn seamlessly recovers from failures in all critical components and manages intermittent client connections.

-- **Developer and data scientist friendly.** Extensive event logging and distributed tracing enables developers to monitor experiments in real-time, simplifying troubleshooting and auditing. Machine learning metrics can be accessed via both a Python API and visualized in an intuitive UI that helps the data scientists analyze and communicate ML-model training progress.
+- **Developer and DevOps friendly.** Extensive event logging and distributed tracing enable developers to monitor the system in real-time, simplifying troubleshooting and auditing. Extensions and integrations are facilitated by a flexible plug-in architecture.

+We provide a fully managed deployment for testing, academic, and personal use.
+Sign up for a `FEDn Studio account `__ and take the `Quickstart tutorial `__ to get started with FEDn.

Features
=========

-Core FL framework (this repository):
+Federated learning:

- Tiered federated learning architecture enabling massive scalability and resilience.
- Support for any ML framework (examples for PyTorch, Tensforflow/Keras and Scikit-learn)
- Extendable via a plug-in architecture (aggregators, load balancers, object storage backends, databases etc.)
- Built-in federated algorithms (FedAvg, FedAdam, FedYogi, FedAdaGrad, etc.)
-- CLI and Python API.
+- UI, CLI and Python API.
- Implement clients in any language (Python, C++, Kotlin etc.)
- No open ports needed client-side.
-- Flexible deployment of server-side components using Docker / docker compose.

-FEDn Studio - From development to FL in production:
+From development to FL in production:

- Secure deployment of server-side / control-plane on Kubernetes.
-- UI with dashboards for orchestrating experiments and visualizing results
+- UI with dashboards for orchestrating FL experiments and for visualizing results
- Team features - collaborate with other users in shared project workspaces.
- Features for the trusted-third party: Manage access to the FL network, FL clients and training progress.
- REST API for handling experiments/jobs.
- View and export logging and tracing information.
- Public cloud, dedicated cloud and on-premise deployment options.

-Available clients:
+Available client APIs:

- Python client (this repository)
- C++ client (`FEDn C++ client `__)

@@ -64,11 +64,11 @@ Getting started

Get started with FEDn in two steps:

-1. Sign up for a `Free FEDn Studio account `__
+1. Register for a `FEDn Studio account `__
2. Take the `Quickstart tutorial `__

-FEDn Studio (SaaS) is free for academic use and personal development / small-scale testing and exploration. For users and teams requiring
-additional project resources, dedicated support or other hosting options, `explore our plans `__.
+Use of our multi-tenant, managed deployment of FEDn Studio (SaaS) is free forever for academic research and personal development/testing purposes.
+For users and teams requiring additional resources, more storage and CPU, dedicated support, and other hosting options (private cloud, on-premise), `explore our plans `__.

Documentation
=============
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c22f60283..e4ecf67f4 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -78,7 +78,7 @@ services:
      - mongo
    entrypoint: [ "sh", "-c" ]
    command:
-      - "/venv/bin/pip install --no-cache-dir -e . && /venv/bin/python fedn/network/api/server.py"
+      - "/venv/bin/pip install --no-cache-dir -e . && /venv/bin/fedn controller start"
    ports:
      - 8092:8092

diff --git a/docs/apiclient.rst b/docs/apiclient.rst
index b4dfd789f..2806ebe86 100644
--- a/docs/apiclient.rst
+++ b/docs/apiclient.rst
@@ -1,11 +1,9 @@
+.. _apiclient-label:
+
APIClient
=========

-.. note::
-
-   For access to FEDn Studio API, please see :ref:`studio-api`.
-
-FEDn comes with an *APIClient* for interacting with the FEDn network. The APIClient is a Python3 library that can be used to interact with the FEDn network programmatically.
+FEDn comes with an *APIClient* - a Python3 library that can be used to interact with FEDn programmatically.

**Installation**

@@ -17,12 +15,15 @@ The APIClient is available as a Python package on PyPI, and can be installed usi

**Initialize the APIClient**

-To initialize the APIClient, you need to provide the hostname and port of the FEDn API server. The default port is 8092. The following code snippet shows how to initialize the APIClient:
+The FEDn REST API is available at /api/v1/. To access this API you need the URL of the controller host, as well as an admin API token. The controller host can be found in the project dashboard (top right corner).
+To obtain an admin API token, navigate to the "Settings" tab in your Studio project and click on the "Generate token" button. Copy the 'access' token and use it to access the API using the instructions below.
+

.. code-block:: python
-
-   from fedn import APIClient
-   client = APIClient("localhost", 8092)
+
+   >>> from fedn import APIClient
+   >>> client = APIClient(host="<controller-host>", token="<access-token>", secure=True, verify=True)
+

**Set active package and seed model**

To set the initial seed model, you can use the following code snippet:

   client.set_active_model(path="path/to/seed.npz")

-**Start training session**
+**Start a training session**

-Once the active package and seed model are set, you can connect clients to the network and start training models. The following code snippet initializes a session (training rounds):
+Once the active package and seed model are set, you can connect clients to the network and start training models. The following code snippet starts a training session:

.. code-block:: python

diff --git a/docs/architecture.rst b/docs/architecture.rst
index a820e7e20..85e2430da 100644
--- a/docs/architecture.rst
+++ b/docs/architecture.rst
@@ -1,3 +1,5 @@
+.. _architecture-label:
+
Architecture overview
=====================

diff --git a/docs/conf.py b/docs/conf.py
index 1fe8d9929..c45e90846 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,7 +12,7 @@ author = "Scaleout Systems AB"

# The full version, including alpha/beta/rc tags
-release = "0.9.6"
+release = "0.11.1"

# Add any Sphinx extension module names here, as strings
extensions = [
@@ -24,7 +24,7 @@
    "sphinx.ext.ifconfig",
    "sphinx.ext.viewcode",
    "sphinx_rtd_theme",
-    "sphinx_code_tabs"
+    "sphinx_code_tabs",
]

# The master toctree document.
@@ -71,15 +71,12 @@
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
-
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
-
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
-
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
@@ -89,24 +86,18 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
-    (master_doc, "fedn.tex", "FEDn Documentation",
-     "Scaleout Systems AB", "manual"),
+    (master_doc, "fedn.tex", "FEDn Documentation", "Scaleout Systems AB", "manual"),
]

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, "fedn", "FEDn Documentation",
-     [author], 1)
-]
+man_pages = [(master_doc, "fedn", "FEDn Documentation", [author], 1)]

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
-    (master_doc, "fedn", "FEDn Documentation",
-     author, "fedn", "One line description of project.",
-     "Miscellaneous"),
+    (master_doc, "fedn", "FEDn Documentation", author, "fedn", "One line description of project.", "Miscellaneous"),
]

# Bibliographic Dublin Core info.
diff --git a/docs/developer.rst b/docs/developer.rst
new file mode 100644
index 000000000..8a9e4b87d
--- /dev/null
+++ b/docs/developer.rst
@@ -0,0 +1,161 @@
+.. _developer-label:
+
+Local development and deployment
+================================
+
+.. note::
+   These instructions are for users wanting to set up a local development deployment of FEDn (i.e. without FEDn Studio).
+   This requires practical knowledge of Docker and docker-compose.
+
+Running the FEDn development sandbox (docker-compose)
+------------------------------------------------------
+
+During development on FEDn, and when working on your own aggregators/helpers, it is
+useful to have a local development setup of the core FEDn services (controller, combiner, database, object store).
+For this, we provide Dockerfiles and a docker-compose template.
+
+To start a development sandbox for FEDn using docker-compose:
+
+.. code-block::
+
+   docker compose \
+     -f ../../docker-compose.yaml \
+     -f docker-compose.override.yaml \
+     up
+
+This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients.
+You can verify the deployment using these URLs:
+
+- API Server: http://localhost:8092/get_controller_status
+- Minio: http://localhost:9000
+- Mongo Express: http://localhost:8081
+
+This setup does not include the security features of Studio, and thus will not require authentication of clients.
+To use the APIClient to test a compute package and seed model against a local FEDn deployment:
+
+.. code-block::
+
+   from fedn import APIClient
+   client = APIClient(host="localhost", port=8092)
+   client.set_active_package("package.tgz", helper="numpyhelper")
+   client.set_active_model("seed.npz")
+
+
+To connect a native FEDn client, you need to make sure that the combiner service can be resolved using the name "combiner".
+One way to achieve this is to edit your '/etc/hosts' and add a line '127.0.0.1 combiner'.
+
+Access message logs and validation data from MongoDB
+------------------------------------------------------
+You can access and download event logs and validation data via the API, and as a developer you can also obtain
+the MongoDB backend data using pymongo or via the MongoExpress interface:
+
+- http://localhost:8081/db/fedn-network/
+
+Username and password are found in 'docker-compose.yaml'. A combined pymongo/Minio sketch is shown below.
+
+Access global models
+------------------------------------------------------
+
+You can obtain global model updates from the 'fedn-models' bucket in Minio:
+
+- http://localhost:9000
+
+Username and password are found in 'docker-compose.yaml'.
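+
+As an illustration, below is a minimal sketch of programmatic access to both backends. It assumes the
+default local ports shown above and uses placeholder credentials - substitute the values from
+'docker-compose.yaml'; the object handling is illustrative:
+
+.. code-block:: python
+
+   from pymongo import MongoClient
+   from minio import Minio
+
+   # MongoDB: browse the 'fedn-network' database (credentials/port from docker-compose.yaml)
+   mc = MongoClient("mongodb://<username>:<password>@localhost:<mongo-port>")
+   db = mc["fedn-network"]
+   print(db.list_collection_names())
+
+   # Minio: list global models in the 'fedn-models' bucket and download the first one
+   s3 = Minio("localhost:9000", access_key="<access-key>", secret_key="<secret-key>", secure=False)
+   names = [obj.object_name for obj in s3.list_objects("fedn-models")]
+   print(names)
+   if names:
+       s3.fget_object("fedn-models", names[0], "global_model.npz")
+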
+Reset the FEDn deployment
+------------------------------------------------------
+
+To purge all data from a deployment including all session and round data, access the MongoExpress UI interface and
+delete the entire ``fedn-network`` collection. Then restart all services.
+
+Clean up
+------------------------------------------------------
+You can clean up by running
+
+.. code-block::
+
+   docker-compose -f ../../docker-compose.yaml -f docker-compose.override.yaml down -v
+
+
+Connecting clients using Docker
+------------------------------------------------------
+
+For convenience, we distribute a Docker image hosted on ghcr.io with FEDn preinstalled. For example, to start a client for the MNIST PyTorch example using Docker
+and FEDn 0.10.0, run this from the example folder:
+
+.. code-block::
+
+   docker run \
+     -v $PWD/client.yaml:/app/client.yaml \
+     -e FEDN_PACKAGE_EXTRACT_DIR=package \
+     -e FEDN_NUM_DATA_SPLITS=2 \
+     -e FEDN_DATA_PATH=/app/package/data/clients/1/mnist.pt \
+     ghcr.io/scaleoutsystems/fedn/fedn:0.10.0 run client -in client.yaml --force-ssl --secure=True
+
+
+Self-managed distributed deployment
+------------------------------------------------------
+
+You can use different hosts for the various FEDn services. These instructions show how to set up FEDn on a **local network** using a single workstation or laptop as
+the host for the server-side components, and other hosts or devices as clients.
+
+.. note::
+   For a secure and production-grade deployment solution over **public networks**, explore the FEDn Studio service at
+   **fedn.scaleoutsystems.com**.
+
+   Alternatively follow this tutorial substituting the host's local IP with your public IP, open the necessary
+   ports (see which ports are used in docker-compose.yaml), and ensure you have taken additional necessary security
+   precautions.
+
+**Prerequisites**
+- `One host workstation and at least one client device`
+- `Python 3.8, 3.9, 3.10 or 3.11 `__
+- `Docker `__
+- `Docker Compose `__
+
+Launch a distributed FEDn Network
+---------------------------------
+
+
+Start by noting your host's local IP address, used within your network. Discover it by running ifconfig on UNIX or
+ipconfig on Windows, typically listed under inet for Unix and IPv4 for Windows.
+
+Continue by following the standard procedure to initiate a FEDn network, for example using the provided docker-compose template.
+Once the network is active, upload your compute package and seed (for comprehensive details, see the quickstart tutorials).
+
+.. note::
+   This guide covers general local networks where server and client may be on different hosts but able to communicate on their private IPs.
+   A common scenario is also to run fedn and the clients on **localhost** on a single machine. In that case, you can replace <local-ip> by "127.0.0.1" below.
+
+Configuring and Attaching Clients
+---------------------------------
+
+On your client device, continue with initializing your client. To connect to the host machine we need to ensure we are
+routing the correct DNS to our host's local IP address. We can do this using the standard FEDn `client.yaml`:
+
+.. code-block::
+
+   network_id: fedn-network
+   discover_host: api-server
+   discover_port: 8092
+
+
+We can then run a client using docker by adding the hostname:ip mapping in the docker run command:
+
+.. code-block::
+
+   docker run \
+     -v $PWD/client.yaml:<path-in-container> \
+     --add-host=api-server:<local-ip> \
+     --add-host=combiner:<local-ip> \
+     <image-name> run client -in client.yaml --name client1
+
+
+Alternatively, update the `/etc/hosts` file, appending the following lines to run natively:
+
+.. code-block::
+
+   <local-ip> api-server
+   <local-ip> combiner
diff --git a/docs/distributed.rst b/docs/distributed.rst
deleted file mode 100644
index 13803dd3f..000000000
--- a/docs/distributed.rst
+++ /dev/null
@@ -1,73 +0,0 @@
-Self-managed distributed deployment
-===================================
-
-This tutorial outlines the steps for deploying the FEDn framework over a **local network**, using a single workstation or laptop as
-the host for the servier-side components, and other hosts or devices as clients. For general steps on how to run FEDn, see the quickstart tutorials.
-
-
-.. note::
-   For a secure and production-grade deployment solution over **public networks**, explore the FEDn Studio service at
-   **fedn.scaleoutsystems.com**.
-
-   Alternatively follow this tutorial substituting the hosts local IP with your public IP, open the neccesary
-   ports (see which ports are used in docker-compose.yaml), and ensure you have taken additional neccesary security
-   precautions.
-
-**Prerequisites**
-- `One host workstation and atleast one client device`
-- `Python 3.8, 3.9, 3.10 or 3.11 `__
-- `Docker `__
-- `Docker Compose `__
-
-Launch a distributed FEDn Network
----------------------------------
-
-
-Start by noting your host's local IP address, used within your network. Discover it by running ifconfig on UNIX or
-ipconfig on Windows, typically listed under inet for Unix and IPv4 for Windows.
-
-Continue by following the standard procedure to initiate a FEDn network, for example using the provided docker-compose template.
-Once the network is active, upload your compute package and seed (for comprehensive details, see the quickstart tutorials).
-
-.. note::
-   This guide covers general local networks where server and client may be on different hosts but able to communicate on their private IPs.
-   A common scenario is also to run fedn and the clients on **localhost** on a single machine. In that case, you can replace
-    by "127.0.0.1" below.
-
-Configuring and Attaching Clients
----------------------------------
-
-On your client device, continue with initializing your client. To connect to the host machine we need to ensure we are
-routing the correct DNS to our hosts local IP address. We can do this using the standard FEDn `client.yaml`:
-
-.. code-block::
-
-   network_id: fedn-network
-   discover_host: api-server
-   discover_port: 8092
-
-
-We can then run a client using docker by adding the hostname:ip mapping in the docker run command:
-
-.. code-block::
-
-   docker run \
-     -v $PWD/client.yaml: \
-
-   —add-host=api-server: \
-   —add-host=combiner: \
-    run client -in client.yaml --name client1
-
-
-Alternatively updating the `/etc/hosts` file, appending the following lines for running naitively:
-
-.. code-block::
-
-    api-server
-    combiner
-
-
-Start a training session
-------------------------
-
-After connecting with your clients, you are ready to start training sessions from the host machine.
\ No newline at end of file
diff --git a/docs/faq.rst b/docs/faq.rst
index 223aa2e49..b40ab2f5b 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -52,7 +52,7 @@ see the section about model marshaling:
Q: Can I start a client listening only to training requests or only on validation requests?:
--------------------------------------------------------------------------------------------

-Yes! You can toggle which message streams a client subscibes to when starting the client. For example, to start a pure validation client:
+Yes! You can toggle which message streams a client subscribes to when starting the client. For example, to start a pure validation client:

.. code-block:: bash

@@ -62,12 +62,12 @@ Yes! You can toggle which message streams a client subscibes to when starting th
Q: How do you approach the question of output privacy?
----------------------------------------------------------------------------------

-We take security in (federated) machine learning seriously. Federated learning is a foundational technology that impoves input privacy
+We take security in (federated) machine learning seriously. Federated learning is a foundational technology that improves input privacy
in machine learning by allowing datasets to stay local and private, and not copied to a server. FEDn is designed to provide an industry grade
-implementation of the core communication and aggregration layers of federated learning, as well as configurable modules for traceability, logging
+implementation of the core communication and aggregation layers of federated learning, as well as configurable modules for traceability, logging
etc, to allow the developer balance between privacy and auditability. With `FEDn Studio `__
we add functionality for user authentication, authorization, and federated client identity management. As such, The FEDn Framework provides
-a comprehensive software suite for implemeting secure federated learning following industry best-practices.
+a comprehensive software suite for implementing secure federated learning following industry best-practices.

Going beyond input privacy, there are several additional considerations relating to output privacy and potential attacks on (federated)
machine learning systems. For an introduction to the topic, see this blog post:

@@ -85,4 +85,4 @@ with the Scaleout team.
- `LEAKPRO: Leakage Profiling and Risk Oversight for Machine Learning Models `__
- `Validating a System Development Kit for edge federated learning `__
- `Trusted Execution Environments for Federated Learning: `__
-- `Robust IoT Security: Intrusion Detection Leveraging Contributions from Multiple Systems `__
\ No newline at end of file
+- `Robust IoT Security: Intrusion Detection Leveraging Contributions from Multiple Systems `__
diff --git a/docs/fedn.network.dashboard.rst b/docs/fedn.network.dashboard.rst
index 25ee3e8d8..d64563ad7 100644
--- a/docs/fedn.network.dashboard.rst
+++ b/docs/fedn.network.dashboard.rst
@@ -9,14 +9,6 @@ fedn.network.dashboard package
Submodules
----------

-fedn.network.dashboard.plots module
------------------------------------
-
-.. automodule:: fedn.network.dashboard.plots
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
fedn.network.dashboard.restservice module
-----------------------------------------

diff --git a/docs/index.rst b/docs/index.rst
index f73a9aa5c..79b862c0e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -4,19 +4,18 @@

   introduction
   quickstart
+   projects

.. toctree::
   :maxdepth: 1
   :caption: Documentation

-   studio
-   distributed
   apiclient
-   projects
   architecture
   aggregators
   helpers
   auth
+   developer

.. toctree::
   :maxdepth: 1
diff --git a/docs/projects.rst b/docs/projects.rst
index 6397a3a56..2cf31f23f 100644
--- a/docs/projects.rst
+++ b/docs/projects.rst
@@ -1,8 +1,11 @@
.. _projects-label:

-FEDn Projects
+Develop your own project
================================================

+This guide explains how a FEDn project is structured, and details how to develop your own
+projects.
+
A FEDn project is a convention for packaging/wrapping machine learning code to be used for federated learning with FEDn. At the core,
a project is a directory of files (often a Git repository), containing your machine learning code, FEDn entry points, and a specification
of the runtime environment (python environment or a Docker image). The FEDn API and command-line tools provides functionality
@@ -28,9 +31,9 @@ We recommend that projects have roughly the following folder and file structure:
|    └ Dockerfile / docker-compose.yaml
|

-The "client" folder is referred to as the *compute package*. The file fedn.yaml is the FEDn Project File. It informs the FEDn Client of the code entry points to execute when computing model updates (local training) and validating models (optionally) .
-When deploying the project to FEDn, the client folder will be compressed as a .tgz bundle and uploaded to the FEDn controller. FEDn can then manage the distribution of the compute package to each client/data provider when they connect.
-Upon recipt of the bundle, the client will unpack it and stage it locally.
+The ``client`` folder is commonly referred to as the *compute package*. The file ``fedn.yaml`` is the FEDn Project File. It contains information about the ``entry points``. The entry points are used by the client to compute model updates (local training) and local validations (optional).
+To run a project in FEDn, the client folder is compressed as a .tgz bundle and pushed to the FEDn controller. FEDn then manages the distribution of the compute package to each client.
+Upon receipt of the package, a client will unpack it and stage it locally.

.. image:: img/ComputePackageOverview.png
   :alt: Compute package overview

@@ -41,7 +44,7 @@ The above figure provides a logical view of how FEDn uses the compute package (c
recieves a model update request, it calls upon a Dispatcher that looks up entry point definitions
in the compute package from the FEDn Project File.

-FEDn Project File (fedn.yaml)
+The Project File (fedn.yaml)
------------------------------

FEDn uses on a project file named 'fedn.yaml' to specify which entrypoints to execute when the client recieves a training or validation request, and
what environment to execute those entrypoints in.

      command: python validate.py


-Environment
-^^^^^^^^^^^
-
-The software environment to be used to exectute the entry points. This should specify all client side dependencies of the project.
-FEDn currently supports Virtualenv environments, with packages on PyPI. When a project specifies a **python_env**, the FEDn
-client will create an isolated virtual environment and install the project dependencies into it before starting up the client.
+**Environment**
+
+It is assumed that all entry points are executable within the client runtime environment. As a user, you have two main options
+to specify the environment:
+
+ 1. Provide a ``python_env`` in the ``fedn.yaml`` file. In this case, FEDn will create an isolated virtual environment and install the project dependencies into it before starting up the client. FEDn currently supports Virtualenv environments, with packages on PyPI.
+ 2. Manage the environment manually. Here you have several options, such as managing your own virtualenv, running in a Docker container, etc. Remove the ``python_env`` tag from ``fedn.yaml`` to handle the environment manually.
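+
+As an illustration, here is a minimal ``python_env`` sketch (the field names follow the convention
+used in the FEDn example projects; the environment name and packages are hypothetical - pin what
+your project needs):
+
+.. code-block:: yaml
+
+   # In fedn.yaml, point to the environment file:
+   python_env: python_env.yaml
+
+   # python_env.yaml
+   name: mnist-env
+   build_dependencies:
+     - pip
+     - setuptools
+     - wheel
+   dependencies:
+     - fedn
+     - torch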

Entry Points
-^^^^^^^^^^^^
+-------------

There are up to four Entry Points to be specified.

-**Build Entrypoint (build, optional):**
+**build (optional):**

-This entrypoint is usually called **once** for building artifacts such as initial seed models. However, it not limited to artifacts, and can be used for any kind of setup that needs to be done before the client starts up.
+This entrypoint is intended to be called **once** for building artifacts such as initial seed models. However, it is not limited to artifacts, and can be used for any kind of setup that needs to be done before the client starts up.

-**Startup Entrypoint (startup, optional):**
+To invoke the build entrypoint using the CLI:
+
+.. code-block:: bash
+
+   fedn run build --path <path-to-client-folder>
+
+
+**startup (optional):**

This entrypoint is called **once**, immediately after the client starts up and the environment has been initalized. It can be used to do runtime configurations of the local execution environment. For example, in the quickstart
tutorial example, the startup entrypoint invokes a script that downloads the MNIST dataset and creates a partition to be used by that client.
This is a convenience useful for automation of experiments and not all clients will specify such a script.

-**Training Entrypoint (train, mandatory):**
+**train (mandatory):**

This entrypoint is invoked every time the client recieves a new model update request. The training entry point must be a single-input single-output (SISO) program. It will be invoked by FEDn as such:

@@ -96,7 +104,7 @@ This entrypoint is invoked every time the client recieves a new model update req

where 'model_in' is the file containing the current global model to be updated, and 'model_out' is a path to write the new model update to.
Download and upload of these files are handled automatically by the FEDn client, the user only specifies how to read and parse the data contained in them (see examples) .

-**Validation Entrypoint (validate, optional):**
+**validate (optional):**

The validation entry point works in a similar was as the trainig entrypoint. It can be used to specify how a client should validate the current global
model on local test/validation data. It should read a model update from file, validate it (in any way suitable to the user), and write a **json file** containing validation data:

@@ -107,8 +115,7 @@ model on local test/validation data. It should read a model update from file, va

The validate entrypoint is optional.

-Example train entry point
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
+**Example train entry point**

Below is an example training entry point taken from the PyTorch getting stated project.

@@ -202,7 +209,7 @@ using the pytorch helper module. The fifth function (_init_seed) is used to init
The seventh function (_validate) is used to validate the model, again observe the two first arguments which will be set by the FEDn client.

-Packaging for distribution
+Build a compute package
--------------------------

To deploy a project to FEDn (Studio or pseudo-local) we simply compress the *client* folder as .tgz file. using fedn command line tool or manually:

@@ -223,15 +230,15 @@ by looking at the code above. Here we assume that the dataset is present in a fi
the exection of entrypoint.py. Then, independent on the preferred way to run the client (native, Docker, K8s etc) this structure needs to be
maintained for this particular compute package. Note however, that there are many ways to accompish this on a local operational level.

-Testing the entry points before deploying the package to FEDn
--------------------------------------------------------------
+Testing the entry points locally
+---------------------------------

-We recommend you to test your code before deploying it to FEDn for distibution to clients. You can conveniently test *train* and *validate* by:
+We recommend that you test your entry points locally before uploading the compute package to Studio. You can test *train* and *validate* by running (example for the mnist-keras
+project):

.. code-block:: bash

   python train.py ../seed.npz ../model_update.npz --data_path ../data/mnist.npz
   python validate.py ../model_update.npz ../validation.json --data_path ../data/mnist.npz

-Once everything works as expected you can start the federated network, upload the .tgz compute package and the initial model (use :py:meth:`fedn.network.api.client.APIClient.set_initial_model` for uploading an initial model).
-
+Note that here we assume execution in the correct Python environment.
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index d231aca9e..7723e7f28 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -2,26 +2,34 @@ Getting started with FEDn
=========================

.. note::
-   This tutorial is a quickstart guide to FEDn based on a pre-made FEDn Project. It is designed to serve as a minimalistic starting point for developers.
-   To learn about FEDn Projects in order to develop your own federated machine learning projects, see :ref:`projects-label`.
+   This tutorial is a quickstart guide to FEDn based on a pre-made FEDn Project. It is designed to serve as a starting point for new developers.
+   To learn how to develop your own project from scratch, see :ref:`projects-label`.

**Prerequisites**

-- `Python >=3.8, <=3.11 `__
+- `Python >=3.8, <=3.12 `__
- `A FEDn Studio account `__

-Set up a FEDn Studio Project
-----------------------------
+1. Start a FEDn Studio Project
+------------------------------

-Start by creating an account in FEDn Studio and set up a project by following the instruction here: :ref:`studio`.
+Start by creating an account in Studio. Head over to `fedn.scaleoutsystems.com/signup `_ and sign up.

-Install FEDn
-------------
+Once logged into Studio, create a new project by clicking on the "New Project" button in the top right corner of the screen.
+You will see a Studio project similar to the image below. The Studio project provides all the necessary server-side components of FEDn.
+We will use this project in a later stage to run the federated experiments. But first, we will set up the local client.
+
+
+.. image:: img/studio_project_overview.png
+
+
+2. Install FEDn on your client
+-------------------------------

**Using pip**

-Install the FEDn package using pip:
+On your local machine/client, install the FEDn package using pip:

.. code-block:: bash

@@ -41,44 +49,64 @@ It is recommended to use a virtual environment when installing FEDn.

.. _package-creation:

-Initialize FEDn with the client code bundle and seed model
-----------------------------------------------------------
+Next, we will prepare the client. We will use one of the pre-defined projects in the FEDn repository, ``mnist-pytorch``.

-Next, we will prepare the client. The key part of a FEDn Project is the client definition -
-code that contains entrypoints for training and (optionally) validating a model update on the client.
+3. Create the compute package and seed model
+--------------------------------------------
+
+In order to train a federated model using FEDn, your Studio project needs to be initialized with a ``compute package`` and a ``seed model``. The compute package is a code bundle containing the
+code used by the client to execute local training and local validation. The seed model is a first version of the global model.
+For a detailed explanation of the compute package and seed model, see this guide: :ref:`projects-label`
+
+To work through this quick start you need a local copy of the ``mnist-pytorch`` example project contained in the main FEDn Git repository.
+The following command clones the entire repository but you will only use the example:

.. code-block:: bash

-Locate into ``examples/mnist-pytorch`` and familiarize yourself with the project structure. The dependencies needed in the client environment are specified
-in ``client/python_env.yaml``.
+   git clone https://github.com/scaleoutsystems/fedn.git

-In order to train a federated model using FEDn, your Studio project needs to be initialized with a compute package and a seed model. The compute package is a bundle
-of the client specification, and the seed model is a first version of the global model.
+Navigate to the ``fedn/examples/mnist-pytorch`` folder. The compute package is located in the folder ``client``.

-Create a package of the fedn project (assumes your current working directory is in the root of the project /examples/mnist-pytorch):
+Create a compute package:

.. code-block::

   fedn package create --path client

-This will create a package called 'package.tgz' in the root of the project.
+This will create a file called ``package.tgz`` in the root of the project.

-Next, run the build entrypoint defined in ``client/fedn.yaml`` to build the model artifact.
+Next, create the seed model:

.. code-block::

   fedn run build --path client

-This will create a seed model called 'seed.npz' in the root of the project. We will now upload these to your Studio project using the FEDn APIClient.
+This will create a file called ``seed.npz`` in the root of the project.
+
+.. note::
+   This example automatically creates the runtime environment for the compute package using Virtualenv.
+   When you first execute the above commands, FEDn will build a venv, and this takes
+   a bit of time. For more information on the various options to manage the environment, see :ref:`projects-label`.
+
+Next, we will upload these files to your Studio project.

+4. Initialize your FEDn Studio Project
+--------------------------------------
+
+In the Studio UI, navigate to the project you created above and click on the "Sessions" tab. Click on the "New Session" button. Under the "Compute package" tab, select a name and upload the generated package file. Under the "Seed model" tab, upload the generated seed file:
+
+.. image:: img/upload_package.png
+
+**Upload the package and seed model using the Python APIClient**

-**Upload the package and seed model**
+It is also possible to upload a package and seed model using the Python API Client.

.. note::
   You need to create an API admin token and use the token to authenticate the APIClient.
   Do this by going to the 'Settings' tab in FEDn Studio and click 'Generate token'. Copy the access token and use it in the APIClient below.
-   The controller host can be found on the main Dashboard in FEDn Studio.
+   The controller host can be found on the main Dashboard in FEDn Studio. More information on the use of the APIClient can be found here: :ref:`apiclient-label`.

-   You can also upload the file via the FEDn Studio UI. Please see :ref:`studio-upload-files` for more details.
-
-Upload the package and seed model using the APIClient:
+To upload the package and seed model using the APIClient:

.. code:: python

   >>> from fedn import APIClient
   >>> client = APIClient(host="", token="", secure=True, verify=True)
   >>> client.set_active_package("package.tgz", helper="numpyhelper")
   >>> client.set_active_model("seed.npz")

-Configure and attach clients
-----------------------------
+5. Configure and attach clients
+-------------------------------
+
+**Generate an access token for the client (in Studio)**
+
+Each local client needs an access token in order to connect securely to the FEDn server. These tokens are issued from your Studio Project.
+Go to the 'Clients' tab and click 'Connect client'. Download a client configuration file and save it to the root of the ``examples/mnist-pytorch`` folder.
+Rename the file to 'client.yaml'.
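+
+For illustration, the downloaded file has roughly the following shape (the exact fields and values
+are generated by Studio for your project - the values below are hypothetical placeholders):
+
+.. code-block:: yaml
+
+   network_id: fedn-network
+   discover_host: <controller-host-for-your-project>
+   token: <client-access-token>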
+
+**Start the client (on your local machine)**

-Each local client needs an access token in order to connect. These tokens are issued from your Studio Project. Go to the 'Clients' tab and click 'Connect client'.
-Download a client configuration file and save it to the root of the examples/mnist-pytorch folder. Rename the file to 'client.yaml'.
-Then start the client by running the following command in the root of the project:
+Now we can start the client by running the following command:

.. code-block::

   fedn run client -in client.yaml --secure=True --force-ssl

-Repeat the above for the number of clients you want to use. A normal laptop should be able to handle several clients for this example.
+Repeat these two steps (generate an access token and start a local client) for the number of clients you want to use.
+A normal laptop should be able to handle several clients for this example.

-**Modifing the data split:**
+**Modifying the data split (multiple clients, optional):**

-The default traning and test data for this example (MNIST) is for convenience downloaded and split by the client when it starts up (see 'startup' entrypoint).
-The number of splits and which split used by a client can be controlled via the environment variables ``FEDN_NUM_DATA_SPLITS`` and ``FEDN_DATA_PATH``.
+The default training and test data for this particular example (mnist-pytorch) is for convenience downloaded and split automatically by the client when it starts up (see the 'startup' entrypoint).
+The number of splits and which split to use by a client can be controlled via the environment variables ``FEDN_NUM_DATA_SPLITS`` and ``FEDN_DATA_PATH``.
For example, to split the data in 10 parts and start a client using the 8th partiton:

.. tabs::

@@ -126,10 +161,27 @@ For example, to split the data in 10 parts and start a client using the 8th part

      fedn client start -in client.yaml --secure=True --force-ssl

-Start a training session
-------------------------
+6. Start a training session
+---------------------------
+
+In Studio click on the "Sessions" link, then the "New session" button in the upper right corner. Click the "Start session" tab and enter your desired settings (the default settings are good for this example) and hit the "Start run" button.
+In the terminal where you are running your client you should now see some activity. When a round is completed, you can see the results on the "Models" page.
+
+**Watch the training progress**
+
+Once a training session is started, you can monitor the progress of the training by navigating to "Sessions" and clicking on the "Open" button of the active session. The session page will list the models as soon as they are generated.
+To get more information about a particular model, navigate to the model page by clicking the model name. From the model page you can download the model weights and get validation metrics.
+
+.. image:: img/studio_model_overview.png

+.. _studio-api:
+
+Congratulations, you have now completed your first federated training session with FEDn! Below you find additional information that can
+be useful as you progress in your federated learning journey.
+
+**Control training sessions using the Python APIClient**
+
+You can also start training sessions using the APIClient:
.. code:: python

   >>> ...
   >>> client.start_session(id="test-session", rounds=3)

@@ -144,125 +196,39 @@ You are now ready to start training the model using the APIClient:

   >>> validations = client.get_validations(model_id=model_id)

-Please see :py:mod:`fedn.network.api` for more details on the APIClient.
-
-.. note::
-
-   In FEDn Studio, you can start a training session by going to the 'Sessions' tab and click 'Start session'. See :ref:`studio` for a
-   step-by-step guide for how to control experiments using the UI.
+Please see :py:mod:`fedn.network.api` for more details on how to use the APIClient.

-Access model updates
---------------------
+**Downloading global model updates**

.. note::
   In FEDn Studio, you can access global model updates by going to the 'Models' or 'Sessions' tab. Here you can download model updates, metrics (as csv) and view the model trail.

-You can access global model updates via the APIClient:
+You can also access global model updates via the APIClient:

.. code:: python

   >>> ...
   >>> client.download_model("", path="model.npz")

-
-**Connecting clients using Docker**
-
-You can also use Docker to containerize the client.
-For convenience, there is a Docker image hosted on ghrc.io with fedn preinstalled.
-To start a client using Docker:
-
-.. code-block::
-
-   docker run \
-     -v $PWD/client.yaml:/app/client.yaml \
-     -e FEDN_PACKAGE_EXTRACT_DIR=package \
-     -e FEDN_NUM_DATA_SPLITS=2 \
-     -e FEDN_DATA_PATH=/app/package/data/clients/1/mnist.pt \
-     ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 run client -in client.yaml --force-ssl --secure=True
-
-
**Where to go from here?**
+--------------------------

-With you first FEDn federation set up, we suggest that you take a close look at how a FEDn project is structured
+With your first FEDn federated project set up, we suggest that you take a close look at how a FEDn project is structured
and how you develop your own FEDn projects:

- :ref:`projects-label`

+You can also dive into the architecture overview to learn more about how FEDn is designed and works under the hood:

-Local development deployment (using docker compose)
-----------------------------------------------------------
-
-.. note::
-   These instructions are for users wanting to set up a local development deployment of FEDn (wihout Studio).
-   This requires basic knowledge of Docker and docker-compose.
-   The main use-case for this is rapid iteration while developing the FEDn Project,
-   development of aggregator plugins, etc.
-
-Follow the steps above to install FEDn, generate 'package.tgz' and 'seed.tgz'. Then, instead of
-using a Studio project for a managed FEDn server-side, start a local FEDn network
-using docker-compose:
-
-.. code-block::
-
-   docker compose \
-   -f ../../docker-compose.yaml \
-   -f docker-compose.override.yaml \
-   up
-
-This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients.
-You can verify the deployment using these urls:
-
-- API Server: http://localhost:8092/get_controller_status
-- Minio: http://localhost:9000
-- Mongo Express: http://localhost:8081
+- :ref:`architecture-label`

+For developers looking to customize FEDn and develop their own aggregators, check out the local development guide.
+This page also has instructions for using Docker to run clients:

+- :ref:`developer-label`

-Upload the package and seed model to FEDn controller using the APIClient. In Python:
-
-.. code-block::
-
-   from fedn import APIClient
-   client = APIClient(host="localhost", port=8092)
-   client.set_active_package("package.tgz", helper="numpyhelper")
-   client.set_active_model("seed.npz")
-
-You can now start a training session with 5 rounds (default):
-
-.. code-block::
-
-   client.start_session()
-
-**Automate experimentation with several clients**
-
-If you want to scale the number of clients, you can do so by modifying ``docker-compose.override.yaml``. For example,
-in order to run with 3 clients, change the environment variable ``FEDN_NUM_DATA_SPLITS`` to 3, and add one more client
-by copying ``client1`` and setting ``FEDN_DATA_PATH`` to ``/app/package/data/clients/3/mnist.pt``
-
-
-**Access message logs and validation data from MongoDB**
-
-You can access and download event logs and validation data via the API, and you can also as a developer obtain
-the MongoDB backend data using pymongo or via the MongoExpress interface:
-
-- http://localhost:8081/db/fedn-network/
-
-The credentials are as set in docker-compose.yaml in the root of the repository.
-
-**Access global models**
-
-You can obtain global model updates from the 'fedn-models' bucket in Minio:
-
-- http://localhost:9000
-
-**Reset the FEDn deployment**
-
-To purge all data from a deployment incuding all session and round data, access the MongoExpress UI interface and
-delete the entire ``fedn-network`` collection. Then restart all services.
-
-**Clean up**
-
-You can clean up by running
-
-.. code-block::
-
-   docker-compose -f ../../docker-compose.yaml -f docker-compose.override.yaml down -v
diff --git a/docs/studio.rst b/docs/studio.rst
index 26e4615a0..e52a9fa6a 100644
--- a/docs/studio.rst
+++ b/docs/studio.rst
@@ -5,7 +5,7 @@ Studio

FEDn Studio is a web-based tool for managing and monitoring federated learning experiments. It provides the FEDn network as a managed service, as well as a user-friendly interface for monitoring the progress of training and visualizing the results. FEDn Studio is available as a SaaS at `fedn.scaleoutsystems.com `_ . It is free for development, testing and research (one project per user, backend compute resources sized for dev/test).
-Scaleout can also support users to scale up experiments and demonstrators on Studio, by granting custom resource quotas. Additonally, charts are available for self-managed deployment on-premise or in your cloud VPC (all major cloud providers). Contact the Scaleout team for more information.
+Scaleout can also support users to scale up experiments and demonstrators on Studio, by granting custom resource quotas. Additionally, charts are available for self-managed deployment on-premise or in your cloud VPC (all major cloud providers). Contact the Scaleout team for more information.

Getting started
---------------

@@ -29,7 +29,7 @@ Once you have created a project, you can find it via the sidebar link Projects.
2. **Clients**: management of client configurations and a list of current clients. Observe that this feature does not deploy clients, instead it configures a client config that contains a unique token which is required to connect to the reducer and the combiner.
3. **Combiners**: a list of combiners. Observe number of active clients for each combiner.
4. **Sessions**: a list of sessions with related models. Configure and start a new session. Upload compute package and seed model, set number of rounds, timeout limit etc.
-5. **Models**: a list of models generated across sessions, and dahsboards for visualizing training progress.
+5. **Models**: a list of models generated across sessions, and dashboards for visualizing training progress.
6. **Events**: a log of events from the combiner and the clients of the federated network.
7. **Settings**: project settings, including the option to give access to other users and to delete the project.

@@ -46,7 +46,7 @@ Please see :ref:`package-creation` for instructions on how to create a package a

Upload files
------------

-In the Studio UI, navigate to the project you created and click on the "Sessions" tab. Click on the "New Session" button. Under the Compute package tab, select a name and upload the generated package file. Under the Seed model tab upload the generated seed file:
+In the Studio UI, navigate to the project you created and click on the "Sessions" tab. Click on the "New Session" button. Under the "Compute package" tab, select a name and upload the generated package file. Under the "Seed model" tab, upload the generated seed file:

.. image:: img/upload_package.png

@@ -69,12 +69,12 @@ If the client is successfully connected, you should see the client listed in the
Start a training session
------------------------

-In Studio click on the "Sessions" link, then the "New session" button in the upper right corner. Click the Start session tab and enter your desirable settings (or use default) and hit the "Start run" button. In the terminal where your are running your client you should now see some activity. When the round is completed you can see the results in the FEDn Studio UI on the "Models" page.
+In Studio click on the "Sessions" link, then the "New session" button in the upper right corner. Click the "Start session" tab and enter your desired settings (or use default) and hit the "Start run" button. In the terminal where you are running your client you should now see some activity. When the round is completed, you can see the results in the FEDn Studio UI on the "Models" page.

Watch the training progress
---------------------------

-Once a training session is started, you can monitor the progress of the training by navigating to "Sessions" and click on the "Open" button of the active session. The session page will list the models as soon as they are generated. To get more information about a particular model, navigate to the model page by clicking the model name. From the model page you can download the model wieghts and get validation metrics.
+Once a training session is started, you can monitor the progress of the training by navigating to "Sessions" and clicking on the "Open" button of the active session. The session page will list the models as soon as they are generated. To get more information about a particular model, navigate to the model page by clicking the model name. From the model page you can download the model weights and get validation metrics.

To get an overview of how the models have evolved over time, navigate to the "Models" tab in the sidebar. Here you can see a list of all models generated across sessions along with a graph showing some metrics of how the models are performing.

@@ -86,5 +86,5 @@ Accessing the API
-----------------

The FEDn Studio API is available at /api/v1/. The controller host can be found in the project dashboard. Further, to access the API you need an admin API token.
-Nevigate to the "Settings" tab in the project and click on the "Generate token" button. Copy the token and use it to access the API. Please see :py:mod:`fedn.network.api` for how to pass the token to the APIClient.
+Navigate to the "Settings" tab in the project and click on the "Generate token" button. Copy the token and use it to access the API. Please see :py:mod:`fedn.network.api` for how to pass the token to the APIClient.

diff --git a/examples/FedSimSiam/README.rst b/examples/FedSimSiam/README.rst
index 54434c6dc..5831fd3ea 100644
--- a/examples/FedSimSiam/README.rst
+++ b/examples/FedSimSiam/README.rst
@@ -1,18 +1,23 @@
+ **Note: If you are new to FEDn, we recommend that you start with the MNIST-Pytorch example instead: https://github.com/scaleoutsystems/fedn/tree/master/examples/mnist-pytorch**
+
FEDn Project: FedSimSiam on CIFAR-10
------------------------------------

-This is an example FEDn Project that runs the federated self-supervised learning algorithm FedSimSiam on
-the CIFAR-10 dataset. This is a standard example often used for benchmarking. To be able to run this example, you
-need to have GPU access.
+This is an example FEDn Project that trains the federated self-supervised learning algorithm FedSimSiam on
+the CIFAR-10 dataset. CIFAR-10 is a popular benchmark dataset that contains images of 10 different classes, such as cars, dogs, and ships.
+In short, FedSimSiam trains an encoder to learn useful feature embeddings for images, without the use of labels.
+After the self-supervised training stage, the resulting encoder can be downloaded and trained for a downstream task (e.g., image classification) via supervised learning on labeled data.
+To learn more about self-supervised learning and FedSimSiam, have a look at our blog post: https://www.scaleoutsystems.com/post/federated-self-supervised-learning-and-autonomous-driving
+
+To run the example, follow the steps below. For a more detailed explanation, follow the Quickstart Tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html

- **Note: We recommend all new users to start by following the Quickstart Tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html**
+**Note: To be able to run this example, you need to have GPU access.**

Prerequisites
-------------

-- `Python 3.8, 3.9, 3.10 or 3.11 `__
-- `A FEDn Studio account `__
-- Change the dependencies in the 'client/python_env.yaml' file to match your cuda version.
+- `Python >=3.8, <=3.12 `__
+- `A project in FEDn Studio `__

Creating the compute package and seed model
-------------------------------------------

@@ -36,90 +41,31 @@ Create the compute package:

   fedn package create --path client

-This should create a file 'package.tgz' in the project folder.
+This creates a file 'package.tgz' in the project folder.

-Next, generate a seed model (the first model in a global model trail):
+Next, generate the seed model:

.. code-block::

   fedn run build --path client

-This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
-
-Using FEDn Studio
------------------
-
-Follow the instructions to register for FEDN Studio and start a project (https://fedn.readthedocs.io/en/stable/studio.html).
-
-In your Studio project:
-
-- Go to the 'Sessions' menu, click on 'New session', and upload the compute package (package.tgz) and seed model (seed.npz).
-- In the 'Clients' menu, click on 'Connect client' and download the client configuration file (client.yaml)
-- Save the client configuration file to the FedSimSiam example directory (fedn/examples/FedSimSiam)
-
-To connect a client, run the following command in your terminal:
-
-.. code-block::
-
-   fedn client start -in client.yaml --secure=True --force-ssl
-
-
-Running the example
--------------------
+This will create a model file 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
 
-After everything is set up, go to 'Sessions' and click on 'New Session'. Click on 'Start run' and the example will execute. You can follow the training progress on 'Events' and 'Models', where you
-can monitor the training progress. The monitoring is done using a kNN classifier that is fitted on the feature embeddings of the training images that are obtained by
-FedSimSiam's encoder, and evaluated on the feature embeddings of the test images. This process is repeated after each training round.
+Running the project on FEDn Studio
+----------------------------------
 
-This is a common method to track FedSimSiam's training progress, as FedSimSiam aims to minimize the distance between the embeddings of similar images.
-A high accuracy implies that the feature embeddings for images within the same class are indeed close to each other in the
-embedding space, i.e., FedSimSiam learned useful feature embeddings.
+To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html.
 
-Running FEDn in local development mode:
----------------------------------------
-
-Follow the steps above to install FEDn, generate 'package.tgz' and 'seed.tgz'.
-
-Start a pseudo-distributed FEDn network using docker-compose:
-
-.. code-block::
-
-   docker compose \
-   -f ../../docker-compose.yaml \
-   -f docker-compose.override.yaml \
-   up
-
-This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients.
-You can verify the deployment using these urls:
-
-- API Server: http://localhost:8092/get_controller_status
-- Minio: http://localhost:9000
-- Mongo Express: http://localhost:8081
-
-Upload the package and seed model to FEDn controller using the APIClient:
-
-.. code-block::
-
-   from fedn import APIClient
-   client = APIClient(host="localhost", port=8092)
-   client.set_active_package("package.tgz", helper="numpyhelper")
-   client.set_active_model("seed.npz")
-
-
-You can now start a training session with 100 rounds using the API client:
-
-.. code-block::
-
-   client.start_session(rounds=100)
-
-Clean up
---------
-
-You can clean up by running
-
-.. code-block::
+When running the example in FEDn Studio, you can follow the training progress of FedSimSiam under 'Models'.
+After each training round, a kNN classifier is fitted to the feature embeddings of the training images obtained
+by FedSimSiam's encoder and evaluated on the feature embeddings of the test images.
+This is a common method to track FedSimSiam's training progress,
+as FedSimSiam aims to minimize the distance between the embeddings of similar images.
+If training progresses as intended, accuracy increases as the feature embeddings of
+images within the same class move closer to each other in the embedding space.
+In the figure below we can see that the kNN accuracy increases over the training rounds,
+indicating that the training of FedSimSiam is proceeding as intended.
 
-   docker-compose \
-   -f ../../docker-compose.yaml \
-   -f docker-compose.override.yaml \
-   down -v
+.. image:: figs/fedsimsiam_monitoring.png
+   :width: 50%
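+
+For illustration only, the monitoring step is conceptually similar to the sketch below (this is not the package's monitoring code; it assumes a trained ``encoder`` callable and NumPy arrays ``x_train``, ``y_train``, ``x_test``, ``y_test``):
+
+.. code-block::
+
+   from sklearn.neighbors import KNeighborsClassifier
+
+   # Fit a kNN classifier on the training embeddings, then score it on the
+   # test embeddings; rising accuracy indicates increasingly useful features.
+   knn = KNeighborsClassifier(n_neighbors=5)
+   knn.fit(encoder(x_train), y_train)
+   accuracy = knn.score(encoder(x_test), y_test)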
diff --git a/examples/FedSimSiam/figs/fedsimsiam_monitoring.png b/examples/FedSimSiam/figs/fedsimsiam_monitoring.png
new file mode 100644
index 000000000..236ef29c1
Binary files /dev/null and b/examples/FedSimSiam/figs/fedsimsiam_monitoring.png differ
diff --git a/examples/flower-client/README.rst b/examples/flower-client/README.rst
index fff8e20b3..4207ee019 100644
--- a/examples/flower-client/README.rst
+++ b/examples/flower-client/README.rst
@@ -47,10 +47,10 @@ a FEDn network. Here you have two main options: using FEDn Studio (recommended
 for new users), or a self-managed pseudo-distributed deployment
 on your own machine.
 
-If you are using FEDn Studio (recommended):
+Using FEDn Studio:
 -------------------------------------------
 
-Follow instructions here to register for Studio and start a project: https://fedn.readthedocs.io/en/stable/studio.html.
+Follow instructions here to register for Studio and start a project: https://fedn.readthedocs.io/en/stable/quickstart.html.
 
 In your Studio project:
 
@@ -73,47 +73,13 @@ Or, if you prefer to use Docker (this might take a long time):
      -v $PWD/client.yaml:/app/client.yaml \
      -e CLIENT_NUMBER=0 \
      -e FEDN_PACKAGE_EXTRACT_DIR=package \
-     ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 run client -in client.yaml --secure=True --force-ssl
-
-
-If you are running FEDn in local development mode:
---------------------------------------------------
-
-Deploy a FEDn network on local host (see `https://fedn.readthedocs.io/en/stable/quickstart.html#local-development-deployment-using-docker-compose`).
-
-Use the FEDn API Client to initalize FEDn with the compute package and seed model:
-
-.. code-block::
-
-   python init_fedn.py
-
-Create a file 'client.yaml' with the following content:
-
-.. code-block::
-
-   network_id: fedn-network
-   discover_host: api-server
-   discover_port: 8092
-   name: myclient
-
-Then start the client (using Docker)
-
-.. code-block::
-
-   docker run \
-     -v $PWD/client.yaml:/app/client.yaml \
-     --network=fedn_default \
-     -e CLIENT_NUMBER=0 \
-     -e FEDN_PACKAGE_EXTRACT_DIR=package \
-     ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 run client -in client.yaml
-
+     ghcr.io/scaleoutsystems/fedn/fedn:0.11.1 run client -in client.yaml --secure=True --force-ssl
 
 Scaling to multiple clients
 ------------------------------------------------------------------
 
-To scale the experiment with additional clients on the same host, execute the run command
-again from another terminal. If running from another host, add another 'client.yaml', install
-fedn, and execute the run command. In both cases inject a client number as an environment
+To scale the experiment with additional clients on the same host, generate another 'client.yaml' and execute the run command
+again from another terminal. Inject a client number as an environment
 variable, which is used for distributing data (see 'flwr_task.py').
 
 For Unix Operating Systems:
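+
+For example, to start a second client on the same host (a sketch; 'CLIENT_NUMBER' selects the data partition in 'flwr_task.py'):
+
+.. code-block::
+
+   export FEDN_PACKAGE_EXTRACT_DIR=package
+   export CLIENT_NUMBER=1
+   fedn client start -in client.yaml --secure=True --force-ssl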
diff --git a/examples/huggingface/README.rst b/examples/huggingface/README.rst
index 3d5653b7b..eaaad3254 100644
--- a/examples/huggingface/README.rst
+++ b/examples/huggingface/README.rst
@@ -1,3 +1,6 @@
+
+ **Note: If you are new to FEDn, we recommend that you start with the MNIST-Pytorch example instead: https://github.com/scaleoutsystems/fedn/tree/master/examples/mnist-pytorch**
+
 Hugging Face Transformer Example
 --------------------------------
 
@@ -11,20 +14,21 @@ Federated learning is a privacy preserving machine learning technique that enabl
 Fine-tuning large language models (LLMs) on various data sources enhances both accuracy and generalizability.
 In this example, the Enron email spam dataset is split among two clients. The BERT-tiny model is fine-tuned on the client data using
 federated learning to predict whether an email is spam or not.
-Execute the following steps to run the example:
 
-Prerequisites
--------------
+In FEDn Studio, you can visualize the training progress by plotting test loss and accuracy, as shown in the plot below.
+After running the example for only a few rounds in FEDn Studio, the BERT-tiny model - fine-tuned via federated learning -
+is able to detect spam emails on the test dataset with high accuracy.
 
-Using FEDn Studio:
+.. image:: figs/hf_figure.png
+   :width: 50%
 
-- `Python 3.8, 3.9, 3.10 or 3.11 `__
-- `A FEDn Studio account `__
+To run the example, follow the steps below. For a more detailed explanation, follow the Quickstart Tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html
 
-If using pseudo-distributed mode with docker-compose:
+Prerequisites
+-------------
 
-- `Docker `__
-- `Docker Compose `__
+- `Python >=3.8, <=3.12 `__
+- `A project in FEDn Studio `__
 
 Creating the compute package and seed model
 -------------------------------------------
@@ -48,100 +52,17 @@ Create the compute package:
 
    fedn package create --path client
 
-This should create a file 'package.tgz' in the project folder.
+This creates a file 'package.tgz' in the project folder.
 
-Next, generate a seed model (the first model in a global model trail):
+Next, generate the seed model:
 
 .. code-block::
 
   fedn run build --path client
 
-This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
-
-
-
-Using FEDn Studio (recommended)
--------------------------------
-
-Follow the instructions to register for FEDN Studio and start a project (https://fedn.readthedocs.io/en/stable/studio.html).
-
-In your Studio project:
-
-- Go to the 'Sessions' menu, click on 'New session', and upload the compute package (package.tgz) and seed model (seed.npz).
-- In the 'Clients' menu, click on 'Connect client' and download the client configuration file (client.yaml)
-- Save the client configuration file to the huggingface example directory (fedn/examples/huggingface)
-
-To connect a client, run the following command in your terminal:
-
-.. code-block::
-
-   fedn client start -in client.yaml --secure=True --force-ssl
-
-
-Alternatively, if you prefer to use Docker, run the following:
-
-.. code-block::
-
-   docker run \
-     -v $PWD/client.yaml:/app/client.yaml \
-     -e CLIENT_NUMBER=0 \
-     -e FEDN_PACKAGE_EXTRACT_DIR=package \
-     ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 client start -in client.yaml --secure=True --force-ssl
-
-
-Running the example
--------------------
-
-After everything is set up, go to 'Sessions' and click on 'New Session'. 
Click on 'Start run' and the example -will execute. You can follow the training progress on 'Events' and 'Models', where you can view the calculated metrics. - +This will create a model file 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv). +Running the project on FEDn +---------------------------- -Running FEDn in local development mode: ---------------------------------------- - -Create the compute package and seed model as explained above. Then run the following command: - - -.. code-block:: - - docker-compose \ - -f ../../docker-compose.yaml \ - -f docker-compose.override.yaml \ - up - - -This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients. You can verify the deployment using these urls: - -- API Server: http://localhost:8092/get_controller_status -- Minio: http://localhost:9000 -- Mongo Express: http://localhost:8081 - - -Upload the package and seed model to FEDn controller using the APIClient: - -.. code-block:: - - from fedn import APIClient - client = APIClient(host="localhost", port=8092) - client.set_active_package("package.tgz", helper="numpyhelper") - client.set_active_model("seed.npz") - - -You can now start a training session with 5 rounds (default) using the API client: - -.. code-block:: - - client.start_session() - -Clean up --------- - -You can clean up by running - -.. code-block:: - - docker-compose \ - -f ../../docker-compose.yaml \ - -f docker-compose.override.yaml \ - down -v +To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html. diff --git a/examples/huggingface/figs/hf_figure.png b/examples/huggingface/figs/hf_figure.png new file mode 100644 index 000000000..896f93d98 Binary files /dev/null and b/examples/huggingface/figs/hf_figure.png differ diff --git a/examples/mnist-keras/README.rst b/examples/mnist-keras/README.rst index 0f2ec9cf1..aaf13c21d 100644 --- a/examples/mnist-keras/README.rst +++ b/examples/mnist-keras/README.rst @@ -1,14 +1,14 @@ FEDn Project: Keras/Tensorflow (MNIST) ------------------------------------------- -This is a TF/Keras version of the Quickstart Tutorial (PyTorch) FEDn Project. For a step-by-step guide, refer to that tutorial. +This is a TF/Keras version of the PyTorch Quickstart Tutorial. For a step-by-step guide, refer to that tutorial. **Note: We recommend all new users to start by following the Quickstart Tutorial: https://fedn.readthedocs.io/en/latest/quickstart.html** Prerequisites ------------------------------------------- -- `Python 3.8, 3.9, 3.10 or 3.11 `__ +- `Python >=3.8, <=3.12 `__ Creating the compute package and seed model ------------------------------------------- @@ -24,7 +24,7 @@ Clone this repository, then locate into this directory: .. code-block:: git clone https://github.com/scaleoutsystems/fedn.git - cd fedn/examples/mnist-pytorch + cd fedn/examples/mnist-keras Create the compute package: @@ -42,13 +42,8 @@ Next, generate a seed model (the first model in a global model trail): This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv). -Using FEDn Studio ------------------- +Running the project on FEDn +---------------------------- -To set up your FEDn Studio project and learn how to connect clients (using JWT token authentication), follow this guide: https://fedn.readthedocs.io/en/latest/studio.html. 
On the
-step "Upload Files", upload 'package.tgz' and 'seed.npz' created above.
+To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html.
 
-Using pseudo-distributed mode (local development)
--------------------------------------------------
-
-See the PyTorch version of this example for detailed instructions on how to deploy FEDn in `local development mode <../mnist-pytorch/README.rst>`_.
diff --git a/examples/mnist-pytorch/README.rst b/examples/mnist-pytorch/README.rst
index 71e5de2d1..990b902b2 100644
--- a/examples/mnist-pytorch/README.rst
+++ b/examples/mnist-pytorch/README.rst
@@ -2,25 +2,15 @@ FEDn Project: MNIST (PyTorch)
 -----------------------------
 
 This is an example FEDn Project based on the classic hand-written text recognition dataset MNIST.
-The example is intented as a minimalistic quickstart and automates the handling of training data
-by letting the client download and create its partition of the dataset as it starts up.
+The example is intended as a minimalistic quickstart to learn how to use FEDn.
 
- **Note: These instructions are geared towards users seeking to learn how to work
- with FEDn in local development mode using Docker/docker-compose. We recommend all new users
- to start by following the Quickstart Tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html**
+ **Note: We recommend that all new users start by taking the Quickstart Tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html**
 
 Prerequisites
 -------------
 
-Using FEDn Studio:
-
-- `Python 3.8, 3.9, 3.10 or 3.11 `__
-- `A FEDn Studio account `__
-
-If using pseudo-distributed mode with docker-compose:
-
-- `Docker `__
-- `Docker Compose `__
+- `Python >=3.8, <=3.12 `__
+- `A project in FEDn Studio `__
 
 Creating the compute package and seed model
 -------------------------------------------
@@ -44,126 +34,17 @@ Create the compute package:
 
    fedn package create --path client
 
-This should create a file 'package.tgz' in the project folder.
+This creates a file 'package.tgz' in the project folder.
 
-Next, generate a seed model (the first model in a global model trail):
+Next, generate the seed model:
 
 .. code-block::
 
   fedn run build --path client
 
-This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
-
-Using FEDn Studio
------------------
-
-Follow the guide here to set up your FEDn Studio project and learn how to connect clients (using token authentication): `Studio guide `__.
-On the step "Upload Files", upload 'package.tgz' and 'seed.npz' created above.
-
-
-Modifing the data split:
-========================
-
-The default traning and test data for this example is downloaded and split direcly by the client when it starts up (see 'startup' entrypoint).
-The number of splits and which split used by a client can be controlled via the environment variables ``FEDN_NUM_DATA_SPLITS`` and ``FEDN_DATA_PATH``.
-For example, to split the data in 10 parts and start a client using the 8th partiton:
-
-.. code-block::
-
-   export FEDN_PACKAGE_EXTRACT_DIR=package
-   export FEDN_NUM_DATA_SPLITS=10
-   export FEDN_DATA_PATH=./data/clients/8/mnist.pt
-   fedn client start -in client.yaml --secure=True --force-ssl
-
-The default is to split the data into 2 partitions and use the first partition.
- - -Connecting clients using Docker: -================================ - -For convenience, there is a Docker image hosted on ghrc.io with fedn preinstalled. To start a client using Docker: - -.. code-block:: - - docker run \ - -v $PWD/client.yaml:/app/client.yaml \ - -e FEDN_PACKAGE_EXTRACT_DIR=package \ - -e FEDN_NUM_DATA_SPLITS=2 \ - -e FEDN_DATA_PATH=/app/package/data/clients/1/mnist.pt \ - ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 run client -in client.yaml --force-ssl --secure=True - - -Local development mode using Docker/docker compose --------------------------------------------------- - -Follow the steps above to install FEDn, generate 'package.tgz' and 'seed.tgz'. - -Start a pseudo-distributed FEDn network using docker-compose: - -.. code-block:: - - docker compose \ - -f ../../docker-compose.yaml \ - -f docker-compose.override.yaml \ - up +This will create a model file 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv). -This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients. -You can verify the deployment using these urls: - -- API Server: http://localhost:8092/get_controller_status -- Minio: http://localhost:9000 -- Mongo Express: http://localhost:8081 - -Upload the package and seed model to FEDn controller using the APIClient. In Python: - -.. code-block:: - - from fedn import APIClient - client = APIClient(host="localhost", port=8092) - client.set_active_package("package.tgz", helper="numpyhelper") - client.set_active_model("seed.npz") - -You can now start a training session with 5 rounds (default): - -.. code-block:: - - client.start_session() - -Automate experimentation with several clients -============================================= - -If you want to scale the number of clients, you can do so by modifying ``docker-compose.override.yaml``. For example, -in order to run with 3 clients, change the environment variable ``FEDN_NUM_DATA_SPLITS`` to 3, and add one more client -by copying ``client1`` and setting ``FEDN_DATA_PATH`` to ``/app/package/data/clients/3/mnist.pt`` - - -Access message logs and validation data from MongoDB -==================================================== - -You can access and download event logs and validation data via the API, and you can also as a developer obtain -the MongoDB backend data using pymongo or via the MongoExpress interface: - -- http://localhost:8081/db/fedn-network/ - -The credentials are as set in docker-compose.yaml in the root of the repository. - -Access global models -==================== - -You can obtain global model updates from the 'fedn-models' bucket in Minio: - -- http://localhost:9000 - -Reset the FEDn deployment -========================= - -To purge all data from a deployment incuding all session and round data, access the MongoExpress UI interface and -delete the entire ``fedn-network`` collection. Then restart all services. - -Clean up -======== -You can clean up by running - -.. code-block:: +Running the project on FEDn +---------------------------- - docker-compose -f ../../docker-compose.yaml -f docker-compose.override.yaml down -v +To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html. 
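+
+The seed model is a NumPy '.npz' archive created with the numpyhelper, so you can sanity-check it locally before uploading (a minimal sketch, assuming NumPy is installed):
+
+.. code-block::
+
+   import numpy as np
+
+   # List the parameter arrays stored in the seed model.
+   print(np.load("seed.npz").files)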
diff --git a/examples/mnist-pytorch/client/fedn.yaml b/examples/mnist-pytorch/client/fedn.yaml
index b05504102..30873488b 100644
--- a/examples/mnist-pytorch/client/fedn.yaml
+++ b/examples/mnist-pytorch/client/fedn.yaml
@@ -7,4 +7,6 @@ entry_points:
   train:
     command: python train.py
   validate:
-    command: python validate.py
\ No newline at end of file
+    command: python validate.py
+  predict:
+    command: python predict.py
\ No newline at end of file
diff --git a/examples/mnist-pytorch/client/predict.py b/examples/mnist-pytorch/client/predict.py
new file mode 100644
index 000000000..aaf9f0f50
--- /dev/null
+++ b/examples/mnist-pytorch/client/predict.py
@@ -0,0 +1,37 @@
+import os
+import sys
+
+import torch
+from data import load_data
+from model import load_parameters
+
+dir_path = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.abspath(dir_path))
+
+
+def predict(in_model_path, out_artifact_path, data_path=None):
+    """Run prediction with the model.
+
+    :param in_model_path: The path to the input model.
+    :type in_model_path: str
+    :param out_artifact_path: The path to save the prediction output to.
+    :type out_artifact_path: str
+    :param data_path: The path to the data file.
+    :type data_path: str
+    """
+    # Load data
+    x_test, y_test = load_data(data_path, is_train=False)
+
+    # Load model
+    model = load_parameters(in_model_path)
+    model.eval()
+
+    # Predict
+    with torch.no_grad():
+        y_pred = model(x_test)
+    # Save the prediction to file/artifact; the artifact will be uploaded to the object store by the client.
+    torch.save(y_pred, out_artifact_path)
+
+
+if __name__ == "__main__":
+    predict(sys.argv[1], sys.argv[2])
diff --git a/examples/monai-2D-mednist/README.rst b/examples/monai-2D-mednist/README.rst
index c2c536f27..f61820682 100644
--- a/examples/monai-2D-mednist/README.rst
+++ b/examples/monai-2D-mednist/README.rst
@@ -1,15 +1,15 @@
 FEDn Project: MonAI 2D Classification with the MedNIST Dataset (PyTorch)
 ------------------------------------------------------------------------
 
-This is an example FEDn Project based on the MonAI 2D Classification with the MedNIST Dataset.
+This is an example FEDn Project based on the MonAI 2D Classification with the MedNIST Dataset. 
 The example is intented as a minimalistic quickstart and automates the handling of training data
-by letting the client download and create its partition of the dataset as it starts up.
+by letting the client download and create its partition of the dataset as it starts up. 
+
+Links:
 
-Links: 
-
 - MonAI: https://monai.io/
 - Base example notebook: https://github.com/Project-MONAI/tutorials/blob/main/2d_classification/mednist_tutorial.ipynb
-- MedNIST dataset: https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz
+- MedNIST dataset: https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz 
 
 Prerequisites
 -------------
@@ -17,17 +17,13 @@ Prerequisites
 Using FEDn Studio:
 
 - `Python 3.8, 3.9, 3.10 or 3.11 `__
-- `A FEDn Studio account `__
-
-If using pseudo-distributed mode with docker-compose:
+- `A FEDn Studio account `__
 
-- `Docker `__
-- `Docker Compose `__
 
 Creating the compute package and seed model
 -------------------------------------------
 
-Install fedn:
+Install fedn: 
 
 .. code-block::
 
@@ -54,13 +50,30 @@ Next, generate a seed model (the first model in a global model trail):
 
    fedn run build --path client
 
-This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
+This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
+
+Download and prepare the data
+-------------------------------------------
+
+Install requirements:
+
+.. code-block::
+
+   pip install -r requirements.txt
+
+Download the data and divide it into parts. Pass the number of
+data parts as an argument: python prepare_data.py NR-OF-DATAPARTS. In the
+command below we divide the dataset into 10 parts.
+
+.. code-block::
+
+   python prepare_data.py 10
+
 Using FEDn Studio
 -----------------
 
-Follow the guide here to set up your FEDn Studio project and learn how to connect clients (using token authentication): `Studio guide `__.
-On the step "Upload Files", upload 'package.tgz' and 'seed.npz' created above.
+Follow the guide here to set up your FEDn Studio project and learn how to connect clients (using token authentication): `Studio guide `__. 
+On the step "Upload Files", upload 'package.tgz' and 'seed.npz' created above. 
 
 Connecting clients:
 ===================
 
@@ -70,100 +83,25 @@ Connecting clients:
 
 .. code-block::
 
    export FEDN_PACKAGE_EXTRACT_DIR=package
-   export FEDN_DATA_PATH=./data/
+   export FEDN_DATA_PATH=/data/
    export FEDN_CLIENT_SETTINGS_PATH=/client_settings.yaml
+   export FEDN_DATA_SPLIT_INDEX=0
+
    fedn client start -in client.yaml --secure=True --force-ssl
 
 Connecting clients using Docker:
 ================================
 
-For convenience, there is a Docker image hosted on ghrc.io with fedn preinstalled. To start a client using Docker:
+For convenience, there is a Docker image hosted on ghcr.io with fedn preinstalled. To start a client using Docker: 
 
..
code-block:: - - client.start_session() - -Automate experimentation with several clients -============================================= - -If you want to scale the number of clients, you can do so by modifying ``docker-compose.override.yaml``. For example, -in order to run with 3 clients, change the environment variable ``FEDN_NUM_DATA_SPLITS`` to 3, and add one more client -by copying ``client1`` and setting ``FEDN_DATA_PATH`` to ``/app/package/data3/`` - - -Access message logs and validation data from MongoDB -==================================================== - -You can access and download event logs and validation data via the API, and you can also as a developer obtain -the MongoDB backend data using pymongo or via the MongoExpress interface: - -- http://localhost:8081/db/fedn-network/ - -The credentials are as set in docker-compose.yaml in the root of the repository. - -Access global models -==================== - -You can obtain global model updates from the 'fedn-models' bucket in Minio: - -- http://localhost:9000 - -Reset the FEDn deployment -========================= - -To purge all data from a deployment incuding all session and round data, access the MongoExpress UI interface and -delete the entire ``fedn-network`` collection. Then restart all services. - -Clean up -======== -You can clean up by running - -.. code-block:: - - docker-compose -f ../../docker-compose.yaml -f docker-compose.override.yaml down -v + -e FEDN_DATA_SPLIT_INDEX=0 \ + ghcr.io/scaleoutsystems/fedn/fedn:0.11.1 run client -in client.yaml --force-ssl --secure=True \ No newline at end of file diff --git a/examples/monai-2D-mednist/client/data.py b/examples/monai-2D-mednist/client/data.py index 0a8b5c306..c8a8a4e0b 100644 --- a/examples/monai-2D-mednist/client/data.py +++ b/examples/monai-2D-mednist/client/data.py @@ -1,11 +1,8 @@ import os import random - import numpy as np import PIL import torch -import yaml -from monai.apps import download_and_extract dir_path = os.path.dirname(os.path.realpath(__file__)) abs_path = os.path.abspath(dir_path) @@ -13,54 +10,6 @@ DATA_CLASSES = {"AbdomenCT": 0, "BreastMRI": 1, "CXR": 2, "ChestCT": 3, "Hand": 4, "HeadCT": 5} -def split_data(data_path="data/MedNIST", splits=100, validation_split=0.9): - # create clients - clients = {"client " + str(i): {"train": [], "validation": []} for i in range(splits)} - - for class_ in os.listdir(data_path): - if os.path.isdir(os.path.join(data_path, class_)): - patients_in_class = [os.path.join(class_, patient) for patient in os.listdir(os.path.join(data_path, class_))] - np.random.shuffle(patients_in_class) - chops = np.int32(np.linspace(0, len(patients_in_class), splits + 1)) - for split in range(splits): - p = patients_in_class[chops[split] : chops[split + 1]] - valsplit = np.int32(len(p) * validation_split) - - clients["client " + str(split)]["train"] += p[:valsplit] - clients["client " + str(split)]["validation"] += p[valsplit:] - - with open(os.path.join(os.path.dirname(data_path), "data_splits.yaml"), "w") as file: - yaml.dump(clients, file, default_flow_style=False) - - -def get_data(out_dir="data"): - """Get data from the external repository. - - :param out_dir: Path to data directory. 
If doesn't - :type data_dir: str - """ - # Make dir if necessary - if not os.path.exists(out_dir): - os.mkdir(out_dir) - - resource = "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz" - md5 = "0bc7306e7427e00ad1c5526a6677552d" - - compressed_file = os.path.join(out_dir, "MedNIST.tar.gz") - - data_dir = os.path.abspath(out_dir) - print("data_dir:", data_dir) - if os.path.exists(data_dir): - print("path exist.") - if not os.path.exists(compressed_file): - print("compressed file does not exist, downloading and extracting data.") - download_and_extract(resource, compressed_file, data_dir, md5) - else: - print("files already exist.") - - split_data() - - def get_classes(data_path): """Get a list of classes from the dataset @@ -148,6 +97,5 @@ def __getitem__(self, index): return (self.transforms(os.path.join(self.data_path, self.image_files[index])), DATA_CLASSES[os.path.dirname(self.image_files[index])]) -if __name__ == "__main__": - # Prepare data if not already done - get_data() + + diff --git a/examples/monai-2D-mednist/client/python_env.yaml b/examples/monai-2D-mednist/client/python_env.yaml index 7580ffb76..ec39b5084 100644 --- a/examples/monai-2D-mednist/client/python_env.yaml +++ b/examples/monai-2D-mednist/client/python_env.yaml @@ -2,11 +2,11 @@ name: monai-2d-mdnist build_dependencies: - pip - setuptools - - wheel==0.37.1 + - wheel dependencies: - torch==2.2.1 - torchvision==0.17.1 - - fedn==0.9.0 + - fedn - monai-weekly[pillow, tqdm] - - scikit-learn - - tensorboard + - numpy==1.26.4 + - scikit-learn diff --git a/examples/monai-2D-mednist/client/train.py b/examples/monai-2D-mednist/client/train.py index e3cb235c0..2ee922865 100644 --- a/examples/monai-2D-mednist/client/train.py +++ b/examples/monai-2D-mednist/client/train.py @@ -22,7 +22,6 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.abspath(dir_path)) - train_transforms = Compose( [ LoadImage(image_only=True), @@ -54,18 +53,16 @@ def train(in_model_path, out_model_path, data_path=None, client_settings_path=No if client_settings_path is None: client_settings_path = os.environ.get("FEDN_CLIENT_SETTINGS_PATH", dir_path + "/client_settings.yaml") - print("client_settings_path: ", client_settings_path) with open(client_settings_path, "r") as fh: # Used by CJG for local training try: client_settings = dict(yaml.safe_load(fh)) except yaml.YAMLError: raise - print("client settings: ", client_settings) batch_size = client_settings["batch_size"] max_epochs = client_settings["local_epochs"] num_workers = client_settings["num_workers"] - split_index = client_settings["split_index"] + split_index = os.environ.get("FEDN_DATA_SPLIT_INDEX") lr = client_settings["lr"] if data_path is None: @@ -76,8 +73,7 @@ def train(in_model_path, out_model_path, data_path=None, client_settings_path=No image_list = clients["client " + str(split_index)]["train"] - train_ds = MedNISTDataset(data_path="data/MedNIST", transforms=train_transforms, image_files=image_list) - + train_ds = MedNISTDataset(data_path=data_path+"/MedNIST/", transforms=train_transforms, image_files=image_list) train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers) # Load parmeters and initialize model diff --git a/examples/monai-2D-mednist/client/validate.py b/examples/monai-2D-mednist/client/validate.py index 74292c34f..ff4eb9263 100644 --- a/examples/monai-2D-mednist/client/validate.py +++ b/examples/monai-2D-mednist/client/validate.py @@ -45,7 +45,7 @@ def 
validate(in_model_path, out_json_path, data_path=None, client_settings_path= num_workers = client_settings["num_workers"] batch_size = client_settings["batch_size"] - split_index = client_settings["split_index"] + split_index = os.environ.get("FEDN_DATA_SPLIT_INDEX") if data_path is None: data_path = os.environ.get("FEDN_DATA_PATH") @@ -55,7 +55,7 @@ def validate(in_model_path, out_json_path, data_path=None, client_settings_path= image_list = clients["client " + str(split_index)]["validation"] - val_ds = MedNISTDataset(data_path="data/MedNIST", transforms=val_transforms, image_files=image_list) + val_ds = MedNISTDataset(data_path=data_path+"/MedNIST/", transforms=val_transforms, image_files=image_list) val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers) diff --git a/examples/monai-2D-mednist/client_settings.yaml b/examples/monai-2D-mednist/client_settings.yaml index f7bccb303..468c78802 100644 --- a/examples/monai-2D-mednist/client_settings.yaml +++ b/examples/monai-2D-mednist/client_settings.yaml @@ -1,6 +1,5 @@ lr: 0.01 -batch_size: 32 -local_epochs: 10 +batch_size: 8 +local_epochs: 1 num_workers: 1 sample_size: 30 -split_index: 4 diff --git a/examples/monai-2D-mednist/docker-compose.override.yaml b/examples/monai-2D-mednist/docker-compose.override.yaml index afeaf1437..88fda24d8 100644 --- a/examples/monai-2D-mednist/docker-compose.override.yaml +++ b/examples/monai-2D-mednist/docker-compose.override.yaml @@ -15,13 +15,15 @@ services: service: client environment: <<: *defaults - FEDN_DATA_PATH: /app/package/client/data/MedNIST - FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml + FEDN_DATA_PATH: /app/data/MedNIST + FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml + FEDN_DATA_SPLIT_INDEX: 0 deploy: replicas: 1 volumes: - ${HOST_REPO_DIR:-.}/fedn:/app/fedn - - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml + - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml + - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/data:/app/data client2: extends: @@ -29,8 +31,12 @@ services: service: client environment: <<: *defaults - FEDN_DATA_PATH: /app/package/client/data/MedNIST + FEDN_DATA_PATH: /app/data/MedNIST + FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml + FEDN_DATA_SPLIT_INDEX: 1 deploy: replicas: 1 volumes: - ${HOST_REPO_DIR:-.}/fedn:/app/fedn + - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml + - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/data:/app/data diff --git a/examples/monai-2D-mednist/prepare_data.py b/examples/monai-2D-mednist/prepare_data.py new file mode 100644 index 000000000..80c083549 --- /dev/null +++ b/examples/monai-2D-mednist/prepare_data.py @@ -0,0 +1,66 @@ +import os +import sys +import numpy as np + +import yaml +from monai.apps import download_and_extract + + +def split_data(data_path="data/MedNIST", splits=100, validation_split=0.9): + # create clients + clients = {"client " + str(i): {"train": [], "validation": []} for i in range(splits)} + print("splits: ", splits) + for class_ in os.listdir(data_path): + if os.path.isdir(os.path.join(data_path, class_)): + patients_in_class = [os.path.join(class_, patient) for patient in os.listdir(os.path.join(data_path, class_))] + np.random.shuffle(patients_in_class) + chops = np.int32(np.linspace(0, len(patients_in_class), splits + 1)) + for split in range(splits): + p = patients_in_class[chops[split] : chops[split + 1]] + + valsplit = 
np.int32(len(p) * validation_split)
+
+                clients["client " + str(split)]["train"] += p[:valsplit]
+                clients["client " + str(split)]["validation"] += p[valsplit:]
+
+                if split == 0:
+                    print("len p: ", len(p))
+                    print("valsplit: ", valsplit)
+                    print("p[:valsplit]: ", p[:valsplit])
+                    print("p[valsplit:]: ", p[valsplit:])
+
+    with open(os.path.join(os.path.dirname(data_path), "data_splits.yaml"), "w") as file:
+        yaml.dump(clients, file, default_flow_style=False)
+
+
+def get_data(out_dir="data", data_splits=10):
+    """Get data from the external repository.
+
+    :param out_dir: Path to the data directory. Created if it does not exist.
+    :type out_dir: str
+    :param data_splits: Number of parts to split the data into.
+    :type data_splits: int
+    """
+    # Make dir if necessary
+    if not os.path.exists(out_dir):
+        os.mkdir(out_dir)
+
+    resource = "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz"
+    md5 = "0bc7306e7427e00ad1c5526a6677552d"
+
+    compressed_file = os.path.join(out_dir, "MedNIST.tar.gz")
+
+    data_dir = os.path.abspath(out_dir)
+    print("data_dir:", data_dir)
+    if os.path.exists(data_dir):
+        print("path exist.")
+        if not os.path.exists(compressed_file):
+            print("compressed file does not exist, downloading and extracting data.")
+            download_and_extract(resource, compressed_file, data_dir, md5)
+        else:
+            print("files already exist.")
+
+    split_data(splits=data_splits)
+
+
+if __name__ == "__main__":
+    # Prepare data if not already done
+    get_data(data_splits=int(sys.argv[1]))
diff --git a/examples/monai-2D-mednist/requirements.txt b/examples/monai-2D-mednist/requirements.txt
new file mode 100644
index 000000000..0e2857824
--- /dev/null
+++ b/examples/monai-2D-mednist/requirements.txt
@@ -0,0 +1,3 @@
+monai
+PyYAML
+numpy==1.26.4
\ No newline at end of file
diff --git a/fedn/cli/__init__.py b/fedn/cli/__init__.py
index f4be1a9ae..7028dbfa6 100644
--- a/fedn/cli/__init__.py
+++ b/fedn/cli/__init__.py
@@ -10,3 +10,4 @@
 from .session_cmd import session_cmd  # noqa: F401
 from .status_cmd import status_cmd  # noqa: F401
 from .validation_cmd import validation_cmd  # noqa: F401
+from .controller_cmd import controller_cmd  # noqa: F401
diff --git a/fedn/cli/controller_cmd.py b/fedn/cli/controller_cmd.py
new file mode 100644
index 000000000..ab8727b27
--- /dev/null
+++ b/fedn/cli/controller_cmd.py
@@ -0,0 +1,18 @@
+import click
+
+from .main import main
+
+
+@main.group("controller")
+@click.pass_context
+def controller_cmd(ctx):
+    """:param ctx:"""
+    pass
+
+
+@controller_cmd.command("start")
+@click.pass_context
+def start_cmd(ctx):
+    from fedn.network.api.server import start_server_api
+
+    start_server_api()
diff --git a/fedn/cli/main.py b/fedn/cli/main.py
index d6f912e62..ab1dd448e 100644
--- a/fedn/cli/main.py
+++ b/fedn/cli/main.py
@@ -1,3 +1,4 @@
+from fedn.utils.dist import get_version
 import click
 
 CONTEXT_SETTINGS = dict(
@@ -5,10 +6,12 @@
     help_option_names=["-h", "--help"],
 )
 
+version = get_version("fedn")
+
 
 @click.group(context_settings=CONTEXT_SETTINGS)
+@click.version_option(version)
 @click.pass_context
 def main(ctx):
-    """:param ctx:
-    """
+    """:param ctx:"""
     ctx.obj = dict()
diff --git a/fedn/cli/run_cmd.py b/fedn/cli/run_cmd.py
index 123f17320..0aa069046 100644
--- a/fedn/cli/run_cmd.py
+++ b/fedn/cli/run_cmd.py
@@ -4,7 +4,6 @@
 import click
 import yaml
-
 from fedn.common.exceptions import InvalidClientConfig
 from fedn.common.log_config import logger
 from fedn.network.clients.client import Client
@@ -44,7 +43,100 @@ def run_cmd(ctx):
     """:param ctx:
     """
     pass
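+
+# The 'validate', 'train' and 'startup' commands below run a single entry
+# point from fedn.yaml locally, outside of a federation, and remove the
+# virtualenv afterwards. Example invocations (file paths are hypothetical):
+#
+#   fedn run train --path client -i seed.npz -o model_update.npz
+#   fedn run validate --path client -i model_update.npz -o validation.json
+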
+@run_cmd.command("validate")
+@click.option("-p", "--path", required=True, help="Path to package directory containing fedn.yaml")
+@click.option("-i", "--input", required=True, help="Path to input model")
+@click.option("-o", "--output", required=True, help="Path to write the output JSON containing validation metrics")
+@click.pass_context
+def validate_cmd(ctx, path, input, output):
+    """Execute 'validate' entrypoint in fedn.yaml.
+
+    :param ctx:
+    :param path: Path to folder containing fedn.yaml
+    :type path: str
+    :param input: Path to the input model
+    :type input: str
+    :param output: Path to write the output JSON containing validation metrics
+    :type output: str
+    """
+    path = os.path.abspath(path)
+    yaml_file = os.path.join(path, "fedn.yaml")
+    if not os.path.exists(yaml_file):
+        logger.error(f"Could not find fedn.yaml in {path}")
+        exit(-1)
+
+    config = _read_yaml_file(yaml_file)
+    # Check that validate is defined in fedn.yaml under entry_points
+    if "validate" not in config["entry_points"]:
+        logger.error("No validate command defined in fedn.yaml")
+        exit(-1)
+
+    dispatcher = Dispatcher(config, path)
+    _ = dispatcher._get_or_create_python_env()
+    dispatcher.run_cmd("validate {} {}".format(input, output))
+
+    # delete the virtualenv
+    if dispatcher.python_env_path:
+        logger.info(f"Removing virtualenv {dispatcher.python_env_path}")
+        shutil.rmtree(dispatcher.python_env_path)
+
+
+@run_cmd.command("train")
+@click.option("-p", "--path", required=True, help="Path to package directory containing fedn.yaml")
+@click.option("-i", "--input", required=True, help="Path to input model parameters")
+@click.option("-o", "--output", required=True, help="Path to write the updated model parameters")
+@click.pass_context
+def train_cmd(ctx, path, input, output):
+    """Execute 'train' entrypoint in fedn.yaml.
+
+    :param ctx:
+    :param path: Path to folder containing fedn.yaml
+    :type path: str
+    :param input: Path to the input model parameters
+    :type input: str
+    :param output: Path to write the updated model parameters
+    :type output: str
+    """
+    path = os.path.abspath(path)
+    yaml_file = os.path.join(path, "fedn.yaml")
+    if not os.path.exists(yaml_file):
+        logger.error(f"Could not find fedn.yaml in {path}")
+        exit(-1)
+
+    config = _read_yaml_file(yaml_file)
+    # Check that train is defined in fedn.yaml under entry_points
+    if "train" not in config["entry_points"]:
+        logger.error("No train command defined in fedn.yaml")
+        exit(-1)
+
+    dispatcher = Dispatcher(config, path)
+    _ = dispatcher._get_or_create_python_env()
+    dispatcher.run_cmd("train {} {}".format(input, output))
+
+    # delete the virtualenv
+    if dispatcher.python_env_path:
+        logger.info(f"Removing virtualenv {dispatcher.python_env_path}")
+        shutil.rmtree(dispatcher.python_env_path)
+
+
+@run_cmd.command("startup")
+@click.option("-p", "--path", required=True, help="Path to package directory containing fedn.yaml")
+@click.pass_context
+def startup_cmd(ctx, path):
+    """Execute 'startup' entrypoint in fedn.yaml.
+ + :param ctx: + :param path: Path to folder containing fedn.yaml + :type path: str + """ + path = os.path.abspath(path) + yaml_file = os.path.join(path, "fedn.yaml") + if not os.path.exists(yaml_file): + logger.error(f"Could not find fedn.yaml in {path}") + exit(-1) + + config = _read_yaml_file(yaml_file) + # Check that startup is defined in fedn.yaml under entry_points + if "startup" not in config["entry_points"]: + logger.error("No startup command defined in fedn.yaml") + exit(-1) + + dispatcher = Dispatcher(config, path) + _ = dispatcher._get_or_create_python_env() + dispatcher.run_cmd("startup") + + # delete the virtualenv + if dispatcher.python_env_path: + logger.info(f"Removing virtualenv {dispatcher.python_env_path}") + shutil.rmtree(dispatcher.python_env_path) @run_cmd.command("build") @click.option("-p", "--path", required=True, help="Path to package directory containing fedn.yaml") diff --git a/fedn/common/config.py b/fedn/common/config.py index 4864ce1ef..23d873ff7 100644 --- a/fedn/common/config.py +++ b/fedn/common/config.py @@ -2,8 +2,7 @@ import yaml -global STATESTORE_CONFIG -global MODELSTORAGE_CONFIG +from fedn.utils.dist import get_package_path SECRET_KEY = os.environ.get("FEDN_JWT_SECRET_KEY", False) FEDN_JWT_CUSTOM_CLAIM_KEY = os.environ.get("FEDN_JWT_CUSTOM_CLAIM_KEY", False) @@ -23,9 +22,15 @@ def get_environment_config(): """Get the configuration from environment variables.""" global STATESTORE_CONFIG global MODELSTORAGE_CONFIG - - STATESTORE_CONFIG = os.environ.get("STATESTORE_CONFIG", "/workspaces/fedn/config/settings-reducer.yaml.template") - MODELSTORAGE_CONFIG = os.environ.get("MODELSTORAGE_CONFIG", "/workspaces/fedn/config/settings-reducer.yaml.template") + if not os.environ.get("STATESTORE_CONFIG", False): + STATESTORE_CONFIG = get_package_path() + "/common/settings-controller.yaml.template" + else: + STATESTORE_CONFIG = os.environ.get("STATESTORE_CONFIG") + + if not os.environ.get("MODELSTORAGE_CONFIG", False): + MODELSTORAGE_CONFIG = get_package_path() + "/common/settings-controller.yaml.template" + else: + MODELSTORAGE_CONFIG = os.environ.get("MODELSTORAGE_CONFIG") def get_statestore_config(file=None): diff --git a/fedn/common/settings-controller.yaml.template b/fedn/common/settings-controller.yaml.template new file mode 100644 index 000000000..a5266a38b --- /dev/null +++ b/fedn/common/settings-controller.yaml.template @@ -0,0 +1,24 @@ +network_id: fedn-network +controller: + host: localhost + port: 8092 + debug: True + +statestore: + type: MongoDB + mongo_config: + username: fedn_admin + password: password + host: localhost + port: 6534 + +storage: + storage_type: S3 + storage_config: + storage_hostname: localhost + storage_port: 9000 + storage_access_key: fedn_admin + storage_secret_key: password + storage_bucket: fedn-models + context_bucket: fedn-context + storage_secure_mode: False diff --git a/fedn/genprot.sh b/fedn/genprot.sh index bb08ada35..def170de1 100755 --- a/fedn/genprot.sh +++ b/fedn/genprot.sh @@ -1,4 +1,4 @@ #!/bin/bash echo "Generating protocol" -python3 -m grpc_tools.protoc -I=. --python_out=. --grpc_python_out=. fedn/network/grpc/*.proto +python3 -m grpc_tools.protoc -I=. --python_out=. --grpc_python_out=. 
network/grpc/*.proto echo "DONE" diff --git a/fedn/network/api/interface.py b/fedn/network/api/interface.py index 655e4bef8..5fa771b52 100644 --- a/fedn/network/api/interface.py +++ b/fedn/network/api/interface.py @@ -1,11 +1,11 @@ import base64 import copy -import os import threading import uuid from io import BytesIO from flask import jsonify, send_from_directory +from werkzeug.security import safe_join from werkzeug.utils import secure_filename from fedn.common.config import get_controller_config, get_network_config @@ -14,7 +14,6 @@ from fedn.network.combiner.modelservice import load_model_from_BytesIO from fedn.network.state import ReducerState, ReducerStateToString from fedn.utils.checksum import sha -from fedn.utils.plots import Plot __all__ = ("API",) @@ -233,7 +232,7 @@ def set_compute_package(self, file, helper_type: str, name: str = None, descript file_name = file.filename storage_file_name = secure_filename(f"{str(uuid.uuid4())}.{extension}") - file_path = os.path.join("/app/client/package/", storage_file_name) + file_path = safe_join("/app/client/package/", storage_file_name) file.save(file_path) self.control.set_compute_package(storage_file_name, file_path) @@ -378,7 +377,7 @@ def download_compute_package(self, name): try: data = self.control.get_compute_package(name) # TODO: make configurable, perhaps in config.py or package.py - file_path = os.path.join("/app/client/package/", name) + file_path = safe_join("/app/client/package/", name) with open(file_path, "wb") as fh: fh.write(data) # TODO: make configurable, perhaps in config.py or package.py @@ -400,7 +399,7 @@ def _create_checksum(self, name=None): name, message = self._get_compute_package_name() if name is None: return False, message, "" - file_path = os.path.join("/app/client/package/", name) # TODO: make configurable, perhaps in config.py or package.py + file_path = safe_join("/app/client/package/", name) # TODO: make configurable, perhaps in config.py or package.py try: sum = str(sha(file_path)) except FileNotFoundError: @@ -555,7 +554,7 @@ def add_combiner(self, combiner_id, secure_grpc, address, remote_addr, fqdn, por return jsonify(payload) - def add_client(self, client_id, preferred_combiner, remote_addr): + def add_client(self, client_id, preferred_combiner, remote_addr, name): """Add a client to the network. :param client_id: The client id to add. @@ -601,7 +600,8 @@ def add_client(self, client_id, preferred_combiner, remote_addr): ) client_config = { - "name": client_id, + "client_id": client_id, + "name": name, "combiner_preferred": preferred_combiner, "combiner": combiner.name, "ip": remote_addr, @@ -627,8 +627,6 @@ def add_client(self, client_id, preferred_combiner, remote_addr): "certificate": cert, "helper_type": self.control.statestore.get_helper(), } - logger.info(f"Sending payload: {payload}") - return jsonify(payload) def get_initial_model(self): @@ -664,7 +662,7 @@ def set_initial_model(self, file): self.control.commit(file.filename, model) except Exception as e: logger.debug(e) - return jsonify({"success": False, "message": e}) + return jsonify({"success": False, "message": "Failed to add initial model."}) return jsonify({"success": True, "message": "Initial model added successfully."}) @@ -908,30 +906,6 @@ def get_client_config(self, checksum=True): payload["checksum"] = checksum_str return jsonify(payload) - def get_plot_data(self, feature=None): - """Get plot data. - - :return: The plot data as json response. 
- :rtype: :py:class:`flask.Response` - """ - plot = Plot(self.control.statestore) - - try: - valid_metrics = plot.fetch_valid_metrics() - feature = feature or valid_metrics[0] - box_plot = plot.create_box_plot(feature) - except Exception as e: - valid_metrics = None - box_plot = None - logger.debug(e) - - result = { - "valid_metrics": valid_metrics, - "box_plot": box_plot, - } - - return jsonify(result) - def list_combiners_data(self, combiners): """Get combiners data. diff --git a/fedn/network/api/network.py b/fedn/network/api/network.py index 045f8aa34..5e2f2ef91 100644 --- a/fedn/network/api/network.py +++ b/fedn/network/api/network.py @@ -113,10 +113,10 @@ def add_client(self, client): :type client: dict :return: None """ - if self.get_client(client["name"]): + if self.get_client(client["client_id"]): return - logger.info("adding client {}".format(client["name"])) + logger.info("adding client {}".format(client["client_id"])) self.statestore.set_client(client) def get_client(self, name): diff --git a/fedn/network/api/server.py b/fedn/network/api/server.py index 955a43c1d..d56c3ab0b 100644 --- a/fedn/network/api/server.py +++ b/fedn/network/api/server.py @@ -9,6 +9,7 @@ from fedn.network.api.v1 import _routes custom_url_prefix = os.environ.get("FEDN_CUSTOM_URL_PREFIX", False) +# statestore_config,modelstorage_config,network_id,control=set_statestore_config() api = API(statestore, control) app = Flask(__name__) for bp in _routes: @@ -568,8 +569,10 @@ def add_combiner(): remote_addr = request.remote_addr try: response = api.add_combiner(**json_data, remote_addr=remote_addr) - except TypeError as e: - return jsonify({"success": False, "message": str(e)}), 400 + except TypeError: + return jsonify({"success": False, "message": "Invalid data provided"}), 400 + except Exception: + return jsonify({"success": False, "message": "An unexpected error occurred"}), 500 return response @@ -588,8 +591,10 @@ def add_client(): remote_addr = request.remote_addr try: response = api.add_client(**json_data, remote_addr=remote_addr) - except TypeError as e: - return jsonify({"success": False, "message": str(e)}), 400 + except TypeError: + return jsonify({"success": False, "message": "Invalid data provided"}), 400 + except Exception: + return jsonify({"success": False, "message": "An unexpected error occurred"}), 500 return response @@ -611,8 +616,10 @@ def list_combiners_data(): try: response = api.list_combiners_data(combiners) - except TypeError as e: - return jsonify({"success": False, "message": str(e)}), 400 + except TypeError: + return jsonify({"success": False, "message": "Invalid data provided"}), 400 + except Exception: + return jsonify({"success": False, "message": "An unexpected error occurred"}), 500 return response @@ -620,25 +627,13 @@ def list_combiners_data(): app.add_url_rule(f"{custom_url_prefix}/list_combiners_data", view_func=list_combiners_data, methods=["POST"]) -@app.route("/get_plot_data", methods=["GET"]) -@jwt_auth_required(role="admin") -def get_plot_data(): - """Get plot data from the statestore. 
-        rtype: json
-    """
-    try:
-        feature = request.args.get("feature", None)
-        response = api.get_plot_data(feature=feature)
-    except TypeError as e:
-        return jsonify({"success": False, "message": str(e)}), 400
-    return response
-
-
-if custom_url_prefix:
-    app.add_url_rule(f"{custom_url_prefix}/get_plot_data", view_func=get_plot_data, methods=["GET"])
-
-if __name__ == "__main__":
+def start_server_api():
     config = get_controller_config()
     port = config["port"]
     debug = config["debug"]
-    app.run(debug=debug, port=port, host="0.0.0.0")
+    host = "0.0.0.0"
+    app.run(debug=debug, port=port, host=host)
+
+
+if __name__ == "__main__":
+    start_server_api()
diff --git a/fedn/network/api/shared.py b/fedn/network/api/shared.py
index fc8d4ae57..9e0e5acbd 100644
--- a/fedn/network/api/shared.py
+++ b/fedn/network/api/shared.py
@@ -5,7 +5,6 @@
 statestore_config = get_statestore_config()
 modelstorage_config = get_modelstorage_config()
 network_id = get_network_config()
-
 statestore = MongoStateStore(network_id, statestore_config["mongo_config"])
 statestore.set_storage_backend(modelstorage_config)
 control = Control(statestore=statestore)
diff --git a/fedn/network/api/v1/client_routes.py b/fedn/network/api/v1/client_routes.py
index d5ccc58ee..8fa13febe 100644
--- a/fedn/network/api/v1/client_routes.py
+++ b/fedn/network/api/v1/client_routes.py
@@ -121,8 +121,8 @@ def get_clients():
 
         response = {"count": clients["count"], "result": result}
 
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
 
 
 @bp.route("/list", methods=["POST"])
@@ -206,8 +206,8 @@ def list_clients():
 
         response = {"count": clients["count"], "result": result}
 
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
 
 
 @bp.route("/count", methods=["GET"])
@@ -267,8 +267,8 @@ def get_clients_count():
         count = client_store.count(**kwargs)
         response = count
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
 
 
 @bp.route("/count", methods=["POST"])
@@ -320,8 +320,8 @@ def clients_count():
         count = client_store.count(**kwargs)
         response = count
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
 
 
 @bp.route("/<string:id>", methods=["GET"])
@@ -364,7 +364,7 @@ def get_client(id: str):
 
         response = client
 
         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/combiner_routes.py b/fedn/network/api/v1/combiner_routes.py
index 1f9360461..9210a7e30 100644
--- a/fedn/network/api/v1/combiner_routes.py
+++ b/fedn/network/api/v1/combiner_routes.py
@@ -113,8 +113,8 @@ def get_combiners():
 
         response = {"count": combiners["count"], "result": result}
 
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
 
 
 @bp.route("/list", methods=["POST"])
methods=["POST"]) @@ -196,8 +196,8 @@ def list_combiners(): response = {"count": combiners["count"], "result": result} return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/count", methods=["GET"]) @@ -243,8 +243,8 @@ def get_combiners_count(): count = combiner_store.count(**kwargs) response = count return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/count", methods=["POST"]) @@ -292,8 +292,8 @@ def combiners_count(): count = combiner_store.count(**kwargs) response = count return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/", methods=["GET"]) @@ -335,7 +335,7 @@ def get_combiner(id: str): response = combiner return jsonify(response), 200 - except EntityNotFound as e: - return jsonify({"message": str(e)}), 404 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except EntityNotFound: + return jsonify({"message": f"Entity with id: {id} not found"}), 404 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 diff --git a/fedn/network/api/v1/model_routes.py b/fedn/network/api/v1/model_routes.py index 4de0822d2..f227443e0 100644 --- a/fedn/network/api/v1/model_routes.py +++ b/fedn/network/api/v1/model_routes.py @@ -4,8 +4,8 @@ from flask import Blueprint, jsonify, request, send_file from fedn.network.api.auth import jwt_auth_required -from fedn.network.api.v1.shared import api_version, get_limit, get_post_data_to_kwargs, get_reverse, get_typed_list_headers, mdb from fedn.network.api.shared import modelstorage_config +from fedn.network.api.v1.shared import api_version, get_limit, get_post_data_to_kwargs, get_reverse, get_typed_list_headers, mdb from fedn.network.storage.s3.base import RepositoryBase from fedn.network.storage.s3.miniorepository import MINIORepository from fedn.network.storage.statestore.stores.model_store import ModelStore @@ -117,8 +117,8 @@ def get_models(): response = {"count": models["count"], "result": result} return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/list", methods=["POST"]) @@ -202,8 +202,8 @@ def list_models(): response = {"count": models["count"], "result": result} return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/count", methods=["GET"]) @@ -250,8 +250,8 @@ def get_models_count(): count = model_store.count(**kwargs) response = count return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/count", methods=["POST"]) @@ -302,8 +302,8 @@ def models_count(): count = model_store.count(**kwargs) response = count return jsonify(response), 200 - except Exception as e: - return jsonify({"message": str(e)}), 500 + except Exception: + return jsonify({"message": "An unexpected error occurred"}), 500 @bp.route("/", methods=["GET"]) @@ -346,10 +346,132 @@ def get_model(id: 
         response = model

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+@bp.route("/<string:id>", methods=["PATCH"])
+@jwt_auth_required(role="admin")
+def patch_model(id: str):
+    """Patch model
+    Updates a model based on the provided id. Only the fields that are present in the request will be updated.
+    ---
+    tags:
+      - Models
+    parameters:
+      - name: id
+        in: path
+        required: true
+        type: string
+        description: The id or model property of the model
+      - name: model
+        in: body
+        required: true
+        type: object
+        description: The model data to update
+    responses:
+      200:
+        description: The updated model
+        schema:
+          $ref: '#/definitions/Model'
+      404:
+        description: The model was not found
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+      500:
+        description: An error occurred
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+    """
+    try:
+        model = model_store.get(id, use_typing=False)
+
+        data = request.get_json()
+        _id = model["id"]
+
+        # Update the model with the new data
+        # Only update the fields that are present in the request
+        for key, value in data.items():
+            if key in ["_id", "model"]:
+                continue
+            model[key] = value
+
+        success, message = model_store.update(_id, model)
+
+        if success:
+            response = model
+            return jsonify(response), 200
+
+        return jsonify({"message": f"Failed to update model: {message}"}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+@bp.route("/<string:id>", methods=["PUT"])
+@jwt_auth_required(role="admin")
+def put_model(id: str):
+    """Put model
+    Updates a model based on the provided id. All fields will be updated with the new data.
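+
+    A hypothetical usage sketch (editor's addition, not part of this change;
+    the host, token and id are placeholders):
+
+        import requests
+        requests.put(
+            "http://localhost:8092/api/v1/models/<id>",
+            json={"name": "my-model"},
+            headers={"Authorization": "Bearer <admin-token>"},
+        )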
+    ---
+    tags:
+      - Models
+    parameters:
+      - name: id
+        in: path
+        required: true
+        type: string
+        description: The id or model property of the model
+      - name: model
+        in: body
+        required: true
+        type: object
+        description: The model data to update
+    responses:
+      200:
+        description: The updated model
+        schema:
+          $ref: '#/definitions/Model'
+      404:
+        description: The model was not found
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+      500:
+        description: An error occurred
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+    """
+    try:
+        model = model_store.get(id, use_typing=False)
+        data = request.get_json()
+        _id = model["id"]
+
+        success, message = model_store.update(_id, data)
+
+        if success:
+            response = model
+            return jsonify(response), 200
+
+        return jsonify({"message": f"Failed to update model: {message}"}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>/descendants", methods=["GET"])
@@ -400,10 +522,10 @@ def get_descendants(id: str):
         response = descendants

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>/ancestors", methods=["GET"])
@@ -469,13 +591,14 @@ def get_ancestors(id: str):
         response = ancestors

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>/download", methods=["GET"])
+@jwt_auth_required(role="admin")
 def download(id: str):
     """Download
     Downloads the model file of the provided id.
@@ -517,13 +640,14 @@ def download(id: str):
             return send_file(file, as_attachment=True, download_name=model_id)
         else:
             return jsonify({"message": "No model storage configured"}), 500
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>/parameters", methods=["GET"])
+@jwt_auth_required(role="admin")
 def get_parameters(id: str):
     """Download
     Downloads parameters of the model of the provided id.
@@ -581,7 +705,40 @@ def get_parameters(id: str):
             return jsonify(array=weights), 200
         else:
             return jsonify({"message": "No model storage configured"}), 500
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+@bp.route("/active", methods=["GET"])
+@jwt_auth_required(role="admin")
+def get_active_model():
+    """Get active model
+    Retrieves the active model (id).
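+
+    A hypothetical usage sketch (editor's addition, not part of this change;
+    the host and token are placeholders):
+
+        import requests
+        response = requests.get(
+            "http://localhost:8092/api/v1/models/active",
+            headers={"Authorization": "Bearer <admin-token>"},
+        )
+        model_id = response.json()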
+    ---
+    tags:
+      - Models
+    responses:
+      200:
+        description: The active model id
+        schema:
+          type: string
+      500:
+        description: An error occurred
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+    """
+    try:
+        active_model = model_store.get_active()
+
+        response = active_model
+
+        return jsonify(response), 200
+    except EntityNotFound:
+        return jsonify({"message": "No active model found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/package_routes.py b/fedn/network/api/v1/package_routes.py
index 65783f54b..7add1a220 100644
--- a/fedn/network/api/v1/package_routes.py
+++ b/fedn/network/api/v1/package_routes.py
@@ -125,8 +125,8 @@ def get_packages():
         response = {"count": packages["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/list", methods=["POST"])
@@ -213,8 +213,8 @@ def list_packages():
         response = {"count": packages["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["GET"])
@@ -274,8 +274,8 @@ def get_packages_count():
         count = package_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["POST"])
@@ -336,8 +336,8 @@ def packages_count():
         count = package_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>", methods=["GET"])
@@ -381,10 +381,10 @@ def get_package(id: str):
         response = package.__dict__ if use_typing else package

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/active", methods=["GET"])
@@ -421,7 +421,7 @@ def get_active_package():
         response = package.__dict__ if use_typing else package

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": "No active package found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/round_routes.py b/fedn/network/api/v1/round_routes.py
index 4c2eb0c44..14476a091 100644
--- a/fedn/network/api/v1/round_routes.py
+++ b/fedn/network/api/v1/round_routes.py
@@ -101,8 +101,8 @@ def get_rounds():
         response = {"count": rounds["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/list", methods=["POST"])
@@ -180,8 +180,8 @@ def list_rounds():
         response = {"count": rounds["count"], "result": result}
         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["GET"])
@@ -221,8 +221,8 @@ def get_rounds_count():
         count = round_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["POST"])
@@ -266,8 +266,8 @@ def rounds_count():
         count = round_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>", methods=["GET"])
@@ -309,7 +309,7 @@ def get_round(id: str):
         response = round

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/session_routes.py b/fedn/network/api/v1/session_routes.py
index 240cb8443..a045e60bf 100644
--- a/fedn/network/api/v1/session_routes.py
+++ b/fedn/network/api/v1/session_routes.py
@@ -3,11 +3,14 @@
 from flask import Blueprint, jsonify, request

 from fedn.network.api.auth import jwt_auth_required
+from fedn.network.api.shared import control
 from fedn.network.api.v1.shared import api_version, get_post_data_to_kwargs, get_typed_list_headers, mdb
+from fedn.network.combiner.interfaces import CombinerUnavailableError
+from fedn.network.state import ReducerState
 from fedn.network.storage.statestore.stores.session_store import SessionStore
 from fedn.network.storage.statestore.stores.shared import EntityNotFound
+
 from .model_routes import model_store
-from fedn.network.api.shared import control

 bp = Blueprint("session", __name__, url_prefix=f"/api/{api_version}/sessions")
@@ -97,8 +100,8 @@ def get_sessions():
         response = {"count": sessions["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/list", methods=["POST"])
@@ -175,8 +178,8 @@ def list_sessions():
         response = {"count": sessions["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["GET"])
@@ -216,8 +219,8 @@ def get_sessions_count():
         count = session_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["POST"])
@@ -261,8 +264,8 @@ def sessions_count():
         count = session_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>", methods=["GET"])
@@ -304,10 +307,10 @@ def get_session(id: str):
         response = session

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/", methods=["POST"])
@@ -349,8 +352,20 @@ def post():
         status_code: int = 201 if successful else 400

         return jsonify(response), status_code
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+def _get_number_of_available_clients():
+    result = 0
+    for combiner in control.network.get_combiners():
+        try:
+            nr_active_clients = len(combiner.list_active_clients())
+            result = result + int(nr_active_clients)
+        except CombinerUnavailableError:
+            return 0
+
+    return result


 @bp.route("/start", methods=["POST"])
@@ -366,25 +381,152 @@ def start_session():
         data = request.json if request.headers["Content-Type"] == "application/json" else request.form.to_dict()
         session_id: str = data.get("session_id")
         rounds: int = data.get("rounds", "")
+        round_timeout: int = data.get("round_timeout", None)

         if not session_id or session_id == "":
             return jsonify({"message": "Session ID is required"}), 400

-        if not rounds or rounds == "":
-            return jsonify({"message": "Rounds is required"}), 400
-
-        if not isinstance(rounds, int):
-            return jsonify({"message": "Rounds must be an integer"}), 400
-
         session = session_store.get(session_id, use_typing=False)

         session_config = session["session_config"]
         model_id = session_config["model_id"]
+        min_clients = session_config["clients_required"]
+
+        if control.state() == ReducerState.monitoring:
+            return jsonify({"message": "A session is already running."})
+
+        if not rounds or not isinstance(rounds, int):
+            rounds = session_config["rounds"]
+
+        nr_available_clients = _get_number_of_available_clients()
+
+        if nr_available_clients < min_clients:
+            return jsonify({"message": f"Number of available clients is lower than the required minimum of {min_clients}"}), 400

         _ = model_store.get(model_id, use_typing=False)

-        threading.Thread(target=control.start_session, args=(session_id, rounds)).start()
+        threading.Thread(target=control.start_session, args=(session_id, rounds, round_timeout)).start()

         return jsonify({"message": "Session started"}), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+@bp.route("/<string:id>", methods=["PATCH"])
+@jwt_auth_required(role="admin")
+def patch_session(id: str):
+    """Patch session
+    Updates a session based on the provided id. Only the fields that are present in the request will be updated.
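+
+    A hypothetical usage sketch (editor's addition, not part of this change;
+    the host, token, id and patched field are placeholders):
+
+        import requests
+        requests.patch(
+            "http://localhost:8092/api/v1/sessions/<id>",
+            json={"status": "Terminated"},
+            headers={"Authorization": "Bearer <admin-token>"},
+        )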
+    ---
+    tags:
+      - Sessions
+    parameters:
+      - name: id
+        in: path
+        required: true
+        type: string
+        description: The id or session property of the session
+      - name: session
+        in: body
+        required: true
+        type: object
+        description: The session data to update
+    responses:
+      200:
+        description: The updated session
+        schema:
+          $ref: '#/definitions/Session'
+      404:
+        description: The session was not found
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+      500:
+        description: An error occurred
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+    """
+    try:
+        session = session_store.get(id, use_typing=False)
+
+        data = request.get_json()
+        _id = session["id"]
+
+        # Update the session with the new data
+        # Only update the fields that are present in the request
+        for key, value in data.items():
+            if key in ["_id", "session_id"]:
+                continue
+            session[key] = value
+
+        success, message = session_store.update(_id, session)
+
+        if success:
+            response = session
+            return jsonify(response), 200
+
+        return jsonify({"message": f"Failed to update session: {message}"}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
+
+
+@bp.route("/<string:id>", methods=["PUT"])
+@jwt_auth_required(role="admin")
+def put_session(id: str):
+    """Put session
+    Updates a session based on the provided id. All fields will be updated with the new data.
+    ---
+    tags:
+      - Sessions
+    parameters:
+      - name: id
+        in: path
+        required: true
+        type: string
+        description: The id or session property of the session
+      - name: session
+        in: body
+        required: true
+        type: object
+        description: The session data to update
+    responses:
+      200:
+        description: The updated session
+        schema:
+          $ref: '#/definitions/Session'
+      404:
+        description: The session was not found
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+      500:
+        description: An error occurred
+        schema:
+          type: object
+          properties:
+            message:
+              type: string
+    """
+    try:
+        session = session_store.get(id, use_typing=False)
+        data = request.get_json()
+        _id = session["id"]
+
+        success, message = session_store.update(_id, data)
+
+        if success:
+            response = session
+            return jsonify(response), 200
+
+        return jsonify({"message": f"Failed to update session: {message}"}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/shared.py b/fedn/network/api/v1/shared.py
index b7ae170af..a27a6f637 100644
--- a/fedn/network/api/v1/shared.py
+++ b/fedn/network/api/v1/shared.py
@@ -3,10 +3,9 @@
 import pymongo
 from pymongo.database import Database

-from fedn.network.api.shared import statestore_config, network_id
+from fedn.network.api.shared import statestore_config,network_id

 api_version = "v1"
-
 mc = pymongo.MongoClient(**statestore_config["mongo_config"])
 mc.server_info()
 mdb: Database = mc[network_id]
diff --git a/fedn/network/api/v1/status_routes.py b/fedn/network/api/v1/status_routes.py
index b88772b01..cf3907bea 100644
--- a/fedn/network/api/v1/status_routes.py
+++ b/fedn/network/api/v1/status_routes.py
@@ -131,8 +131,8 @@ def get_statuses():
         response = {"count": statuses["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/list", methods=["POST"])
@@ -226,8 +226,8 @@ def list_statuses():
         response = {"count": statuses["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["GET"])
@@ -288,8 +288,8 @@ def get_statuses_count():
         count = status_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["POST"])
@@ -350,8 +350,8 @@ def statuses_count():
         count = status_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>", methods=["GET"])
@@ -395,7 +395,7 @@ def get_status(id: str):
         response = status.__dict__ if use_typing else status

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/api/v1/validation_routes.py b/fedn/network/api/v1/validation_routes.py
index 59767e3e8..665abbb4b 100644
--- a/fedn/network/api/v1/validation_routes.py
+++ b/fedn/network/api/v1/validation_routes.py
@@ -138,8 +138,8 @@ def get_validations():
         response = {"count": validations["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/list", methods=["POST"])
@@ -236,8 +236,8 @@ def list_validations():
         response = {"count": validations["count"], "result": result}

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["GET"])
@@ -302,8 +302,8 @@ def get_validations_count():
         count = validation_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/count", methods=["POST"])
@@ -367,8 +367,8 @@ def validations_count():
         count = validation_store.count(**kwargs)
         response = count

         return jsonify(response), 200
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500


 @bp.route("/<string:id>", methods=["GET"])
@@ -412,7 +412,7 @@ def get_validation(id: str):
         response = validation.__dict__ if use_typing else validation

         return jsonify(response), 200
-    except EntityNotFound as e:
-        return jsonify({"message": str(e)}), 404
-    except Exception as e:
-        return jsonify({"message": str(e)}), 500
+    except EntityNotFound:
+        return jsonify({"message": f"Entity with id: {id} not found"}), 404
+    except Exception:
+        return jsonify({"message": "An unexpected error occurred"}), 500
diff --git a/fedn/network/clients/client.py b/fedn/network/clients/client.py
index c0f1b0baa..e594d7b6d 100644
--- a/fedn/network/clients/client.py
+++ b/fedn/network/clients/client.py
@@ -14,6 +14,7 @@
 from shutil import copytree

 import grpc
+import requests
 from cryptography.hazmat.primitives.serialization import Encoding
 from google.protobuf.json_format import MessageToJson
 from OpenSSL import SSL
@@ -22,13 +23,11 @@
 import fedn.network.grpc.fedn_pb2 as fedn
 import fedn.network.grpc.fedn_pb2_grpc as rpc
 from fedn.common.config import FEDN_AUTH_SCHEME, FEDN_PACKAGE_EXTRACT_DIR
-from fedn.common.log_config import (logger, set_log_level_from_string,
-                                    set_log_stream)
+from fedn.common.log_config import logger, set_log_level_from_string, set_log_stream
 from fedn.network.clients.connect import ConnectorClient, Status
 from fedn.network.clients.package import PackageRuntime
 from fedn.network.clients.state import ClientState, ClientStateToString
-from fedn.network.combiner.modelservice import (get_tmp_path,
-                                                upload_request_generator)
+from fedn.network.combiner.modelservice import get_tmp_path, upload_request_generator
 from fedn.utils.dispatcher import Dispatcher
 from fedn.utils.helpers.helpers import get_helper
@@ -64,6 +63,8 @@ def __init__(self, config):
         set_log_level_from_string(config.get("verbosity", "INFO"))
         set_log_stream(config.get("logfile", None))

+        self.id = config["client_id"] or str(uuid.uuid4())
+
         self.connector = ConnectorClient(
             host=config["discover_host"],
             port=config["discover_port"],
@@ -73,7 +74,7 @@ def __init__(self, config):
             force_ssl=config["force_ssl"],
             verify=config["verify"],
             combiner=config["preferred_combiner"],
-            id=config["client_id"],
+            id=self.id,
         )

         # Validate client name
@@ -421,6 +422,7 @@ def _listen_to_task_stream(self):
         r = fedn.ClientAvailableMessage()
         r.sender.name = self.name
         r.sender.role = fedn.WORKER
+        r.sender.client_id = self.id

         # Add client to metadata
         self._add_grpc_metadata("client", self.name)
@@ -438,12 +440,18 @@ def _listen_to_task_stream(self):
                             request=request,
                             sesssion_id=request.session_id,
                         )
-                    logger.info("Received model update request of type {} for model_id {}".format(request.type, request.model_id))
+                    logger.info("Received task request of type {} for model_id {}".format(request.type, request.model_id))

                 if request.type == fedn.StatusType.MODEL_UPDATE and self.config["trainer"]:
                     self.inbox.put(("train", request))
                 elif request.type == fedn.StatusType.MODEL_VALIDATION and self.config["validator"]:
                     self.inbox.put(("validate", request))
+                elif request.type == fedn.StatusType.INFERENCE and self.config["validator"]:
+                    logger.info("Received inference request for model_id {}".format(request.model_id))
+                    presigned_url = json.loads(request.data)
+                    presigned_url = presigned_url["presigned_url"]
+                    logger.info("Inference presigned URL: {}".format(presigned_url))
+                    self.inbox.put(("infer", request))
                 else:
                     logger.error("Unknown request type: {}".format(request.type))
@@ -586,6 +594,51 @@ def _process_validation_request(self, model_id: str, is_inference: bool, session
         self.state = ClientState.idle
         return validation

+    def _process_inference_request(self, model_id: str, session_id: str, presigned_url: str):
+        """Process an inference request.
+
+        :param model_id: The model id of the model to be used for inference.
+        :type model_id: str
+        :param session_id: The id of the current session.
+        :type session_id: str
+        :param presigned_url: The presigned URL for the data to be used for inference.
+        :type presigned_url: str
+        :return: None
+        """
+        self.send_status(f"Processing inference request for model_id {model_id}", sesssion_id=session_id)
+        try:
+            model = self.get_model_from_combiner(str(model_id))
+            if model is None:
+                logger.error("Could not retrieve model from combiner. Aborting inference request.")
+                return
+            inpath = self.helper.get_tmp_path()
+
+            with open(inpath, "wb") as fh:
+                fh.write(model.getbuffer())
+
+            outpath = get_tmp_path()
+            self.dispatcher.run_cmd(f"predict {inpath} {outpath}")
+
+            # Upload the inference result to the presigned URL
+            with open(outpath, "rb") as fh:
+                response = requests.put(presigned_url, data=fh.read())
+
+            os.unlink(inpath)
+            os.unlink(outpath)
+
+            if response.status_code != 200:
+                logger.warning("Inference upload failed with status code {}".format(response.status_code))
+                self.state = ClientState.idle
+                return
+
+        except Exception as e:
+            logger.warning("Inference failed with exception {}".format(e))
+            self.state = ClientState.idle
+            return
+
+        self.state = ClientState.idle
+        return
+
     def process_request(self):
         """Process training and validation tasks."""
         while True:
@@ -682,6 +735,22 @@ def process_request(self):
                     self.state = ClientState.idle
                     self.inbox.task_done()
+                elif task_type == "infer":
+                    self.state = ClientState.inferencing
+                    try:
+                        presigned_url = json.loads(request.data)
+                    except json.JSONDecodeError as e:
+                        logger.error(f"Failed to decode inference request data: {e}")
+                        self.state = ClientState.idle
+                        continue
+
+                    if "presigned_url" not in presigned_url:
+                        logger.error("Inference request missing presigned_url.")
+                        self.state = ClientState.idle
+                        continue
+
+                    presigned_url = presigned_url["presigned_url"]
+                    _ = self._process_inference_request(request.model_id, request.session_id, presigned_url)
+                    self.state = ClientState.idle
             except queue.Empty:
                 pass
             except grpc.RpcError as e:
@@ -696,7 +765,7 @@ def _send_heartbeat(self, update_frequency=2.0):
         :rtype: None
         """
         while True:
-            heartbeat = fedn.Heartbeat(sender=fedn.Client(name=self.name, role=fedn.WORKER))
+            heartbeat = fedn.Heartbeat(sender=fedn.Client(name=self.name, role=fedn.WORKER, client_id=self.id))
             try:
                 self.connectorStub.SendHeartbeat(heartbeat, metadata=self.metadata)
                 self._missed_heartbeat = 0
diff --git a/fedn/network/clients/connect.py b/fedn/network/clients/connect.py
index 09450c5ab..59aaead35 100644
--- a/fedn/network/clients/connect.py
+++ b/fedn/network/clients/connect.py
@@ -74,7 +74,7 @@ def assign(self):
         """
         try:
             retval = None
-            payload = {"client_id": self.name, "preferred_combiner": self.preferred_combiner}
+            payload = {"name": self.name, "client_id": self.id, "preferred_combiner": self.preferred_combiner}
             retval = requests.post(
                 self.connect_string + FEDN_CUSTOM_URL_PREFIX + "/add_client",
                 json=payload,
diff --git a/fedn/network/clients/state.py b/fedn/network/clients/state.py
index a349f846e..d7f82a769 100644
--- a/fedn/network/clients/state.py
+++ b/fedn/network/clients/state.py
@@ -7,6 +7,7 @@ class ClientState(Enum):
     idle = 1
     training = 2
     validating = 3
+    inferencing = 4


 def ClientStateToString(state):
diff --git a/fedn/network/combiner/combiner.py b/fedn/network/combiner/combiner.py
index 22f4bc807..78fe39026 100644
--- a/fedn/network/combiner/combiner.py
+++ b/fedn/network/combiner/combiner.py
@@ -127,7 +127,10 @@ def __init__(self, config):
         # Set the status to offline for previous clients.
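+        # Editor's note (assumption): client records created before this change
+        # may lack the new client_id field, which is why the KeyError fallback
+        # below retains the old name-only update.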
         previous_clients = self.statestore.clients.find({"combiner": config["name"]})
         for client in previous_clients:
-            self.statestore.set_client({"name": client["name"], "status": "offline"})
+            try:
+                self.statestore.set_client({"name": client["name"], "status": "offline", "client_id": client["client_id"]})
+            except KeyError:
+                self.statestore.set_client({"name": client["name"], "status": "offline"})

         self.modelservice = ModelService()
@@ -169,12 +172,12 @@ def request_model_update(self, session_id, model_id, config, clients=[]):
         :type clients: list
         """
-        request, clients = self._send_request_type(fedn.StatusType.MODEL_UPDATE, session_id, model_id, config, clients)
+        clients = self._send_request_type(fedn.StatusType.MODEL_UPDATE, session_id, model_id, config, clients)

         if len(clients) < 20:
-            logger.info("Sent model update request for model {} to clients {}".format(request.model_id, clients))
+            logger.info("Sent model update request for model {} to clients {}".format(model_id, clients))
         else:
-            logger.info("Sent model update request for model {} to {} clients".format(request.model_id, len(clients)))
+            logger.info("Sent model update request for model {} to {} clients".format(model_id, len(clients)))

     def request_model_validation(self, session_id, model_id, clients=[]):
         """Ask clients to validate the current global model.
@@ -187,12 +190,12 @@ def request_model_validation(self, session_id, model_id, clients=[]):
         :type clients: list
         """
-        request, clients = self._send_request_type(fedn.StatusType.MODEL_VALIDATION, session_id, model_id, clients)
+        clients = self._send_request_type(fedn.StatusType.MODEL_VALIDATION, session_id, model_id, clients=clients)

         if len(clients) < 20:
-            logger.info("Sent model validation request for model {} to clients {}".format(request.model_id, clients))
+            logger.info("Sent model validation request for model {} to clients {}".format(model_id, clients))
         else:
-            logger.info("Sent model validation request for model {} to {} clients".format(request.model_id, len(clients)))
+            logger.info("Sent model validation request for model {} to {} clients".format(model_id, len(clients)))

     def request_model_inference(self, session_id: str, model_id: str, clients: list = []) -> None:
         """Ask clients to perform inference on the model.
@@ -205,12 +208,12 @@ def request_model_inference(self, session_id: str, model_id: str, clients: list
         :type clients: list
         """
-        request, clients = self._send_request_type(fedn.StatusType.INFERENCE, session_id, model_id, clients)
+        clients = self._send_request_type(fedn.StatusType.INFERENCE, session_id, model_id, clients=clients)

         if len(clients) < 20:
-            logger.info("Sent model inference request for model {} to clients {}".format(request.model_id, clients))
+            logger.info("Sent model inference request for model {} to clients {}".format(model_id, clients))
         else:
-            logger.info("Sent model inference request for model {} to {} clients".format(request.model_id, len(clients)))
+            logger.info("Sent model inference request for model {} to {} clients".format(model_id, len(clients)))

     def _send_request_type(self, request_type, session_id, model_id, config=None, clients=[]):
         """Send a request of a specific type to clients.
@@ -223,40 +226,38 @@ def _send_request_type(self, request_type, session_id, model_id, config=None, clients=[]):
         :type config: dict
         :param clients: the clients to send the request to
         :type clients: list
-        :return: the request and the clients
-        :rtype: tuple
+        :return: the clients
+        :rtype: list
         """
-        request = fedn.TaskRequest()
-        request.model_id = model_id
-        request.correlation_id = str(uuid.uuid4())
-        request.timestamp = str(datetime.now())
-        request.type = request_type
-        request.session_id = session_id
-
-        request.sender.name = self.id
-        request.sender.role = fedn.COMBINER
-
-        if request_type == fedn.StatusType.MODEL_UPDATE:
-            request.data = json.dumps(config)
-            if len(clients) == 0:
+        if len(clients) == 0:
+            if request_type == fedn.StatusType.MODEL_UPDATE:
                 clients = self.get_active_trainers()
-        elif request_type == fedn.StatusType.MODEL_VALIDATION:
-            if len(clients) == 0:
+            elif request_type == fedn.StatusType.MODEL_VALIDATION:
                 clients = self.get_active_validators()
-        elif request_type == fedn.StatusType.INFERENCE:
-            request.data = json.dumps(config)
-            if len(clients) == 0:
+            elif request_type == fedn.StatusType.INFERENCE:
                 # TODO: add inference clients type
                 clients = self.get_active_validators()
-
-        # TODO: if inference, request.data should be user-defined data/parameters
         for client in clients:
-            request.receiver.name = client
+            request = fedn.TaskRequest()
+            request.model_id = model_id
+            request.correlation_id = str(uuid.uuid4())
+            request.timestamp = str(datetime.now())
+            request.type = request_type
+            request.session_id = session_id
+
+            request.sender.name = self.id
+            request.sender.role = fedn.COMBINER
+            request.receiver.client_id = client
             request.receiver.role = fedn.WORKER
+            # Set the request data, not used in validation
+            if request_type == fedn.StatusType.INFERENCE:
+                presigned_url = self.repository.presigned_put_url(self.repository.inference_bucket, f"{client}/{session_id}")
+                # TODO: in inference, request.data should also contain user-defined data/parameters
+                request.data = json.dumps({"presigned_url": presigned_url})
+            elif request_type == fedn.StatusType.MODEL_UPDATE:
+                request.data = json.dumps(config)
             self._put_request_to_client_queue(request, fedn.Queue.TASK_QUEUE)
-
-        return request, clients
+        return clients

     def get_active_trainers(self):
         """Get a list of active trainers.
@@ -292,9 +293,9 @@ def __join_client(self, client):
         :param client: the client to add
         :type client: :class:`fedn.network.grpc.fedn_pb2.Client`
         """
-        if client.name not in self.clients.keys():
+        if client.client_id not in self.clients.keys():
             # The status is set to offline by default, and will be updated once _list_active_clients is called.
-            self.clients[client.name] = {"lastseen": datetime.now(), "status": "offline"}
+            self.clients[client.client_id] = {"last_seen": datetime.now(), "status": "offline"}

     def _subscribe_client_to_queue(self, client, queue_name):
         """Subscribe a client to the queue.
@@ -305,8 +306,8 @@
         :param queue_name: the name of the queue
         :type queue_name: str
         """
         self.__join_client(client)
-        if queue_name not in self.clients[client.name].keys():
-            self.clients[client.name][queue_name] = queue.Queue()
+        if queue_name not in self.clients[client.client_id].keys():
+            self.clients[client.client_id][queue_name] = queue.Queue()

     def __get_queue(self, client, queue_name):
         """Get the queue for a client.
@@ -321,7 +322,7 @@ def __get_queue(self, client, queue_name):
         :raises KeyError: if the queue does not exist
         """
         try:
-            return self.clients[client.name][queue_name]
+            return self.clients[client.client_id][queue_name]
         except KeyError:
             raise
@@ -356,7 +357,7 @@ def _list_active_clients(self, channel):
         for client in self._list_subscribed_clients(channel):
             status = self.clients[client]["status"]
             now = datetime.now()
-            then = self.clients[client]["lastseen"]
+            then = self.clients[client]["last_seen"]
             if (now - then) < timedelta(seconds=10):
                 clients["active_clients"].append(client)
                 # If client has changed status, update statestore
@@ -621,7 +622,7 @@ def SendHeartbeat(self, heartbeat: fedn.Heartbeat, context):
         # Update the clients dict with the last seen timestamp.
         client = heartbeat.sender
         self.__join_client(client)
-        self.clients[client.name]["lastseen"] = datetime.now()
+        self.clients[client.client_id]["last_seen"] = datetime.now()

         response = fedn.Response()
         response.sender.name = heartbeat.sender.name
@@ -657,15 +658,15 @@ def TaskStream(self, response, context):
         self._send_status(status)

         # Set client status to online
-        self.clients[client.name]["status"] = "online"
-        self.statestore.set_client({"name": client.name, "status": "online"})
+        self.clients[client.client_id]["status"] = "online"
+        self.statestore.set_client({"name": client.name, "status": "online", "client_id": client.client_id, "last_seen": datetime.now()})

         # Keep track of the time context has been active
         start_time = time.time()
         while context.is_active():
             # Check if the context has been active for more than 10 seconds
             if time.time() - start_time > 10:
-                self.clients[client.name]["lastseen"] = datetime.now()
+                self.clients[client.client_id]["last_seen"] = datetime.now()
                 # Reset the start time
                 start_time = time.time()
             try:
diff --git a/fedn/network/combiner/roundhandler.py b/fedn/network/combiner/roundhandler.py
index b99a7c524..c1872eaf1 100644
--- a/fedn/network/combiner/roundhandler.py
+++ b/fedn/network/combiner/roundhandler.py
@@ -316,7 +316,6 @@ def _assign_round_clients(self, n, type="trainers"):
             clients = self.server.get_active_trainers()
         else:
             logger.error("(ERROR): {} is not a supported type of client".format(type))
-            raise

         # If the number of requested trainers exceeds the number of available, use all available.
         if n > len(clients):
diff --git a/fedn/network/controller/control.py b/fedn/network/controller/control.py
index 4fe331007..708ce4440 100644
--- a/fedn/network/controller/control.py
+++ b/fedn/network/controller/control.py
@@ -67,6 +67,20 @@ def __init__(self, message):
         super().__init__(self.message)


+class SessionTerminatedException(Exception):
+    """Exception class for when session is terminated"""
+
+    def __init__(self, message):
+        """Constructor method.
+
+        :param message: The exception message.
+        :type message: str
+
+        """
+        self.message = message
+        super().__init__(self.message)
+
+
 class Control(ControlBase):
     """Controller, implementing the overall global training, validation and inference logic.

@@ -79,7 +93,7 @@ def __init__(self, statestore):
         super().__init__(statestore)

         self.name = "DefaultControl"

-    def start_session(self, session_id: str, rounds: int) -> None:
+    def start_session(self, session_id: str, rounds: int, round_timeout: int) -> None:
         if self._state == ReducerState.instructing:
             logger.info("Controller already in INSTRUCTING state. A session is in progress.")
             return
@@ -102,6 +116,9 @@ def start_session(self, session_id: str, rounds: int, round_timeout: int) -> None:
             logger.error("Session not properly configured.")
             return

+        if round_timeout is not None:
+            session_config["round_timeout"] = round_timeout
+
         self._state = ReducerState.monitoring

         last_round = int(self.get_latest_round_id())
@@ -122,6 +139,9 @@ def start_session(self, session_id: str, rounds: int, round_timeout: int) -> None:
             current_round = round

             try:
+                if self.get_session_status(session_id) == "Terminated":
+                    logger.info("Session terminated.")
+                    break
                 _, round_data = self.round(session_config, str(current_round))
             except TypeError as e:
                 logger.error("Failed to execute round: {0}".format(e))
@@ -130,9 +150,12 @@ def start_session(self, session_id: str, rounds: int, round_timeout: int) -> None:

             session_config["model_id"] = self.statestore.get_latest_model()

-        self.set_session_status(session_id, "Finished")
+        if self.get_session_status(session_id) == "Started":
+            self.set_session_status(session_id, "Finished")
         self._state = ReducerState.idle

+        self.set_session_config(session_id, session_config)
+
     def session(self, config: RoundConfig) -> None:
         """Execute a new training session. A session consists of one or several global rounds. All rounds in the same session
@@ -174,6 +197,9 @@ def session(self, config: RoundConfig) -> None:
             current_round = round

             try:
+                if self.get_session_status(config["session_id"]) == "Terminated":
+                    logger.info("Session terminated.")
+                    break
                 _, round_data = self.round(config, str(current_round))
             except TypeError as e:
                 logger.error("Failed to execute round: {0}".format(e))
@@ -183,7 +209,8 @@ def session(self, config: RoundConfig) -> None:
             config["model_id"] = self.statestore.get_latest_model()

         # TODO: Report completion of session
-        self.set_session_status(config["session_id"], "Finished")
+        if self.get_session_status(config["session_id"]) == "Started":
+            self.set_session_status(config["session_id"], "Finished")
         self._state = ReducerState.idle

     def inference_session(self, config: RoundConfig) -> None:
@@ -229,6 +256,7 @@ def round(self, session_config: RoundConfig, round_id: str):
         : type round_id: str
         """
+        session_id = session_config["session_id"]
         self.create_round({"round_id": round_id, "status": "Pending"})

         if len(self.network.get_combiners()) < 1:
@@ -241,7 +269,7 @@ def round(self, session_config: RoundConfig, round_id: str):
         round_config["rounds"] = 1
         round_config["round_id"] = round_id
         round_config["task"] = "training"
-        round_config["session_id"] = session_config["session_id"]
+        round_config["session_id"] = session_id

         self.set_round_config(round_id, round_config)
@@ -266,6 +294,7 @@
         # or round times out.
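+        # Editor's note (assumption, based on the surrounding logic): combiners_done()
+        # now returns False when the session has been terminated, and the caller
+        # below uses that signal to abort the round instead of waiting for
+        # combiner updates.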
         def do_if_round_times_out(result):
             logger.warning("Round timed out!")
+            return True

         @retry(
             wait=wait_random(min=1.0, max=2.0),
             ...
         )
         def combiners_done():
             round = self.statestore.get_round(round_id)
+            session_status = self.get_session_status(session_id)
+            if session_status == "Terminated":
+                self.set_round_status(round_id, "Terminated")
+                return False
             if "combiners" not in round:
                 logger.info("Waiting for combiners to update model...")
                 raise CombinersNotDoneException("Combiners have not yet reported.")
@@ -285,7 +318,9 @@ def combiners_done():
             return True

-        combiners_done()
+        combiners_are_done = combiners_done()
+        if not combiners_are_done:
+            return None, self.statestore.get_round(round_id)

         # Due to the distributed nature of the computation, there might be a
         # delay before combiners have reported the round data to the db,
diff --git a/fedn/network/controller/controlbase.py b/fedn/network/controller/controlbase.py
index 141848b78..297efd426 100644
--- a/fedn/network/controller/controlbase.py
+++ b/fedn/network/controller/controlbase.py
@@ -183,6 +183,26 @@ def set_session_status(self, session_id, status):
         """
         self.statestore.set_session_status(session_id, status)

+    def get_session_status(self, session_id):
+        """Get the status of a session.
+
+        :param session_id: The session unique identifier
+        :type session_id: str
+        :return: The status
+        :rtype: str
+        """
+        return self.statestore.get_session_status(session_id)
+
+    def set_session_config(self, session_id: str, config: dict):
+        """Set the session config.
+
+        :param session_id: The session unique identifier
+        :type session_id: str
+        :param config: The session config
+        :type config: dict
+        """
+        self.statestore.set_session_config_v2(session_id, config)
+
     def create_round(self, round_data):
         """Initialize a new round in backend db."""
         self.statestore.create_round(round_data)
diff --git a/fedn/network/grpc/fedn.proto b/fedn/network/grpc/fedn.proto
index cefa1728b..fc5ec8b65 100644
--- a/fedn/network/grpc/fedn.proto
+++ b/fedn/network/grpc/fedn.proto
@@ -149,6 +149,7 @@ enum Role {
 message Client {
   Role role = 1;
   string name = 2;
+  string client_id = 3;
 }

 message ReassignRequest {
diff --git a/fedn/network/grpc/fedn_pb2.py b/fedn/network/grpc/fedn_pb2.py
index b763a7c30..c667e1517 100644
--- a/fedn/network/grpc/fedn_pb2.py
+++ b/fedn/network/grpc/fedn_pb2.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: fedn/network/grpc/fedn.proto -# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -15,25 +14,25 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x64n/network/grpc/fedn.proto\x12\x04\x66\x65\x64n\x1a\x1fgoogle/protobuf/timestamp.proto\":\n\x08Response\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08response\x18\x02 \x01(\t\"\xbc\x02\n\x06Status\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x0e\n\x06status\x18\x02 \x01(\t\x12(\n\tlog_level\x18\x03 \x01(\x0e\x32\x15.fedn.Status.LogLevel\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12-\n\ttimestamp\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x1e\n\x04type\x18\x07 \x01(\x0e\x32\x10.fedn.StatusType\x12\r\n\x05\x65xtra\x18\x08 \x01(\t\x12\x12\n\nsession_id\x18\t \x01(\t\"B\n\x08LogLevel\x12\x08\n\x04INFO\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x0b\n\x07WARNING\x10\x02\x12\t\n\x05\x45RROR\x10\x03\x12\t\n\x05\x41UDIT\x10\x04\"\xd8\x01\n\x0bTaskRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12\x11\n\ttimestamp\x18\x06 \x01(\t\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\t\x12\x1e\n\x04type\x18\t \x01(\x0e\x32\x10.fedn.StatusType\"\xbf\x01\n\x0bModelUpdate\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x17\n\x0fmodel_update_id\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12\x11\n\ttimestamp\x18\x06 \x01(\t\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x0e\n\x06\x63onfig\x18\x08 \x01(\t\"\xd8\x01\n\x0fModelValidation\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12-\n\ttimestamp\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\t\"\x89\x01\n\x0cModelRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\n\n\x02id\x18\x04 \x01(\t\x12!\n\x06status\x18\x05 \x01(\x0e\x32\x11.fedn.ModelStatus\"]\n\rModelResponse\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\x0c\x12\n\n\x02id\x18\x02 \x01(\t\x12!\n\x06status\x18\x03 \x01(\x0e\x32\x11.fedn.ModelStatus\x12\x0f\n\x07message\x18\x04 \x01(\t\"U\n\x15GetGlobalModelRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\"h\n\x16GetGlobalModelResponse\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\")\n\tHeartbeat\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\"W\n\x16\x43lientAvailableMessage\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x12\x11\n\ttimestamp\x18\x03 
\x01(\t\"P\n\x12ListClientsRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1c\n\x07\x63hannel\x18\x02 \x01(\x0e\x32\x0b.fedn.Queue\"*\n\nClientList\x12\x1c\n\x06\x63lient\x18\x01 \x03(\x0b\x32\x0c.fedn.Client\"0\n\x06\x43lient\x12\x18\n\x04role\x18\x01 \x01(\x0e\x32\n.fedn.Role\x12\x0c\n\x04name\x18\x02 \x01(\t\"m\n\x0fReassignRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x0e\n\x06server\x18\x03 \x01(\t\x12\x0c\n\x04port\x18\x04 \x01(\r\"c\n\x10ReconnectRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x11\n\treconnect\x18\x03 \x01(\r\"\'\n\tParameter\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"T\n\x0e\x43ontrolRequest\x12\x1e\n\x07\x63ommand\x18\x01 \x01(\x0e\x32\r.fedn.Command\x12\"\n\tparameter\x18\x02 \x03(\x0b\x32\x0f.fedn.Parameter\"F\n\x0f\x43ontrolResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\"\n\tparameter\x18\x02 \x03(\x0b\x32\x0f.fedn.Parameter\"\x13\n\x11\x43onnectionRequest\"<\n\x12\x43onnectionResponse\x12&\n\x06status\x18\x01 \x01(\x0e\x32\x16.fedn.ConnectionStatus\"]\n\x0f\x46unctionRequest\x12\x0c\n\x04task\x18\x01 \x01(\t\x12\x18\n\x0epayload_string\x18\x02 \x01(\tH\x00\x12\x17\n\rpayload_bytes\x18\x03 \x01(\x0cH\x00\x42\t\n\x07payload\"M\n\x10\x46unctionResponse\x12\x17\n\rresult_string\x18\x02 \x01(\tH\x00\x12\x16\n\x0cresult_bytes\x18\x03 \x01(\x0cH\x00\x42\x08\n\x06result*\x84\x01\n\nStatusType\x12\x07\n\x03LOG\x10\x00\x12\x18\n\x14MODEL_UPDATE_REQUEST\x10\x01\x12\x10\n\x0cMODEL_UPDATE\x10\x02\x12\x1c\n\x18MODEL_VALIDATION_REQUEST\x10\x03\x12\x14\n\x10MODEL_VALIDATION\x10\x04\x12\r\n\tINFERENCE\x10\x05*$\n\x05Queue\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x0e\n\nTASK_QUEUE\x10\x01*S\n\x0bModelStatus\x12\x06\n\x02OK\x10\x00\x12\x0f\n\x0bIN_PROGRESS\x10\x01\x12\x12\n\x0eIN_PROGRESS_OK\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x0b\n\x07UNKNOWN\x10\x04*8\n\x04Role\x12\n\n\x06WORKER\x10\x00\x12\x0c\n\x08\x43OMBINER\x10\x01\x12\x0b\n\x07REDUCER\x10\x02\x12\t\n\x05OTHER\x10\x03*J\n\x07\x43ommand\x12\x08\n\x04IDLE\x10\x00\x12\t\n\x05START\x10\x01\x12\t\n\x05PAUSE\x10\x02\x12\x08\n\x04STOP\x10\x03\x12\t\n\x05RESET\x10\x04\x12\n\n\x06REPORT\x10\x05*I\n\x10\x43onnectionStatus\x12\x11\n\rNOT_ACCEPTING\x10\x00\x12\r\n\tACCEPTING\x10\x01\x12\x13\n\x0fTRY_AGAIN_LATER\x10\x02\x32z\n\x0cModelService\x12\x33\n\x06Upload\x12\x12.fedn.ModelRequest\x1a\x13.fedn.ModelResponse(\x01\x12\x35\n\x08\x44ownload\x12\x12.fedn.ModelRequest\x1a\x13.fedn.ModelResponse0\x01\x32\xbc\x02\n\x07\x43ontrol\x12\x34\n\x05Start\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12\x33\n\x04Stop\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12\x44\n\x15\x46lushAggregationQueue\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12<\n\rSetAggregator\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12\x42\n\x13SetFunctionProvider\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse2V\n\x07Reducer\x12K\n\x0eGetGlobalModel\x12\x1b.fedn.GetGlobalModelRequest\x1a\x1c.fedn.GetGlobalModelResponse2\xab\x03\n\tConnector\x12\x44\n\x14\x41llianceStatusStream\x12\x1c.fedn.ClientAvailableMessage\x1a\x0c.fedn.Status0\x01\x12*\n\nSendStatus\x12\x0c.fedn.Status\x1a\x0e.fedn.Response\x12?\n\x11ListActiveClients\x12\x18.fedn.ListClientsRequest\x1a\x10.fedn.ClientList\x12\x45\n\x10\x41\x63\x63\x65ptingClients\x12\x17.fedn.ConnectionRequest\x1a\x18.fedn.ConnectionResponse\x12\x3
0\n\rSendHeartbeat\x12\x0f.fedn.Heartbeat\x1a\x0e.fedn.Response\x12\x37\n\x0eReassignClient\x12\x15.fedn.ReassignRequest\x1a\x0e.fedn.Response\x12\x39\n\x0fReconnectClient\x12\x16.fedn.ReconnectRequest\x1a\x0e.fedn.Response2\xbf\x01\n\x08\x43ombiner\x12?\n\nTaskStream\x12\x1c.fedn.ClientAvailableMessage\x1a\x11.fedn.TaskRequest0\x01\x12\x34\n\x0fSendModelUpdate\x12\x11.fedn.ModelUpdate\x1a\x0e.fedn.Response\x12<\n\x13SendModelValidation\x12\x15.fedn.ModelValidation\x1a\x0e.fedn.Response2S\n\x0f\x46unctionService\x12@\n\x0f\x45xecuteFunction\x12\x15.fedn.FunctionRequest\x1a\x16.fedn.FunctionResponseb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x64n/network/grpc/fedn.proto\x12\x04\x66\x65\x64n\x1a\x1fgoogle/protobuf/timestamp.proto\":\n\x08Response\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08response\x18\x02 \x01(\t\"\xbc\x02\n\x06Status\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x0e\n\x06status\x18\x02 \x01(\t\x12(\n\tlog_level\x18\x03 \x01(\x0e\x32\x15.fedn.Status.LogLevel\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12-\n\ttimestamp\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x1e\n\x04type\x18\x07 \x01(\x0e\x32\x10.fedn.StatusType\x12\r\n\x05\x65xtra\x18\x08 \x01(\t\x12\x12\n\nsession_id\x18\t \x01(\t\"B\n\x08LogLevel\x12\x08\n\x04INFO\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x0b\n\x07WARNING\x10\x02\x12\t\n\x05\x45RROR\x10\x03\x12\t\n\x05\x41UDIT\x10\x04\"\xd8\x01\n\x0bTaskRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12\x11\n\ttimestamp\x18\x06 \x01(\t\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\t\x12\x1e\n\x04type\x18\t \x01(\x0e\x32\x10.fedn.StatusType\"\xbf\x01\n\x0bModelUpdate\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x17\n\x0fmodel_update_id\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12\x11\n\ttimestamp\x18\x06 \x01(\t\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x0e\n\x06\x63onfig\x18\x08 \x01(\t\"\xd8\x01\n\x0fModelValidation\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\t\x12\x16\n\x0e\x63orrelation_id\x18\x05 \x01(\t\x12-\n\ttimestamp\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04meta\x18\x07 \x01(\t\x12\x12\n\nsession_id\x18\x08 \x01(\t\"\x89\x01\n\x0cModelRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\n\n\x02id\x18\x04 \x01(\t\x12!\n\x06status\x18\x05 \x01(\x0e\x32\x11.fedn.ModelStatus\"]\n\rModelResponse\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\x0c\x12\n\n\x02id\x18\x02 \x01(\t\x12!\n\x06status\x18\x03 \x01(\x0e\x32\x11.fedn.ModelStatus\x12\x0f\n\x07message\x18\x04 \x01(\t\"U\n\x15GetGlobalModelRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\"h\n\x16GetGlobalModelResponse\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 
\x01(\x0b\x32\x0c.fedn.Client\x12\x10\n\x08model_id\x18\x03 \x01(\t\")\n\tHeartbeat\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\"W\n\x16\x43lientAvailableMessage\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x12\x11\n\ttimestamp\x18\x03 \x01(\t\"P\n\x12ListClientsRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1c\n\x07\x63hannel\x18\x02 \x01(\x0e\x32\x0b.fedn.Queue\"*\n\nClientList\x12\x1c\n\x06\x63lient\x18\x01 \x03(\x0b\x32\x0c.fedn.Client\"0\n\x06\x43lient\x12\x18\n\x04role\x18\x01 \x01(\x0e\x32\n.fedn.Role\x12\x0c\n\x04name\x18\x02 \x01(\t\"m\n\x0fReassignRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x0e\n\x06server\x18\x03 \x01(\t\x12\x0c\n\x04port\x18\x04 \x01(\r\"c\n\x10ReconnectRequest\x12\x1c\n\x06sender\x18\x01 \x01(\x0b\x32\x0c.fedn.Client\x12\x1e\n\x08receiver\x18\x02 \x01(\x0b\x32\x0c.fedn.Client\x12\x11\n\treconnect\x18\x03 \x01(\r\"\'\n\tParameter\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"T\n\x0e\x43ontrolRequest\x12\x1e\n\x07\x63ommand\x18\x01 \x01(\x0e\x32\r.fedn.Command\x12\"\n\tparameter\x18\x02 \x03(\x0b\x32\x0f.fedn.Parameter\"F\n\x0f\x43ontrolResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\"\n\tparameter\x18\x02 \x03(\x0b\x32\x0f.fedn.Parameter\"\x13\n\x11\x43onnectionRequest\"<\n\x12\x43onnectionResponse\x12&\n\x06status\x18\x01 \x01(\x0e\x32\x16.fedn.ConnectionStatus*\x84\x01\n\nStatusType\x12\x07\n\x03LOG\x10\x00\x12\x18\n\x14MODEL_UPDATE_REQUEST\x10\x01\x12\x10\n\x0cMODEL_UPDATE\x10\x02\x12\x1c\n\x18MODEL_VALIDATION_REQUEST\x10\x03\x12\x14\n\x10MODEL_VALIDATION\x10\x04\x12\r\n\tINFERENCE\x10\x05*$\n\x05Queue\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x0e\n\nTASK_QUEUE\x10\x01*S\n\x0bModelStatus\x12\x06\n\x02OK\x10\x00\x12\x0f\n\x0bIN_PROGRESS\x10\x01\x12\x12\n\x0eIN_PROGRESS_OK\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x0b\n\x07UNKNOWN\x10\x04*8\n\x04Role\x12\n\n\x06WORKER\x10\x00\x12\x0c\n\x08\x43OMBINER\x10\x01\x12\x0b\n\x07REDUCER\x10\x02\x12\t\n\x05OTHER\x10\x03*J\n\x07\x43ommand\x12\x08\n\x04IDLE\x10\x00\x12\t\n\x05START\x10\x01\x12\t\n\x05PAUSE\x10\x02\x12\x08\n\x04STOP\x10\x03\x12\t\n\x05RESET\x10\x04\x12\n\n\x06REPORT\x10\x05*I\n\x10\x43onnectionStatus\x12\x11\n\rNOT_ACCEPTING\x10\x00\x12\r\n\tACCEPTING\x10\x01\x12\x13\n\x0fTRY_AGAIN_LATER\x10\x02\x32z\n\x0cModelService\x12\x33\n\x06Upload\x12\x12.fedn.ModelRequest\x1a\x13.fedn.ModelResponse(\x01\x12\x35\n\x08\x44ownload\x12\x12.fedn.ModelRequest\x1a\x13.fedn.ModelResponse0\x01\x32\xf8\x01\n\x07\x43ontrol\x12\x34\n\x05Start\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12\x33\n\x04Stop\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12\x44\n\x15\x46lushAggregationQueue\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse\x12<\n\rSetAggregator\x12\x14.fedn.ControlRequest\x1a\x15.fedn.ControlResponse2V\n\x07Reducer\x12K\n\x0eGetGlobalModel\x12\x1b.fedn.GetGlobalModelRequest\x1a\x1c.fedn.GetGlobalModelResponse2\xab\x03\n\tConnector\x12\x44\n\x14\x41llianceStatusStream\x12\x1c.fedn.ClientAvailableMessage\x1a\x0c.fedn.Status0\x01\x12*\n\nSendStatus\x12\x0c.fedn.Status\x1a\x0e.fedn.Response\x12?\n\x11ListActiveClients\x12\x18.fedn.ListClientsRequest\x1a\x10.fedn.ClientList\x12\x45\n\x10\x41\x63\x63\x65ptingClients\x12\x17.fedn.ConnectionRequest\x1a\x18.fedn.ConnectionResponse\x12\x30\n\rSendHeartbeat\x12\x0f.fedn.Heartbeat\x1a\x0e.fedn.Response\x12\x37\n\x0eReassignClient\x12\x15.fedn.Re
assignRequest\x1a\x0e.fedn.Response\x12\x39\n\x0fReconnectClient\x12\x16.fedn.ReconnectRequest\x1a\x0e.fedn.Response2\xbf\x01\n\x08\x43ombiner\x12?\n\nTaskStream\x12\x1c.fedn.ClientAvailableMessage\x1a\x11.fedn.TaskRequest0\x01\x12\x34\n\x0fSendModelUpdate\x12\x11.fedn.ModelUpdate\x1a\x0e.fedn.Response\x12<\n\x13SendModelValidation\x12\x15.fedn.ModelValidation\x1a\x0e.fedn.Responseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'fedn.network.grpc.fedn_pb2', _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'network.grpc.fedn_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None - _globals['_STATUSTYPE']._serialized_start=2487 - _globals['_STATUSTYPE']._serialized_end=2619 - _globals['_QUEUE']._serialized_start=2621 - _globals['_QUEUE']._serialized_end=2657 - _globals['_MODELSTATUS']._serialized_start=2659 - _globals['_MODELSTATUS']._serialized_end=2742 - _globals['_ROLE']._serialized_start=2744 - _globals['_ROLE']._serialized_end=2800 - _globals['_COMMAND']._serialized_start=2802 - _globals['_COMMAND']._serialized_end=2876 - _globals['_CONNECTIONSTATUS']._serialized_start=2878 - _globals['_CONNECTIONSTATUS']._serialized_end=2951 + _globals['_STATUSTYPE']._serialized_start=2313 + _globals['_STATUSTYPE']._serialized_end=2445 + _globals['_QUEUE']._serialized_start=2447 + _globals['_QUEUE']._serialized_end=2483 + _globals['_MODELSTATUS']._serialized_start=2485 + _globals['_MODELSTATUS']._serialized_end=2568 + _globals['_ROLE']._serialized_start=2570 + _globals['_ROLE']._serialized_end=2626 + _globals['_COMMAND']._serialized_start=2628 + _globals['_COMMAND']._serialized_end=2702 + _globals['_CONNECTIONSTATUS']._serialized_start=2704 + _globals['_CONNECTIONSTATUS']._serialized_end=2777 _globals['_RESPONSE']._serialized_start=71 _globals['_RESPONSE']._serialized_end=129 _globals['_STATUS']._serialized_start=132 @@ -78,20 +77,14 @@ _globals['_CONNECTIONREQUEST']._serialized_end=2248 _globals['_CONNECTIONRESPONSE']._serialized_start=2250 _globals['_CONNECTIONRESPONSE']._serialized_end=2310 - _globals['_FUNCTIONREQUEST']._serialized_start=2312 - _globals['_FUNCTIONREQUEST']._serialized_end=2405 - _globals['_FUNCTIONRESPONSE']._serialized_start=2407 - _globals['_FUNCTIONRESPONSE']._serialized_end=2484 - _globals['_MODELSERVICE']._serialized_start=2953 - _globals['_MODELSERVICE']._serialized_end=3075 - _globals['_CONTROL']._serialized_start=3078 - _globals['_CONTROL']._serialized_end=3394 - _globals['_REDUCER']._serialized_start=3396 - _globals['_REDUCER']._serialized_end=3482 - _globals['_CONNECTOR']._serialized_start=3485 - _globals['_CONNECTOR']._serialized_end=3912 - _globals['_COMBINER']._serialized_start=3915 - _globals['_COMBINER']._serialized_end=4106 - _globals['_FUNCTIONSERVICE']._serialized_start=4108 - _globals['_FUNCTIONSERVICE']._serialized_end=4191 + _globals['_MODELSERVICE']._serialized_start=2779 + _globals['_MODELSERVICE']._serialized_end=2901 + _globals['_CONTROL']._serialized_start=2904 + _globals['_CONTROL']._serialized_end=3152 + _globals['_REDUCER']._serialized_start=3154 + _globals['_REDUCER']._serialized_end=3240 + _globals['_CONNECTOR']._serialized_start=3243 + _globals['_CONNECTOR']._serialized_end=3670 + _globals['_COMBINER']._serialized_start=3673 + _globals['_COMBINER']._serialized_end=3864 # @@protoc_insertion_point(module_scope) diff --git a/fedn/network/grpc/fedn_pb2_grpc.py b/fedn/network/grpc/fedn_pb2_grpc.py index 
63bf1f625..a1f03ea50 100644 --- a/fedn/network/grpc/fedn_pb2_grpc.py +++ b/fedn/network/grpc/fedn_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -from fedn.network.grpc import fedn_pb2 as fedn_dot_network_dot_grpc_dot_fedn__pb2 +from ..grpc import fedn_pb2 as network_dot_grpc_dot_fedn__pb2 class ModelServiceStub(object): @@ -16,13 +16,13 @@ def __init__(self, channel): """ self.Upload = channel.stream_unary( '/fedn.ModelService/Upload', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, ) self.Download = channel.unary_stream( '/fedn.ModelService/Download', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, ) @@ -46,13 +46,13 @@ def add_ModelServiceServicer_to_server(servicer, server): rpc_method_handlers = { 'Upload': grpc.stream_unary_rpc_method_handler( servicer.Upload, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ModelRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ModelResponse.SerializeToString, ), 'Download': grpc.unary_stream_rpc_method_handler( servicer.Download, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ModelRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ModelResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -76,8 +76,8 @@ def Upload(request_iterator, timeout=None, metadata=None): return grpc.experimental.stream_unary(request_iterator, target, '/fedn.ModelService/Upload', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -93,8 +93,8 @@ def Download(request, timeout=None, metadata=None): return grpc.experimental.unary_stream(request, target, '/fedn.ModelService/Download', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ModelRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ModelResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -110,23 +110,23 @@ def __init__(self, channel): """ self.Start = channel.unary_unary( '/fedn.Control/Start', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - 
response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, ) self.Stop = channel.unary_unary( '/fedn.Control/Stop', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, ) self.FlushAggregationQueue = channel.unary_unary( '/fedn.Control/FlushAggregationQueue', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, ) self.SetAggregator = channel.unary_unary( '/fedn.Control/SetAggregator', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, ) self.SetFunctionProvider = channel.unary_unary( '/fedn.Control/SetFunctionProvider', @@ -173,23 +173,23 @@ def add_ControlServicer_to_server(servicer, server): rpc_method_handlers = { 'Start': grpc.unary_unary_rpc_method_handler( servicer.Start, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, ), 'Stop': grpc.unary_unary_rpc_method_handler( servicer.Stop, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, ), 'FlushAggregationQueue': grpc.unary_unary_rpc_method_handler( servicer.FlushAggregationQueue, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, ), 'SetAggregator': grpc.unary_unary_rpc_method_handler( servicer.SetAggregator, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ControlRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ControlResponse.SerializeToString, ), 'SetFunctionProvider': grpc.unary_unary_rpc_method_handler( 
servicer.SetFunctionProvider, @@ -218,8 +218,8 @@ def Start(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Control/Start', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -235,8 +235,8 @@ def Stop(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Control/Stop', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -252,8 +252,8 @@ def FlushAggregationQueue(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Control/FlushAggregationQueue', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -269,8 +269,8 @@ def SetAggregator(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Control/SetAggregator', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ControlRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ControlResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -303,8 +303,8 @@ def __init__(self, channel): """ self.GetGlobalModel = channel.unary_unary( '/fedn.Reducer/GetGlobalModel', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.FromString, ) @@ -322,8 +322,8 @@ def add_ReducerServicer_to_server(servicer, server): rpc_method_handlers = { 'GetGlobalModel': grpc.unary_unary_rpc_method_handler( servicer.GetGlobalModel, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -347,8 +347,8 @@ def GetGlobalModel(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Reducer/GetGlobalModel', - 
fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.FromString, + network_dot_grpc_dot_fedn__pb2.GetGlobalModelRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.GetGlobalModelResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -364,38 +364,38 @@ def __init__(self, channel): """ self.AllianceStatusStream = channel.unary_stream( '/fedn.Connector/AllianceStatusStream', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Status.FromString, ) self.SendStatus = channel.unary_unary( '/fedn.Connector/SendStatus', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) self.ListActiveClients = channel.unary_unary( '/fedn.Connector/ListActiveClients', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ListClientsRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientList.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ListClientsRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ClientList.FromString, ) self.AcceptingClients = channel.unary_unary( '/fedn.Connector/AcceptingClients', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionResponse.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ConnectionRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.ConnectionResponse.FromString, ) self.SendHeartbeat = channel.unary_unary( '/fedn.Connector/SendHeartbeat', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Heartbeat.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.Heartbeat.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) self.ReassignClient = channel.unary_unary( '/fedn.Connector/ReassignClient', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ReassignRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ReassignRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) self.ReconnectClient = channel.unary_unary( '/fedn.Connector/ReconnectClient', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ReconnectRequest.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ReconnectRequest.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) @@ -454,38 +454,38 @@ def add_ConnectorServicer_to_server(servicer, server): rpc_method_handlers = { 
'AllianceStatusStream': grpc.unary_stream_rpc_method_handler( servicer.AllianceStatusStream, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, ), 'SendStatus': grpc.unary_unary_rpc_method_handler( servicer.SendStatus, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.Status.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), 'ListActiveClients': grpc.unary_unary_rpc_method_handler( servicer.ListActiveClients, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ListClientsRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientList.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ListClientsRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ClientList.SerializeToString, ), 'AcceptingClients': grpc.unary_unary_rpc_method_handler( servicer.AcceptingClients, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionResponse.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ConnectionRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.ConnectionResponse.SerializeToString, ), 'SendHeartbeat': grpc.unary_unary_rpc_method_handler( servicer.SendHeartbeat, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Heartbeat.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.Heartbeat.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), 'ReassignClient': grpc.unary_unary_rpc_method_handler( servicer.ReassignClient, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ReassignRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ReassignRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), 'ReconnectClient': grpc.unary_unary_rpc_method_handler( servicer.ReconnectClient, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ReconnectRequest.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ReconnectRequest.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -509,8 +509,8 @@ def AllianceStatusStream(request, timeout=None, metadata=None): return grpc.experimental.unary_stream(request, target, '/fedn.Connector/AllianceStatusStream', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.FromString, + network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Status.FromString, options, 
channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -526,8 +526,8 @@ def SendStatus(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/SendStatus', - fedn_dot_network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.Status.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -543,8 +543,8 @@ def ListActiveClients(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/ListActiveClients', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ListClientsRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientList.FromString, + network_dot_grpc_dot_fedn__pb2.ListClientsRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ClientList.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -560,8 +560,8 @@ def AcceptingClients(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/AcceptingClients', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.ConnectionResponse.FromString, + network_dot_grpc_dot_fedn__pb2.ConnectionRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.ConnectionResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -577,8 +577,8 @@ def SendHeartbeat(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/SendHeartbeat', - fedn_dot_network_dot_grpc_dot_fedn__pb2.Heartbeat.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.Heartbeat.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -594,8 +594,8 @@ def ReassignClient(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/ReassignClient', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ReassignRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.ReassignRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -611,8 +611,8 @@ def ReconnectClient(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Connector/ReconnectClient', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ReconnectRequest.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.ReconnectRequest.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -628,18 +628,18 @@ def __init__(self, channel): """ self.TaskStream = channel.unary_stream( '/fedn.Combiner/TaskStream', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, - 
response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.TaskRequest.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.TaskRequest.FromString, ) self.SendModelUpdate = channel.unary_unary( '/fedn.Combiner/SendModelUpdate', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelUpdate.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ModelUpdate.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) self.SendModelValidation = channel.unary_unary( '/fedn.Combiner/SendModelValidation', - request_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelValidation.SerializeToString, - response_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + request_serializer=network_dot_grpc_dot_fedn__pb2.ModelValidation.SerializeToString, + response_deserializer=network_dot_grpc_dot_fedn__pb2.Response.FromString, ) @@ -670,18 +670,18 @@ def add_CombinerServicer_to_server(servicer, server): rpc_method_handlers = { 'TaskStream': grpc.unary_stream_rpc_method_handler( servicer.TaskStream, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.TaskRequest.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.TaskRequest.SerializeToString, ), 'SendModelUpdate': grpc.unary_unary_rpc_method_handler( servicer.SendModelUpdate, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelUpdate.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ModelUpdate.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), 'SendModelValidation': grpc.unary_unary_rpc_method_handler( servicer.SendModelValidation, - request_deserializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelValidation.FromString, - response_serializer=fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, + request_deserializer=network_dot_grpc_dot_fedn__pb2.ModelValidation.FromString, + response_serializer=network_dot_grpc_dot_fedn__pb2.Response.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -705,8 +705,8 @@ def TaskStream(request, timeout=None, metadata=None): return grpc.experimental.unary_stream(request, target, '/fedn.Combiner/TaskStream', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.TaskRequest.FromString, + network_dot_grpc_dot_fedn__pb2.ClientAvailableMessage.SerializeToString, + network_dot_grpc_dot_fedn__pb2.TaskRequest.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -722,8 +722,8 @@ def SendModelUpdate(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Combiner/SendModelUpdate', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelUpdate.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.ModelUpdate.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, 
channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -739,8 +739,8 @@ def SendModelValidation(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/fedn.Combiner/SendModelValidation', - fedn_dot_network_dot_grpc_dot_fedn__pb2.ModelValidation.SerializeToString, - fedn_dot_network_dot_grpc_dot_fedn__pb2.Response.FromString, + network_dot_grpc_dot_fedn__pb2.ModelValidation.SerializeToString, + network_dot_grpc_dot_fedn__pb2.Response.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/fedn/network/grpc/server.py b/fedn/network/grpc/server.py index 4354a7aa5..a23691505 100644 --- a/fedn/network/grpc/server.py +++ b/fedn/network/grpc/server.py @@ -9,10 +9,9 @@ class Server: - """ Class for configuring and launching the gRPC server.""" + """Class for configuring and launching the gRPC server.""" def __init__(self, servicer, modelservicer, config): - set_log_level_from_string(config.get("verbosity", "INFO")) set_log_stream(config.get("logfile", None)) @@ -34,21 +33,26 @@ def __init__(self, servicer, modelservicer, config): health_pb2_grpc.add_HealthServicer_to_server(self.health_servicer, self.server) if config["secure"]: - logger.info(f'Creating secure gRPCS server using certificate: {config["certificate"]}') + logger.info("Creating secure gRPCS server using certificate") server_credentials = grpc.ssl_server_credentials( - ((config["key"], config["certificate"],),)) - self.server.add_secure_port( - "[::]:" + str(config["port"]), server_credentials) + ( + ( + config["key"], + config["certificate"], + ), + ) + ) + self.server.add_secure_port("[::]:" + str(config["port"]), server_credentials) else: logger.info("Creating gRPC server") self.server.add_insecure_port("[::]:" + str(config["port"])) def start(self): - """ Start the gRPC server.""" + """Start the gRPC server.""" logger.info("gRPC Server started") self.server.start() def stop(self): - """ Stop the gRPC server.""" + """Stop the gRPC server.""" logger.info("gRPC Server stopped") self.server.stop(0) diff --git a/fedn/network/loadbalancer/leastpacked.py b/fedn/network/loadbalancer/leastpacked.py index 786dd8de0..8e793e95a 100644 --- a/fedn/network/loadbalancer/leastpacked.py +++ b/fedn/network/loadbalancer/leastpacked.py @@ -16,18 +16,16 @@ def find_combiner(self): """Find the combiner with the least number of attached clients. 
""" - min_clients = None + min_clients = -1 selected_combiner = None - for combiner in self.network.get_combiners(): try: if combiner.allowing_clients(): # Using default default Channel = 1, MODEL_UPDATE_REQUESTS nr_active_clients = len(combiner.list_active_clients()) - if not min_clients or nr_active_clients < min_clients: + if min_clients == -1 or nr_active_clients < min_clients: min_clients = nr_active_clients selected_combiner = combiner except CombinerUnavailableError: pass - return selected_combiner diff --git a/fedn/network/storage/s3/repository.py b/fedn/network/storage/s3/repository.py index c1704e5ca..2a5ee3449 100644 --- a/fedn/network/storage/s3/repository.py +++ b/fedn/network/storage/s3/repository.py @@ -1,3 +1,4 @@ +import datetime import uuid from fedn.common.log_config import logger @@ -10,12 +11,17 @@ class Repository: def __init__(self, config): self.model_bucket = config["storage_bucket"] self.context_bucket = config["context_bucket"] + try: + self.inference_bucket = config["inference_bucket"] + except KeyError: + self.inference_bucket = "fedn-inference" # TODO: Make a plug-in solution self.client = MINIORepository(config) self.client.create_bucket(self.context_bucket) self.client.create_bucket(self.model_bucket) + self.client.create_bucket(self.inference_bucket) def get_model(self, model_id): """Retrieve a model with id model_id. @@ -104,3 +110,31 @@ def delete_compute_package(self, compute_package): except Exception: logger.error("Failed to delete compute_package from repository.") raise + + def presigned_put_url(self, bucket: str, object_name: str, expires: datetime.timedelta = datetime.timedelta(hours=1)): + """Generate a presigned URL for an upload object request. + + :param bucket: The bucket name + :type bucket: str + :param object_name: The object name + :type object_name: str + :param expires: The time the URL is valid + :type expires: datetime.timedelta + :return: The URL + :rtype: str + """ + return self.client.client.presigned_put_object(bucket, object_name, expires) + + def presigned_get_url(self, bucket: str, object_name: str, expires: datetime.timedelta = datetime.timedelta(hours=1)) -> str: + """Generate a presigned URL for a download object request. + + :param bucket: The bucket name + :type bucket: str + :param object_name: The object name + :type object_name: str + :param expires: The time the URL is valid + :type expires: datetime.timedelta + :return: The URL + :rtype: str + """ + return self.client.client.presigned_get_object(bucket, object_name, expires) diff --git a/fedn/network/storage/statestore/mongostatestore.py b/fedn/network/storage/statestore/mongostatestore.py index 724077984..7ef22a795 100644 --- a/fedn/network/storage/statestore/mongostatestore.py +++ b/fedn/network/storage/statestore/mongostatestore.py @@ -81,6 +81,7 @@ def connect(self): def init_index(self): self.package.create_index([("id", pymongo.DESCENDING)]) + self.clients.create_index([("client_id", pymongo.DESCENDING)]) def is_inited(self): """Check if the statestore is intialized. @@ -168,6 +169,17 @@ def get_session(self, session_id): """ return self.sessions.find_one({"session_id": session_id}) + def get_session_status(self, session_id): + """Get the session status. + + :param session_id: The session id. + :type session_id: str + :return: The session status. + :rtype: str + """ + session = self.sessions.find_one({"session_id": session_id}) + return session["status"] + def set_latest_model(self, model_id, session_id=None): """Set the latest model id. 
@@ -726,18 +738,24 @@ def set_client(self, client_data):
         :return:
         """
         client_data["updated_at"] = str(datetime.now())
-        self.clients.update_one({"name": client_data["name"]}, {"$set": client_data}, True)
-
-    def get_client(self, name):
-        """Get client by name.
-
-        :param name: name of client to get.
-        :type name: str
+        try:
+            self.clients.update_one({"client_id": client_data["client_id"]}, {"$set": client_data}, True)
+        except KeyError:
+            # If client_id is not present, use name as identifier, for backwards compatibility
+            id = str(uuid.uuid4())
+            client_data["client_id"] = id
+            self.clients.update_one({"name": client_data["name"]}, {"$set": client_data}, True)
+
+    def get_client(self, client_id):
+        """Get client by client_id.
+
+        :param client_id: client_id of client to get.
+        :type client_id: str
         :return: The client. None if not found.
         :rtype: ObjectId
         """
         try:
-            ret = self.clients.find({"key": name})
+            ret = self.clients.find({"key": client_id})
             if list(ret) == []:
                 return None
             else:
@@ -870,6 +888,17 @@ def set_session_config(self, id: str, config: RoundConfig) -> None:
         """
         self.sessions.update_one({"session_id": str(id)}, {"$push": {"session_config": config}}, True)

+    # Added to accommodate new session config structure
+    def set_session_config_v2(self, id: str, config: RoundConfig) -> None:
+        """Set the session configuration.
+
+        :param id: The session id
+        :type id: str
+        :param config: Session configuration
+        :type config: dict
+        """
+        self.sessions.update_one({"session_id": str(id)}, {"$set": {"session_config": config}}, True)
+
     def set_session_status(self, id, status):
         """Set session status.

@@ -925,7 +954,7 @@ def update_client_status(self, clients, status):
         :return: None
         """
         datetime_now = datetime.now()
-        filter_query = {"name": {"$in": clients}}
+        filter_query = {"client_id": {"$in": clients}}
         update_query = {"$set": {"last_seen": datetime_now, "status": status}}
         self.clients.update_many(filter_query, update_query)

diff --git a/fedn/network/storage/statestore/stores/model_store.py b/fedn/network/storage/statestore/stores/model_store.py
index 5fff639f3..3048f2a26 100644
--- a/fedn/network/storage/statestore/stores/model_store.py
+++ b/fedn/network/storage/statestore/stores/model_store.py
@@ -57,8 +57,24 @@ def get(self, id: str, use_typing: bool = False) -> Model:

         return Model.from_dict(document) if use_typing else from_document(document)

-    def update(self, id: str, item: Model) -> bool:
-        raise NotImplementedError("Update not implemented for ModelStore")
+    def _validate(self, item: Model) -> Tuple[bool, str]:
+        if "model" not in item or not item["model"]:
+            return False, "Model is required"
+
+        return True, ""
+
+    def _complement(self, item: Model) -> Model:
+        if "key" not in item or item["key"] is None:
+            item["key"] = "models"
+
+    def update(self, id: str, item: Model) -> Tuple[bool, Any]:
+        valid, message = self._validate(item)
+        if not valid:
+            return False, message
+
+        self._complement(item)
+
+        return super().update(id, item)

     def add(self, item: Model) -> Tuple[bool, Any]:
         raise NotImplementedError("Add not implemented for ModelStore")
@@ -193,3 +209,14 @@ def count(self, **kwargs) -> int:
         """
         kwargs["key"] = "models"
         return super().count(**kwargs)
+
+    def get_active(self) -> str:
+        """Get the active model
+        return: The active model id (str)
+        """
+        active_model = self.database[self.collection].find_one({"key": "current_model"})
+
+        if active_model is None:
+            raise EntityNotFound("Active model not found")
+
+        return active_model["model"]
diff --git a/fedn/network/storage/statestore/stores/session_store.py b/fedn/network/storage/statestore/stores/session_store.py
index b25a34319..f2675c912 100644
--- a/fedn/network/storage/statestore/stores/session_store.py
+++ b/fedn/network/storage/statestore/stores/session_store.py
@@ -38,7 +38,7 @@ def _validate_session_config(self, session_config: dict) -> Tuple[bool, str]:
         if "round_timeout" not in session_config:
             return False, "session_config.round_timeout is required"

-        if not isinstance(session_config["round_timeout"], int):
+        if not isinstance(session_config["round_timeout"], (int, float)):
             return False, "session_config.round_timeout must be a number"

         if "buffer_size" not in session_config:
@@ -82,10 +82,15 @@ def _validate_session_config(self, session_config: dict) -> Tuple[bool, str]:
     def _validate(self, item: Session) -> Tuple[bool, str]:
         if "session_config" not in item or item["session_config"] is None:
             return False, "session_config is required"
-        elif not isinstance(item["session_config"], dict):
-            return False, "session_config must be a dict"

-        session_config = item["session_config"]
+        session_config = None
+
+        if isinstance(item["session_config"], dict):
+            session_config = item["session_config"]
+        elif isinstance(item["session_config"], list):
+            session_config = item["session_config"][0]
+        else:
+            return False, "session_config must be a dict"

         return self._validate_session_config(session_config)

@@ -117,10 +122,14 @@ def get(self, id: str, use_typing: bool = False) -> Session:

         return Session.from_dict(document) if use_typing else from_document(document)

-    def update(self, id: str, item: Session) -> bool:
-        raise NotImplementedError("Update not implemented for SessionStore")
+    def update(self, id: str, item: Session) -> Tuple[bool, Any]:
+        valid, message = self._validate(item)
+        if not valid:
+            return False, message
+
+        return super().update(id, item)

-    def add(self, item: Session)-> Tuple[bool, Any]:
+    def add(self, item: Session) -> Tuple[bool, Any]:
         """Add an entity
         param item: The entity to add
             type: Session
diff --git a/fedn/network/storage/statestore/stores/store.py b/fedn/network/storage/statestore/stores/store.py
index f76ad3e12..f1175c9f7 100644
--- a/fedn/network/storage/statestore/stores/store.py
+++ b/fedn/network/storage/statestore/stores/store.py
@@ -30,8 +30,16 @@ def get(self, id: str, use_typing: bool = False) -> T:

         return from_document(document) if not use_typing else document

-    def update(self, id: str, item: T) -> bool:
-        pass
+    def update(self, id: str, item: T) -> Tuple[bool, Any]:
+        try:
+            result = self.database[self.collection].update_one({"_id": ObjectId(id)}, {"$set": item})
+            if result.modified_count == 1:
+                document = self.database[self.collection].find_one({"_id": ObjectId(id)})
+                return True, from_document(document)
+            else:
+                return False, "Entity not found"
+        except Exception as e:
+            return False, str(e)

     def add(self, item: T) -> Tuple[bool, Any]:
         try:
diff --git a/fedn/utils/dist.py b/fedn/utils/dist.py
new file mode 100644
index 000000000..e5fa7192b
--- /dev/null
+++ b/fedn/utils/dist.py
@@ -0,0 +1,17 @@
+import importlib.metadata
+
+import fedn
+
+
+def get_version(package):
+    # Dynamically get the version of the package
+    try:
+        version = importlib.metadata.version("fedn")
+    except importlib.metadata.PackageNotFoundError:
+        version = "unknown"
+    return version
+
+
+def get_package_path():
+    # Get the path of the package
+    return fedn.__path__[0]
diff --git a/fedn/utils/plots.py b/fedn/utils/plots.py
deleted file mode 100644
index d04fffc4e..000000000
--- a/fedn/utils/plots.py +++ /dev/null @@ -1,411 +0,0 @@ -import json -from datetime import datetime - -import numpy -import plotly -import plotly.graph_objs as go -from plotly.subplots import make_subplots - -from fedn.common.log_config import logger -from fedn.network.storage.statestore.mongostatestore import MongoStateStore - - -class Plot: - """ """ - - def __init__(self, statestore): - try: - statestore_config = statestore.get_config() - statestore = MongoStateStore(statestore_config["network_id"], statestore_config["mongo_config"]) - self.mdb = statestore.connect() - self.status = self.mdb["control.status"] - self.round_time = self.mdb["control.round_time"] - self.combiner_round_time = self.mdb["control.combiner_round_time"] - self.psutil_usage = self.mdb["control.psutil_monitoring"] - self.network_clients = self.mdb["network.clients"] - - except Exception as e: - logger.error("FAILED TO CONNECT TO MONGO, {}".format(e)) - self.collection = None - raise - - # plot metrics from DB - def _scalar_metrics(self, metrics): - """Extract all scalar valued metrics from a MODEL_VALIDATON.""" - data = json.loads(metrics["data"]) - data = json.loads(data["data"]) - - valid_metrics = [] - for metric, val in data.items(): - # If it can be converted to a float it is a valid, scalar metric - try: - val = float(val) - valid_metrics.append(metric) - except Exception: - pass - - return valid_metrics - - def create_table_plot(self): - """:return: - """ - metrics = self.status.find_one({"type": "MODEL_VALIDATION"}) - if metrics is None: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No data currently available for table mean metrics") - table = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return False - - valid_metrics = self._scalar_metrics(metrics) - if valid_metrics == []: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No scalar metrics found") - table = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return False - - all_vals = [] - models = [] - for metric in valid_metrics: - validations = {} - for post in self.status.find({"type": "MODEL_VALIDATION"}): - e = json.loads(post["data"]) - try: - validations[e["modelId"]].append(float(json.loads(e["data"])[metric])) - except KeyError: - validations[e["modelId"]] = [float(json.loads(e["data"])[metric])] - - vals = [] - models = [] - for model, data in validations.items(): - vals.append(numpy.mean(data)) - models.append(model) - all_vals.append(vals) - - header_vals = valid_metrics - models.reverse() - values = [models] - - for vals in all_vals: - vals.reverse() - values.append(vals) - - fig = go.Figure( - data=[ - go.Table( - header=dict(values=["Model ID"] + header_vals, line_color="darkslategray", fill_color="lightskyblue", align="left"), - cells=dict( - values=values, # 2nd column - line_color="darkslategray", - fill_color="lightcyan", - align="left", - ), - ) - ] - ) - - fig.update_layout(title_text="Summary: mean metrics") - table = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return table - - def create_timeline_plot(self): - """:return: - """ - trace_data = [] - x = [] - y = [] - base = [] - for p in self.status.find({"type": "MODEL_UPDATE_REQUEST"}): - e = json.loads(p["data"]) - cid = e["correlationId"] - for cc in self.status.find({"sender": p["sender"], "type": "MODEL_UPDATE"}): - da = json.loads(cc["data"]) - if da["correlationId"] == cid: - cp = cc - - cd = json.loads(cp["data"]) - tr = datetime.strptime(e["timestamp"], "%Y-%m-%d %H:%M:%S.%f") - tu = datetime.strptime(cd["timestamp"], "%Y-%m-%d 
%H:%M:%S.%f") - ts = tu - tr - base.append(tr.timestamp()) - x.append(ts.total_seconds() / 60.0) - y.append(p["sender"]["name"]) - - trace_data.append( - go.Bar( - x=y, - y=x, - marker=dict(color="royalblue"), - name="Training", - ) - ) - - x = [] - y = [] - base = [] - for p in self.status.find({"type": "MODEL_VALIDATION_REQUEST"}): - e = json.loads(p["data"]) - cid = e["correlationId"] - for cc in self.status.find({"sender": p["sender"], "type": "MODEL_VALIDATION"}): - da = json.loads(cc["data"]) - if da["correlationId"] == cid: - cp = cc - cd = json.loads(cp["data"]) - tr = datetime.strptime(e["timestamp"], "%Y-%m-%d %H:%M:%S.%f") - tu = datetime.strptime(cd["timestamp"], "%Y-%m-%d %H:%M:%S.%f") - ts = tu - tr - base.append(tr.timestamp()) - x.append(ts.total_seconds() / 60.0) - y.append(p["sender"]["name"]) - - trace_data.append( - go.Bar( - x=y, - y=x, - marker=dict(color="lightskyblue"), - name="Validation", - ) - ) - - layout = go.Layout( - barmode="stack", - showlegend=True, - ) - - fig = go.Figure(data=trace_data, layout=layout) - fig.update_xaxes(title_text="Alliance/client") - fig.update_yaxes(title_text="Time (Min)") - fig.update_layout(title_text="Alliance timeline") - timeline = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return timeline - - def create_client_training_distribution(self): - """:return: - """ - training = [] - for p in self.status.find({"type": "MODEL_UPDATE"}): - e = json.loads(p["data"]) - meta = json.loads(e["meta"]) - training.append(meta["exec_training"]) - - if not training: - return False - fig = go.Figure(data=go.Histogram(x=training)) - fig.update_layout(title_text="Client model training time, mean: {}".format(numpy.mean(training))) - histogram = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return histogram - - def create_client_histogram_plot(self): - """:return: - """ - training = [] - for p in self.status.find({"type": "MODEL_UPDATE"}): - e = json.loads(p["data"]) - meta = json.loads(e["meta"]) - training.append(meta["exec_training"]) - - fig = go.Figure() - - fig.update_layout( - template="simple_white", - xaxis=dict(title_text="Time (s)"), - yaxis=dict(title_text="Number of updates"), - title="Mean client training time: {}".format(numpy.mean(training)), - # showlegend=True - ) - if not training: - return False - - fig.add_trace(go.Histogram(x=training)) - - histogram_plot = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return histogram_plot - - def create_client_plot(self): - """:return: - """ - processing = [] - upload = [] - download = [] - training = [] - for p in self.status.find({"type": "MODEL_UPDATE"}): - e = json.loads(p["data"]) - meta = json.loads(e["meta"]) - upload.append(meta["upload_model"]) - download.append(meta["fetch_model"]) - training.append(meta["exec_training"]) - processing.append(meta["processing_time"]) - - fig = go.Figure() - fig.update_layout(template="simple_white", title="Mean client processing time: {}".format(numpy.mean(processing)), showlegend=True) - if not processing: - return False - data = [numpy.mean(training), numpy.mean(upload), numpy.mean(download)] - labels = ["Training execution", "Model upload (to combiner)", "Model download (from combiner)"] - fig.add_trace(go.Pie(labels=labels, values=data)) - - client_plot = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return client_plot - - def create_combiner_plot(self): - """:return: - """ - waiting = [] - aggregation = [] - model_load = [] - combination = [] - for round in self.mdb["control.round"].find(): - try: - for combiner 
in round["combiners"]: - data = combiner - stats = data["local_round"]["1"] - ml = stats["aggregation_time"]["time_model_load"] - ag = stats["aggregation_time"]["time_model_aggregation"] - combination.append(stats["time_combination"]) - waiting.append(stats["time_combination"] - ml - ag) - model_load.append(ml) - aggregation.append(ag) - except Exception: - pass - - labels = ["Waiting for client updates", "Aggregation", "Loading model updates from disk"] - val = [numpy.mean(waiting), numpy.mean(aggregation), numpy.mean(model_load)] - fig = go.Figure() - - fig.update_layout(template="simple_white", title="Mean combiner round time: {}".format(numpy.mean(combination)), showlegend=True) - if not combination: - return False - fig.add_trace(go.Pie(labels=labels, values=val)) - combiner_plot = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return combiner_plot - - def fetch_valid_metrics(self): - """:return: - """ - metrics = self.status.find_one({"type": "MODEL_VALIDATION"}) - valid_metrics = self._scalar_metrics(metrics) - return valid_metrics - - def create_box_plot(self, metric): - """:param metric: - :return: - """ - metrics = self.status.find_one({"type": "MODEL_VALIDATION"}) - if metrics is None: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No data currently available for metric distribution over " "participants") - box = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return box - - valid_metrics = self._scalar_metrics(metrics) - if valid_metrics == []: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No scalar metrics found") - box = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return box - - validations = {} - for post in self.status.find({"type": "MODEL_VALIDATION"}): - e = json.loads(post["data"]) - try: - validations[e["modelId"]].append(float(json.loads(e["data"])[metric])) - except KeyError: - validations[e["modelId"]] = [float(json.loads(e["data"])[metric])] - - # Make sure validations are plotted in chronological order - model_trail = self.mdb.control.model.find_one({"key": "model_trail"}) - model_trail_ids = model_trail["model"] - validations_sorted = [] - for model_id in model_trail_ids: - try: - validations_sorted.append(validations[model_id]) - except Exception: - pass - - validations = validations_sorted - - box = go.Figure() - - y = [] - for j, acc in enumerate(validations): - # x.append(j) - y.append(numpy.mean([float(i) for i in acc])) - if len(acc) >= 2: - box.add_trace(go.Box(y=acc, name=str(j), marker_color="royalblue", showlegend=False, boxpoints=False)) - else: - box.add_trace(go.Scatter(x=[str(j)], y=[y[j]], showlegend=False)) - - rounds = list(range(len(y))) - box.add_trace(go.Scatter(x=rounds, y=y, name="Mean")) - - box.update_xaxes(title_text="Rounds") - box.update_yaxes(tickvals=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]) - box.update_layout(title_text="Metric distribution over clients: {}".format(metric), margin=dict(l=20, r=20, t=45, b=20)) - box = json.dumps(box, cls=plotly.utils.PlotlyJSONEncoder) - return box - - def create_round_plot(self): - """:return: - """ - trace_data = [] - metrics = self.round_time.find_one({"key": "round_time"}) - if metrics is None: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No data currently available for round time") - return False - - for post in self.round_time.find({"key": "round_time"}): - rounds = post["round"] - traces_data = post["round_time"] - - trace_data.append(go.Scatter(x=rounds, y=traces_data, mode="lines+markers", name="Reducer")) - - for 
rec in self.combiner_round_time.find({"key": "combiner_round_time"}): - c_traces_data = rec["round_time"] - - trace_data.append(go.Scatter(x=rounds, y=c_traces_data, mode="lines+markers", name="Combiner")) - - fig = go.Figure(data=trace_data) - fig.update_xaxes(title_text="Round") - fig.update_yaxes(title_text="Time (s)") - fig.update_layout(title_text="Round time") - round_t = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return round_t - - def create_cpu_plot(self): - """:return: - """ - metrics = self.psutil_usage.find_one({"key": "cpu_mem_usage"}) - if metrics is None: - fig = go.Figure(data=[]) - fig.update_layout(title_text="No data currently available for MEM and CPU usage") - cpu = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return False - - for post in self.psutil_usage.find({"key": "cpu_mem_usage"}): - cpu = post["cpu"] - mem = post["mem"] - ps_time = post["time"] - round = post["round"] - - # Create figure with secondary y-axis - fig = make_subplots(specs=[[{"secondary_y": True}]]) - fig.add_trace(go.Scatter(x=ps_time, y=cpu, mode="lines+markers", name="CPU (%)")) - - fig.add_trace(go.Scatter(x=ps_time, y=mem, mode="lines+markers", name="MEM (%)")) - - fig.add_trace( - go.Scatter( - x=ps_time, - y=round, - mode="lines+markers", - name="Round", - ), - secondary_y=True, - ) - - fig.update_xaxes(title_text="Date Time") - fig.update_yaxes(title_text="Percentage (%)") - fig.update_yaxes(title_text="Round", secondary_y=True) - fig.update_layout(title_text="CPU loads and memory usage") - cpu = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - return cpu diff --git a/pyproject.toml b/pyproject.toml index 59970c8ff..3806fee56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "fedn" -version = "0.9.6" +version = "0.11.1" description = "Scaleout Federated Learning" authors = [{ name = "Scaleout Systems AB", email = "contact@scaleoutsystems.com" }] readme = "README.rst" @@ -46,7 +46,7 @@ dependencies = [ "pyyaml", "plotly", "virtualenv", - "restrictedpython==7.1" + "tenacity!=8.4.0", ] [project.urls]
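# Illustrative usage sketch (editor's addition, not part of the patch): the store
# refactor above replaces the NotImplementedError update() stubs with methods that
# return a (success, result) tuple, so callers branch on the outcome instead of
# catching exceptions. `session_store`, `session_id` and `new_config` are
# hypothetical placeholders; new_config must pass _validate_session_config
# (note that round_timeout may now be a float as well as an int).
ok, result = session_store.update(session_id, {"session_config": new_config})
if ok:
    print("Updated session:", result["session_id"])  # on success: the refreshed document
else:
    print("Update rejected:", result)  # on failure: a validation or database error message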