From 916e42a34e9eead1bfa8269eb4a4d021c29cd449 Mon Sep 17 00:00:00 2001 From: Bart van Beusekom Date: Fri, 26 Jul 2024 10:41:18 +0200 Subject: [PATCH] Initial commit of personalized algorithm template --- .copier-answers.yml | 35 +++++++++ .github/workflows/release.yaml | 28 +++++++ .gitignore | 106 ++++++++++++++++++++++++++ Dockerfile | 19 +++++ LICENSE | 21 +++++ README.md | 85 +++++++++++++++++++++ algorithm_store.json | 43 +++++++++++ docs/Makefile | 23 ++++++ docs/README.md | 19 +++++ docs/conf.py | 56 ++++++++++++++ docs/index.rst | 36 +++++++++ docs/make.bat | 35 +++++++++ docs/requirements.txt | 6 ++ docs/v6-summary-py/implementation.rst | 23 ++++++ docs/v6-summary-py/privacy.rst | 46 +++++++++++ docs/v6-summary-py/references.rst | 5 ++ docs/v6-summary-py/usage.rst | 58 ++++++++++++++ docs/v6-summary-py/validation.rst | 5 ++ requirements.txt | 2 + setup.py | 28 +++++++ test/test.py | 76 ++++++++++++++++++ test/test_data.csv | 19 +++++ v6-summary-py/__init__.py | 2 + v6-summary-py/central.py | 62 +++++++++++++++ v6-summary-py/partial.py | 31 ++++++++ 25 files changed, 869 insertions(+) create mode 100644 .copier-answers.yml create mode 100644 .github/workflows/release.yaml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 algorithm_store.json create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/v6-summary-py/implementation.rst create mode 100644 docs/v6-summary-py/privacy.rst create mode 100644 docs/v6-summary-py/references.rst create mode 100644 docs/v6-summary-py/usage.rst create mode 100644 docs/v6-summary-py/validation.rst create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 test/test.py create mode 100644 test/test_data.csv create mode 100644 
v6-summary-py/__init__.py create mode 100644 v6-summary-py/central.py create mode 100644 v6-summary-py/partial.py diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 0000000..2309c0a --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,35 @@ +# DO NOT MODIFY THIS FILE MANUALLY. +# This file contains the answers to the questions asked by the +# v6-algorithm-template-creator (using the Copier package). The answers will +# be updated if you re-run the template creator. Also, the answers in this file +# may be used to submit your algorithm to a vantage6 algorithm store. +_commit: 4.3.4 +_src_path: gh:vantage6/v6-algorithm-template.git +advanced: true +algorithm_description: Create a summary of the data (mean, range, variance, length, + ...) +algorithm_name: v6-summary-py +author: B. van Beusekom, H. Alradhi, F.C. Martin +central_args: +- columns +- organizations_to_include +central_function_client: true +central_function_data: false +central_function_name: summary +copyright: IKNL, 2024 +docker_image: harbor2.vantage6.ai/algorithms/v6-summary-py +docker_registry: harbor2.vantage6.ai +has_central_function: true +has_docs: true +has_gh_pipeline: true +has_partial_function: true +open_source_license: mit +partial_args: +- columns +partial_function_client: false +partial_function_data: true +partial_function_name: summary_per_data_station +partial_function_number_databases: 1 +private_registry: true +project_name: v6-summary-py +use_vpn: false diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..e52ac47 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,28 @@ +name: Create Release + +on: + push: + branches: + - main + +jobs: + create-docker-image: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Login to Docker registry + uses: docker/login-action@v3 + with: + registry: harbor2.vantage6.ai + username: ${{ 
secrets.DOCKER_USER }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Run Makefile Rule + run: docker build -t harbor2.vantage6.ai/algorithms/v6-summary-py . + + - name: Push Docker image + run: | + docker push harbor2.vantage6.ai/algorithms/v6-summary-py \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..346fbc2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,106 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +.vscode/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..55bd5a3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +# basic python3 image as base +FROM harbor2.vantage6.ai/infrastructure/algorithm-base + +# This is a placeholder that should be 
overloaded by invoking +# docker build with '--build-arg PKG_NAME=...' +ARG PKG_NAME="v6-summary-py" + +# install federated algorithm +COPY . /app +RUN pip install /app + + +# Set environment variable to make name of the package available within the +# docker image. +ENV PKG_NAME=${PKG_NAME} + +# Tell docker to execute `wrap_algorithm()` when the image is run. This function +# will ensure that the algorithm method is called properly. +CMD python -c "from vantage6.algorithm.tools.wrap import wrap_algorithm; wrap_algorithm()" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8d45e06 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 IKNL + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d01f33 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ + +# v6-summary-py + +Create a summary of the data (mean, range, variance, length, ...) 
+ +This algorithm is designed to be run with the [vantage6](https://vantage6.ai) +infrastructure for distributed analysis and learning. + +The base code for this algorithm has been created via the +[v6-algorithm-template](https://github.com/vantage6/v6-algorithm-template) +template generator. + +### Checklist + +Note that the template generator does not create a completely ready-to-use +algorithm yet. There are still a number of things you have to do yourself. +Please ensure to execute the following steps. The steps are also indicated with +TODO statements in the generated code - so you can also simply search the +code for TODO instead of following the checklist below. + +- [ ] Include a URL to your code repository in setup.py. +- [ ] Implement your algorithm functions. + - [ ] You are free to add more arguments to the functions. Be sure to add them + *after* the `client` and dataframe arguments. + - [ ] When adding new arguments, if you run the `test/test.py` script, be sure + to include values for these arguments in the `client.task.create()` calls + that are available there. +- [ ] If you are using Python packages that are not in the standard library, add + them to the `requirements.txt` and `setup.py` file. +- [ ] Fill in the documentation template. This will help others to understand your + algorithm, be able to use it safely, and to contribute to it. +- [ ] If you want to submit your algorithm to a vantage6 algorithm store, be sure + to fill in everything in ``algorithm_store.json`` (and be sure to update + it if you change function names, arguments, etc.). +- [ ] Create a ``DOCKER_USER`` and ``DOCKER_PASSWORD`` secret in the GitHub repository + settings. This will be used to push the Docker image to the registry in the github + pipeline. +- [ ] Finally, remove this checklist section to keep the README clean. + +### Dockerizing your algorithm + +To finally run your algorithm on the vantage6 infrastructure, you need to +create a Docker image of your algorithm. 
+ +#### Automatically + +The easiest way to create a Docker image is to use the GitHub Actions pipeline to +automatically build and push the Docker image. All that you need to do is push a +commit to the ``main`` branch. + +#### Manually + +A Docker image can be created by executing the following command in the root of your +algorithm directory: + +```bash +docker build -t [my_docker_image_name] . +``` + +where you should provide a sensible value for the Docker image name. The +`docker build` command will create a Docker image that contains your algorithm. +You can create an additional tag for it by running + +```bash +docker tag [my_docker_image_name] [another_image_name] +``` + +This way, you can e.g. do +`docker tag local_average_algorithm harbor2.vantage6.ai/algorithms/average` to +make the algorithm available on a remote Docker registry (in this case +`harbor2.vantage6.ai`). + +Finally, you need to push the image to the Docker registry. This can be done +by running + +```bash +docker push [my_docker_image_name] +``` + +Note that you need to be logged in to the Docker registry before you can push +the image. You can do this by running `docker login` and providing your +credentials. Check [this page](https://docs.docker.com/get-started/04_sharing_app/) +for more details on sharing images on Docker Hub. If you are using a different +Docker registry, check the documentation of that registry and be sure that you +have sufficient permissions. 
\ No newline at end of file diff --git a/algorithm_store.json b/algorithm_store.json new file mode 100644 index 0000000..3ba4c52 --- /dev/null +++ b/algorithm_store.json @@ -0,0 +1,43 @@ +{ + "name": "v6-summary-py", + "image": "harbor2.vantage6.ai/algorithms/v6-summary-py", + "vantage6_version": "4.3", + "partitioning": "horizontal", + "functions": [ + { + "name": "summary", + "description": "Create a summary of the data (mean, range, variance, length, ...)", + "type": "central", + "databases": [], + "arguments": [ + { + "name": "columns", + "type": "", + "description": "" + }, + { + "name": "organizations_to_include", + "type": "", + "description": "" + } + ] + }, + { + "name": "summary_per_data_station", + "description": "", + "type": "federated", + "databases": [ + { + "name": "Database 1" + } + ], + "arguments": [ + { + "name": "columns", + "type": "", + "description": "" + } + ] + } + ] +} \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..3473b9c --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,23 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +livehtml: + sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..8dac731 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,19 @@ +# Documentation of v6-summary-py + +### How to run this documentation locally + +```bash + +# Install the required packages +pip install -r requirements.txt + +# Build the documentation +make html + +# Run a local service to view the documentation that updates when you make changes +make livehtml +``` + +### How to publish the documentation + +View the instructions from [Sphinx](https://sphinx-rtd-tutorial.readthedocs.io/en/latest/read-the-docs.html) \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..f169ee3 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,56 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# + +# import os +# import sys +# sys.path.insert(0, os.path.abspath('../package')) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "v6-summary-py" +copyright = "" +author = "B. van Beusekom, H. Alradhi, F.C. 
Martin" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + "sphinx_click.ext", +] + + +napoleon_use_ivar = True + +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "furo" +html_static_path = ["_static"] + +master_doc = "index" + +add_module_names = False + +pygments_style = None + +numfig = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..ecd1ed3 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,36 @@ +Overview +======== + +Description +----------- + +.. Give short description of the algorithm on this main page of the docs + +Authors +------- + +.. List authors. + +Source code +----------- + +.. Describe where to find source code and docker files + + +Contents +-------- + +.. toctree:: + :maxdepth: 2 + :hidden: + + self + +.. 
toctree:: + :maxdepth: 2 + + v6-summary-py/implementation + v6-summary-py/usage + v6-summary-py/privacy + v6-summary-py/validation + v6-summary-py/references diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..dc1312a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/v6-summary-py/implementation.rst b/docs/v6-summary-py/implementation.rst new file mode 100644 index 0000000..741aa25 --- /dev/null +++ b/docs/v6-summary-py/implementation.rst @@ -0,0 +1,23 @@ +Implementation +============== + +Overview +-------- + +Central (``summary``) +--------------------- +The central part is responsible for the orchestration and aggregation of the algorithm. + +.. Describe the central function here. + +Partials +-------- +Partials are the computations that are executed on each node. 
The partials have access +to the data that is stored on the node. The partials are executed in parallel on each +node. + +``summary_per_data_station`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. Describe the partial function. + diff --git a/docs/v6-summary-py/privacy.rst b/docs/v6-summary-py/privacy.rst new file mode 100644 index 0000000..cb3a2df --- /dev/null +++ b/docs/v6-summary-py/privacy.rst @@ -0,0 +1,46 @@ +Privacy +======= + +Guards +------ + +.. What have you done to protect your users' privacy? E.g. threshold on low counts, +.. noise addition, etc. + +Data sharing +------------ + +.. which data is shared between the parties? E.g. for an average, sum and total count +.. are shared. + +Vulnerabilities to known attacks +-------------------------------- + +.. Table below lists some well-known attacks. You could fill in this table to show +.. which attacks would be possible in your system. + +.. list-table:: + :widths: 25 10 65 + :header-rows: 1 + + * - Attack + - Risk eliminated? + - Risk analysis + * - Reconstruction + - ⚠ + - May happen if ... + * - Differencing + - ❌ + - Possible by doing A then B... + * - Deep Leakage from Gradients (DLG) + - ✔ + - + * - Generative Adversarial Networks (GAN) + - ✔ + - + * - Model Inversion + - ✔ + - + * - Watermark Attack + - ✔ + - \ No newline at end of file diff --git a/docs/v6-summary-py/references.rst b/docs/v6-summary-py/references.rst new file mode 100644 index 0000000..9c45aab --- /dev/null +++ b/docs/v6-summary-py/references.rst @@ -0,0 +1,5 @@ +References +========== + +.. If applicable, include references to papers, books, or other documents + for further reading. \ No newline at end of file diff --git a/docs/v6-summary-py/usage.rst b/docs/v6-summary-py/usage.rst new file mode 100644 index 0000000..b991d19 --- /dev/null +++ b/docs/v6-summary-py/usage.rst @@ -0,0 +1,58 @@ +How to use +========== + +Input arguments +--------------- + +.. describe the input arguments: +.. 
['columns', 'organizations_to_include'] + +Python client example +--------------------- + +To understand the information below, you should be familiar with the vantage6 +framework. If you are not, please read the `documentation <https://docs.vantage6.ai>`_ +first, especially the part about the +`Python client <https://docs.vantage6.ai/en/main/user/pyclient.html>`_. + +.. TODO Some explanation of the code below + +.. code-block:: python + + from vantage6.client import Client + + server = 'http://localhost' + port = 5000 + api_path = '/api' + private_key = None + username = 'root' + password = 'password' + + # Create connection with the vantage6 server + client = Client(server, port, api_path) + client.setup_encryption(private_key) + client.authenticate(username, password) + + input_ = { + 'master': True, + 'method': 'summary', + 'args': [], + 'kwargs': { + 'columns': 'my_value', + 'organizations_to_include': 'my_value', + }, + 'output_format': 'json' + } + + my_task = client.task.create( + collaboration=1, + organizations=[1], + name='v6-summary-py', + description='Create a summary of the data (mean, range, variance, length, ...)', + image='harbor2.vantage6.ai/algorithms/v6-summary-py', + input=input_, + data_format='json' + ) + + task_id = my_task.get('id') + results = client.wait_for_results(task_id) \ No newline at end of file diff --git a/docs/v6-summary-py/validation.rst b/docs/v6-summary-py/validation.rst new file mode 100644 index 0000000..40b8bb8 --- /dev/null +++ b/docs/v6-summary-py/validation.rst @@ -0,0 +1,5 @@ +Validation +========== + +.. Describe how the algorithm has been tested and how a user may test the algorithm +.. themselves (if applicable). 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1f33787 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +vantage6-algorithm-tools +pandas diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e2ba809 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +from os import path +from codecs import open +from setuptools import setup, find_packages + +# we're using a README.md, if you do not have this in your folder, simply +# replace this with a string. +here = path.abspath(path.dirname(__file__)) +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +# Here you specify the meta-data of your package. The `name` argument is +# needed in some other steps. +setup( + name='v6-summary-py', + version="1.0.0", + description='Create a summary of the data (mean, range, variance, length, ...)', + long_description=long_description, + long_description_content_type='text/markdown', + # TODO add a url to your github repository here (or remove this line if + # you do not want to make your source code public) + # url='https://github.com/....', + packages=find_packages(), + python_requires='>=3.10', + install_requires=[ + 'vantage6-algorithm-tools', + 'pandas' + ] +) diff --git a/test/test.py b/test/test.py new file mode 100644 index 0000000..04f5da2 --- /dev/null +++ b/test/test.py @@ -0,0 +1,76 @@ +""" +Run this script to test your algorithm locally (without building a Docker +image) using the mock client. + +Run as: + + python test.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. 
This can be done by running: + + pip install vantage6-algorithm-tools +""" +from vantage6.algorithm.tools.mock_client import MockAlgorithmClient +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +## Mock client +client = MockAlgorithmClient( + datasets=[ + # Data for first organization + [{ + "database": current_path / "test_data.csv", + "db_type": "csv", + "input_data": {} + }], + # Data for second organization + [{ + "database": current_path / "test_data.csv", + "db_type": "csv", + "input_data": {} + }] + ], + module="v6-summary-py" +) + +# list mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +# Run the central method on 1 node and get the results +central_task = client.task.create( + input_={ + "method":"summary", + "kwargs": { + # TODO add sensible values + "columns": "some_value", + "organizations_to_include": "some_value", + + } + }, + organizations=[org_ids[0]], +) +results = client.wait_for_results(central_task.get("id")) +print(results) + +# Run the partial method for all organizations +task = client.task.create( + input_={ + "method":"summary_per_data_station", + "kwargs": { + # TODO add sensible values + "columns": "some_value", + + } + }, + organizations=org_ids +) +print(task) + +# Get the results from the task +results = client.wait_for_results(task.get("id")) +print(results) diff --git a/test/test_data.csv b/test/test_data.csv new file mode 100644 index 0000000..34cbe41 --- /dev/null +++ b/test/test_data.csv @@ -0,0 +1,19 @@ +"Name","Gender","Age","Height(in)","Weight(lbs)" +"Alex","M",41,74,170 +"Bert","M",42,68,166 +"Carl","M",32,70,155 +"Dave","M",39,72,167 +"Elly","F",30,66,124 +"Fran","F",33,66,115 +"Gwen","F",26,64,121 +"Hank","M",30,71,158 +"Ivan","M",53,72,175 +"Jake","M",32,69,143 +"Kate","F",47,69,139 +"Luke","M",34,72,163 +"Myra","F",23,62,98 +"Neil","M",36,75,160 +"Omar","M",38,70,145 
+"Page","F",31,67,135 +"Quin","M",29,71,176 +"Ruth","F",28,65,131 \ No newline at end of file diff --git a/v6-summary-py/__init__.py b/v6-summary-py/__init__.py new file mode 100644 index 0000000..49648d3 --- /dev/null +++ b/v6-summary-py/__init__.py @@ -0,0 +1,2 @@ +from .central import * +from .partial import * diff --git a/v6-summary-py/central.py b/v6-summary-py/central.py new file mode 100644 index 0000000..2556adc --- /dev/null +++ b/v6-summary-py/central.py @@ -0,0 +1,62 @@ +""" +This file contains all central algorithm functions. It is important to note +that the central method is executed on a node, just like any other method. + +The results in a return statement are sent to the vantage6 server (after +encryption if that is enabled). +""" +from typing import Any + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.tools.decorators import algorithm_client +from vantage6.algorithm.client import AlgorithmClient + + +@algorithm_client +def summary( + client: AlgorithmClient, columns, organizations_to_include +) -> Any: + + """ Central part of the algorithm """ + # TODO implement this function. Below is an example of a simple but typical + # central function. + + # get all organizations (ids) within the collaboration so you can send a + # task to them. + organizations = client.organization.list() + org_ids = [organization.get("id") for organization in organizations] + + # Define input parameters for a subtask + info("Defining input parameters") + input_ = { + "method": "summary_per_data_station", + "kwargs": { + # TODO add sensible values + "columns": "some_value", + + } + } + + # create a subtask for all organizations in the collaboration. + info("Creating subtask for all organizations in the collaboration") + task = client.task.create( + input_=input_, + organizations=org_ids, + name="My subtask", + description="This is a very important subtask" + ) + + + # wait for node to return results of the subtask. 
+ info("Waiting for results") + results = client.wait_for_results(task_id=task.get("id")) + info("Results obtained!") + + # TODO probably you want to aggregate or combine these results here. + # For instance: + # results = [sum(result) for result in results] + + # return the final results of the algorithm + return results + +# TODO Feel free to add more central functions here. diff --git a/v6-summary-py/partial.py b/v6-summary-py/partial.py new file mode 100644 index 0000000..832b96b --- /dev/null +++ b/v6-summary-py/partial.py @@ -0,0 +1,31 @@ +""" +This file contains all partial algorithm functions, that are normally executed +on all nodes for which the algorithm is executed. + +The results in a return statement are sent to the vantage6 server (after +encryption if that is enabled). From there, they are sent to the partial task +or directly to the user (if they requested partial results). +""" +import pandas as pd +from typing import Any + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.tools.decorators import data + + +@data(1) +def summary_per_data_station( + df1: pd.DataFrame, columns +) -> Any: + + """ Decentral part of the algorithm """ + # TODO this is a simple example to show you how to return something simple. + # Replace it by your own code + info("Computing mean age by gender") + result = df1[["Gender", "Age"]].groupby("Gender").mean() + + # Return results to the vantage6 server. + # TODO make sure no privacy sensitive data is shared + return result.to_dict() + +# TODO Feel free to add more partial functions here.