diff --git a/.github/workflows/python-linters.yml b/.github/workflows/python-linters.yml new file mode 100644 index 0000000..2ff3d9b --- /dev/null +++ b/.github/workflows/python-linters.yml @@ -0,0 +1,28 @@ +name: lip-dp linters + +on: + push: + branches: + - main + - release-no-advertising + pull_request: + branches: + - main + - release-no-advertising + +jobs: + checks: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox + - name: Check lint + run: tox -e py311-lint \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6b5cb7e..53e32e4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,9 +2,9 @@ name: tests on: push: - branches: ["release-no-advertising"] + branches: ["main", "release-no-advertising"] pull_request: - branches: ["release-no-advertising"] + branches: ["main", "release-no-advertising"] jobs: build-and-test: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b279b16..27251e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,7 +45,7 @@ repos: rev: v3.0.0a5 hooks: - id: pylint - args: [--enable=unused-import --max-line-length=100, --disable=all] + args: [--disable=all] # - repo: https://github.com/commitizen-tools/commitizen diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 825a653..ebca628 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,14 +4,14 @@ Thanks for taking the time to contribute! From opening a bug report to creating a pull request: every contribution is appreciated and welcome. If you're planning to implement a new feature or change -the api please create an [issue first](https://https://github.com/deel-ai/dp-lipschitz/issues/new). 
This way we can ensure that your precious +the api please create an [issue first](https://github.com/Algue-Rythme/lip-dp/issues). This way we can ensure that your precious work is not in vain. ## Setup with make -- Clone the repo `git clone https://github.com/deel-ai/lipdp.git`. -- Go to your freshly downloaded repo `cd lipdp` +- Clone the repo `git clone git@github.com:Algue-Rythme/lip-dp.git`. +- Go to your freshly downloaded repo `cd lip-dp` - Create a virtual environment and install the necessary dependencies for development: `make prepare-dev && source lipdp_dev_env/bin/activate`. @@ -26,9 +26,8 @@ This command activate your virtual environment and launch the `tox` command. `tox` on the otherhand will do the following: -- run pytest on the tests folder with python 3.6, python 3.7 and python 3.8 -> Note: If you do not have those 3 interpreters the tests would be only performs with your current interpreter -- run pylint on the deel-datasets main files, also with python 3.6, python 3.7 and python 3.8 +- run pytest on the tests folder +- run pylint on the deel-datasets main files > Note: It is possible that pylint throw false-positive errors. If the linting test failed please check first pylint output to point out the reasons. Please, make sure you run all the tests at least once before opening a pull request. @@ -42,7 +41,7 @@ Basically, it will check that your code follow a certain number of convention. A After getting some feedback, push to your fork and submit a pull request. We may suggest some changes or improvements or alternatives, but for small changes -your pull request should be accepted quickly (see [Governance policy](https://github.com/deel-ai/lipdp/blob/master/GOVERNANCE.md)). +your pull request should be accepted quickly (see [Governance policy](https://github.com/Algue-Rythme/lip-dp/blob/release-no-advertising/GOVERNANCE.md)). 
Something that will increase the chance that your pull request is accepted: @@ -51,4 +50,3 @@ Something that will increase the chance that your pull request is accepted: - Follow the existing coding style and run `make check_all` to check all files format. - Write a [good commit message](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) (we follow a lowercase convention). - For a major fix/feature make sure your PR has an issue and if it doesn't, please create one. This would help discussion with the community, and polishing ideas in case of a new feature. - diff --git a/README.md b/README.md index 90ad831..ea0322c 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,26 @@

-lipdp_logo

+lipdp_logo

- + Tests + + Linter +
-
+

LipDP is a Python toolkit dedicated to robust and certifiable learning under privacy guarantees. +

This package is the code for the paper "*DP-SGD Without Clipping: The Lipschitz Neural Network Way*" by Louis Béthune, Thomas Massena, Thibaut Boissin, Aurélien Bellet, Franck Mamalet, Yannick Prudent, Corentin Friedrich, Mathieu Serrurier, David Vigouroux, published at the **International Conference on Learning Representations (ICLR 2024)**. The paper is available on [arxiv](https://arxiv.org/abs/2305.16202). diff --git a/deel/lipdp/dynamic.py b/deel/lipdp/dynamic.py index cf786eb..8091b09 100644 --- a/deel/lipdp/dynamic.py +++ b/deel/lipdp/dynamic.py @@ -20,6 +20,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Dynamic gradient clipping for differential privacy.""" import random from abc import abstractmethod @@ -66,9 +67,11 @@ def on_train_begin(self, logs=None): def get_gradloss(self): """Computes the norm of gradient of the loss with respect to the model's output. - - Warning: this method is unsafe from a privacy perspective, as the true gradient bound is computed. - It is meant to be used with privacy-preserving methods only, such as the ones implemented in this module. + + Warning: this method is unsafe from a privacy perspective, + as the true gradient bound is computed. + It is meant to be used with privacy-preserving methods only, + such as the ones implemented in this module. """ batch = next(iter(self.ds_train.take(1))) imgs, labels = batch diff --git a/deel/lipdp/model.py b/deel/lipdp/model.py index 550c63f..6e4745c 100644 --- a/deel/lipdp/model.py +++ b/deel/lipdp/model.py @@ -20,6 +20,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+"""Model class for differentially private training with Lipschitz constraints.""" from dataclasses import dataclass import numpy as np diff --git a/deel/lipdp/pipeline.py b/deel/lipdp/pipeline.py index b1a44bf..0cf60e4 100644 --- a/deel/lipdp/pipeline.py +++ b/deel/lipdp/pipeline.py @@ -354,9 +354,11 @@ def load_and_prepare_images_data( nb_samples_train=ds_info.splits["train"].num_examples, nb_samples_test=ds_info.splits["test"].num_examples, class_names=ds_info.features["label"].names, - nb_steps_per_epochs=ds_train.cardinality().numpy() - if ds_train.cardinality() > 0 # handle case cardinality return -1 (unknown) - else ds_info.splits["train"].num_examples / batch_size, + nb_steps_per_epochs=( + ds_train.cardinality().numpy() + if ds_train.cardinality() > 0 # handle case cardinality return -1 (unknown) + else ds_info.splits["train"].num_examples / batch_size + ), batch_size=batch_size, max_norm=bound_val, ) @@ -493,9 +495,11 @@ def prepare_tabular_data( nb_samples_train=x_train.shape[0], nb_samples_test=x_test.shape[0], class_names=[str(i) for i in range(nb_classes)], - nb_steps_per_epochs=ds_train.cardinality().numpy() - if ds_train.cardinality() > 0 # handle case cardinality return -1 (unknown) - else x_train.shape[0] / batch_size, + nb_steps_per_epochs=( + ds_train.cardinality().numpy() + if ds_train.cardinality() > 0 # handle case cardinality return -1 (unknown) + else x_train.shape[0] / batch_size + ), batch_size=batch_size, max_norm=bound_val, ) diff --git a/deel/lipdp/sensitivity.py b/deel/lipdp/sensitivity.py index fc79385..2132a9f 100644 --- a/deel/lipdp/sensitivity.py +++ b/deel/lipdp/sensitivity.py @@ -91,10 +91,14 @@ def fun(epoch): elif error < atol: # This branch should never be taken if fun is a non-decreasing function of the number of epochs. # fun is mathematcally non-decreasing, but numerical inaccuracy can lead to this case. 
- print(f"Numerical inaccuracy with error {error:.7f} in the dichotomy search: using a conservative value.") + print( + f"Numerical inaccuracy with error {error:.7f} in the dichotomy search: using a conservative value." + ) return epochs_min - 1 else: - assert False, f"Numerical inaccuracy with error {error:.7f}>{atol:.3f} in the dichotomy search." + assert ( + False, + ), f"Numerical inaccuracy with error {error:.7f}>{atol:.3f} in the dichotomy search." return epochs_max @@ -106,7 +110,7 @@ def gradient_norm_check(upper_bounds, model, examples): Args : upper_bounds: maximum gradient bounds for each layer (dictionnary of 'layers name ': 'bounds' pairs). model: The model containing the layers we are interested in. Layers must only have one trainable variable. - examples: a batch of examples to test on. + examples: a batch of examples to test on. Returns : Boolean value. True corresponds to upper bound has been validated. """ @@ -117,19 +121,30 @@ def gradient_norm_check(upper_bounds, model, examples): assert len(layer.trainable_variables) < 2 if len(layer.trainable_variables) == 1: assert len(layer.trainable_variables) == 1 - train_var = layer.trainable_variables[0] var_name = layer.trainable_variables[0].name var_seen.add(var_name) bound = upper_bounds[var_name] - check_layer_gradient_norm(bound, layer, activations) + bound_check = check_layer_gradient_norm(bound, layer, activations) + assert ( + bound_check + ), f"Gradient norm check failed for layer {layer.name} with bound {bound}." activations = post_activations for var_name in upper_bounds: assert var_name in var_seen def check_layer_gradient_norm(S, layer, activations): + """Check that the maximum gradient norm of a layer is less than S. + + Args: + S: The maximum gradient norm. + layer: The layer to check. + activations: The input to the layer. + Returns: + Boolean value. True corresponds to upper bound has been validated. 
+ """ trainable_vars = layer.trainable_variables[0] - with tf.GradientTape() as tape: + with tf.GradientTape() as tape: y_pred = layer(activations, training=True) flat_pred = tf.reshape(y_pred, (y_pred.shape[0], -1)) jacobians = tape.jacobian(flat_pred, trainable_vars) @@ -141,8 +156,8 @@ def check_layer_gradient_norm(S, layer, activations): (y_pred.shape[0], -1, np.prod(trainable_vars.shape)), name="Reshaped_Gradient", ) - J_sigma = tf.linalg.svd(jacobians, full_matrices=False, compute_uv=False, name=None) - J_2norm = tf.reduce_max(J_sigma, axis=-1) - J_2norm = tf.reduce_max(J_2norm).numpy() + sigma = tf.linalg.svd(jacobians, full_matrices=False, compute_uv=False, name=None) + norm2 = tf.reduce_max(sigma, axis=-1) + norm2 = tf.reduce_max(norm2).numpy() atol = 1e-5 - return J_2norm < S+atol + return norm2 < (S + atol) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 456c9d3..ebca628 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -4,14 +4,14 @@ Thanks for taking the time to contribute! From opening a bug report to creating a pull request: every contribution is appreciated and welcome. If you're planning to implement a new feature or change -the api please create an [issue first](https://https://github.com/deel-ai/dp-lipschitz/issues/new). This way we can ensure that your precious +the api please create an [issue first](https://github.com/Algue-Rythme/lip-dp/issues). This way we can ensure that your precious work is not in vain. ## Setup with make -- Clone the repo `git clone https://github.com/deel-ai/dp-lipschitz.git`. -- Go to your freshly downloaded repo `cd lipdp` +- Clone the repo `git clone git@github.com:Algue-Rythme/lip-dp.git`. +- Go to your freshly downloaded repo `cd lip-dp` - Create a virtual environment and install the necessary dependencies for development: `make prepare-dev && source lipdp_dev_env/bin/activate`. @@ -26,9 +26,8 @@ This command activate your virtual environment and launch the `tox` command. 
`tox` on the otherhand will do the following: -- run pytest on the tests folder with python 3.6, python 3.7 and python 3.8 -> Note: If you do not have those 3 interpreters the tests would be only performs with your current interpreter -- run pylint on the deel-datasets main files, also with python 3.6, python 3.7 and python 3.8 +- run pytest on the tests folder +- run pylint on the deel-datasets main files > Note: It is possible that pylint throw false-positive errors. If the linting test failed please check first pylint output to point out the reasons. Please, make sure you run all the tests at least once before opening a pull request. @@ -42,7 +41,7 @@ Basically, it will check that your code follow a certain number of convention. A After getting some feedback, push to your fork and submit a pull request. We may suggest some changes or improvements or alternatives, but for small changes -your pull request should be accepted quickly (see [Governance policy](https://github.com/deel-ai/lipdp/blob/master/GOVERNANCE.md)). +your pull request should be accepted quickly (see [Governance policy](https://github.com/Algue-Rythme/lip-dp/blob/release-no-advertising/GOVERNANCE.md)). Something that will increase the chance that your pull request is accepted: diff --git a/docs/assets/residuals.png b/docs/assets/residuals.png new file mode 100644 index 0000000..4840e69 Binary files /dev/null and b/docs/assets/residuals.png differ diff --git a/docs/index.md b/docs/index.md index a34c4b0..0a51c1c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,32 +1,40 @@ -# Index - -Mainly you could copy the README.md here. However, you should be careful with: - -- The banner section is different -- Link to assets (handling dark mode is different between GitHub and the documentation) -- Relative links - +

- + + + + Tests + + + Linter
-
+

-

- Libname is a Python toolkit dedicated to making people happy and fun. +LipDP is a Python toolkit dedicated to robust and certifiable learning under privacy guarantees. + + - -
- Explore Libname docs » -
+This package is the code for the paper "*DP-SGD Without Clipping: The Lipschitz Neural Network Way*" by Louis Béthune, Thomas Massena, Thibaut Boissin, Aurélien Bellet, Franck Mamalet, Yannick Prudent, Corentin Friedrich, Mathieu Serrurier, David Vigouroux, published at the **International Conference on Learning Representations (ICLR 2024)**. The paper is available on [arxiv](https://arxiv.org/abs/2305.16202). + + +State-of-the-art approaches for training Differentially Private (DP) Deep Neural Networks (DNN) face difficulties to estimate tight bounds on the sensitivity of the network's layers, and instead rely on a process of per-sample gradient clipping. This clipping process not only biases the direction of gradients but also proves costly both in memory consumption and in computation. To provide sensitivity bounds and bypass the drawbacks of the clipping process, we propose to rely on Lipschitz constrained networks. Our theoretical analysis reveals an unexplored link between the Lipschitz constant with respect to their input and the one with respect to their parameters. By bounding the Lipschitz constant of each layer with respect to its parameters, we prove that we can train these networks with privacy guarantees. Our analysis not only allows the computation of the aforementioned sensitivities at scale, but also provides guidance on how to maximize the gradient-to-noise ratio for fixed privacy guarantees. To facilitate the application of Lipschitz networks and foster robust and certifiable learning under privacy guarantees, we provide this Python package that implements building blocks allowing the construction and private training of such networks. -

+
+ backpropforbounds +
+ +The sensitivity is computed automatically by the package, and no element-wise clipping is required. This is translated into a new DP-SGD algorithm, called Clipless DP-SGD, that is faster and more memory efficient than DP-SGD with clipping. + +
+ speedcurves +
## 📚 Table of contents @@ -34,7 +42,6 @@ Mainly you could copy the README.md here. However, you should be careful with: - [🔥 Tutorials](#-tutorials) - [🚀 Quick Start](#-quick-start) - [📦 What's Included](#-whats-included) -- [👍 Contributing](#-contributing) - [👀 See Also](#-see-also) - [🙏 Acknowledgments](#-acknowledgments) - [👨‍🎓 Creator](#-creator) @@ -45,90 +52,135 @@ Mainly you could copy the README.md here. However, you should be careful with: We propose some tutorials to get familiar with the library and its API: -- [Getting started](https://colab.research.google.com/drive/1XproaVxXjO9nrBSyyy7BuKJ1vy21iHs2) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deel-ai//blob/master/docs/notebooks/demo_fake.ipynb) - -You do not necessarily need to register the notebooks on GitHub. Notebooks can be hosted on a specific [drive](https://drive.google.com/drive/folders/1DOI1CsL-m9jGjkWM1hyDZ1vKmSU1t-be). +- **Demo on MNIST** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1s3LBIxf0x1sOMQUw6BHpxbeUzmwtaP0d) +- **Demo on CIFAR10** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RbALHN-Eib6CCUznLrbiETX7JJrFaUB0) ## 🚀 Quick Start -Libname requires some stuff and several libraries including Numpy. Installation can be done using Pypi: - +lipDP requires some stuff and several libraries including Numpy. Installation can be done locally by cloning the repository and running: ```python -pip install libname +pip install -e .[dev] ``` -Now that Libname is installed, here are some basic examples of what you can do with the available modules. 
+### Setup privacy parameters -### Print Hello World - -Let's start with a simple example: +Parameters are stored in a dataclass: ```python -from libname.fake import hello_world - -hello_world() +from deel.lipdp.model import DPParameters +dp_parameters = DPParameters( + noisify_strategy="local", + noise_multiplier=4.0, + delta=1e-5, +) + +epsilon_max = 10.0 ``` -### Make addition - -In order to add `a` to `b` you can use: +### Setup DP model ```python -from libname.fake import addition - -a = 1 -b = 2 -c = addition(a, b) +# construct DP_Sequential +model = DP_Sequential( + # works like usual sequential but requires DP layers + layers=[ + # BoundedInput works like Input, but performs input clipping to guarantee input bound + layers.DP_BoundedInput( + input_shape=dataset_metadata.input_shape, upper_bound=input_upper_bound + ), + layers.DP_QuickSpectralConv2D( # Reshaped Kernel Orthogonalization (RKO) convolution. + filters=32, + kernel_size=3, + kernel_initializer="orthogonal", + strides=1, + use_bias=False, # No biases since the framework handles a single tf.Variable per layer. + ), + layers.DP_GroupSort(2), # GNP activation function. + layers.DP_ScaledL2NormPooling2D(pool_size=2, strides=2), # GNP pooling. + layers.DP_QuickSpectralConv2D( # Reshaped Kernel Orthogonalization (RKO) convolution. + filters=64, + kernel_size=3, + kernel_initializer="orthogonal", + strides=1, + use_bias=False, # No biases since the framework handles a single tf.Variable per layer. + ), + layers.DP_GroupSort(2), # GNP activation function. + layers.DP_ScaledL2NormPooling2D(pool_size=2, strides=2), # GNP pooling. + + layers.DP_Flatten(), # Convert features maps to flat vector. + + layers.DP_QuickSpectralDense(512), # GNP layer with orthogonal weight matrix. 
+ layers.DP_GroupSort(2), + layers.DP_QuickSpectralDense(dataset_metadata.nb_classes), + ], + dp_parameters=dp_parameters, + dataset_metadata=dataset_metadata, +) ``` -## 📦 What's Included - -A list or table of methods available +### Setup accountant -## 👍 Contributing +The privacy accountant is composed of different mechanisms from `autodp` package that are combined to provide a privacy accountant for Clipless DP-SGD algorithm: -Feel free to propose your ideas or come and contribute with us on the Libname toolbox! We have a specific document where we describe in a simple way how to make your first pull request: [just here](CONTRIBUTING.md). +
+ rdpaccountant +
-## 👀 See Also +Adding a privacy accountant to your model is straightforward: -This library is one approach of many... +```python +from deel.lipdp.model import DP_Accountant + +callbacks = [ + DP_Accountant() +] + +model.fit( + ds_train, + epochs=num_epochs, + validation_data=ds_test, + callbacks=[ + # accounting is done thanks to a callback + DP_Accountant(log_fn="logging"), # wandb.log also available. + ], +) +``` -Other tools to explain your model include: +## 📦 What's Included -- [Random](https://www.youtube.com/watch?v=dQw4w9WgXcQ) +Code can be found in the `deel/lipdp` folder, the documentation can be found by running + `mkdocs build` and `mkdocs serve` (or loading `site/index.html`). Experiments were + done using the code in the `experiments` folder. -More from the DEEL project: +Other tools to perform DP-training include: -- [Xplique](https://github.com/deel-ai/xplique) a Python library exclusively dedicated to explaining neural networks. -- [deel-lip](https://github.com/deel-ai/deel-lip) a Python library for training k-Lipschitz neural networks on TF. -- [Influenciae](https://github.com/deel-ai/influenciae) Python toolkit dedicated to computing influence values for the discovery of potentially problematic samples in a dataset. -- [deel-torchlip](https://github.com/deel-ai/deel-torchlip) a Python library for training k-Lipschitz neural networks on PyTorch. -- [DEEL White paper](https://arxiv.org/abs/2103.10529) a summary of the DEEL team on the challenges of certifiable AI and the role of data quality, representativity and explainability for this purpose. +- [tensorflow-privacy](https://github.com/tensorflow/privacy) in Tensorflow +- [Opacus](https://opacus.ai/) in Pytorch +- [jax-privacy](https://github.com/google-deepmind/jax_privacy) in Jax ## 🙏 Acknowledgments -DEEL Logo -DEEL Logo -This project received funding from the French ”Investing for the Future – PIA3” program within the Artificial and Natural Intelligence Toulouse Institute (ANITI). 
The authors gratefully acknowledge the support of the DEEL project. +The creators thank the whole [DEEL](https://deel-ai.com/) team for its support, and [Aurélien Bellet](http://researchers.lille.inria.fr/abellet/) for his guidance. ## 👨‍🎓 Creators -If you want to highlight the main contributors - +The library has been created by [Louis Béthune](https://github.com/Algue-Rythme), [Thomas Masséna](https://github.com/massena-t) during an internship at [DEEL](https://deel-ai.com/), and [Thibaut Boissin](https://github.com/thib-s). ## 🗞️ Citation -If you use Libname as part of your workflow in a scientific publication, please consider citing 🗞️ [our paper](https://www.youtube.com/watch?v=dQw4w9WgXcQ): +If you find this work useful for your research, please consider citing it: ``` -@article{rickroll, - title={Rickrolling}, - author={Some Internet Trolls}, - journal={Best Memes}, - year={ND} +@inproceedings{ +bethune2024dpsgd, +title={{DP}-{SGD} Without Clipping: The Lipschitz Neural Network Way}, +author={Louis B{\'e}thune and Thomas Massena and Thibaut Boissin and Aur{\'e}lien Bellet and Franck Mamalet and Yannick Prudent and Corentin Friedrich and Mathieu Serrurier and David Vigouroux}, +booktitle={The Twelfth International Conference on Learning Representations}, +year={2024}, +url={https://openreview.net/forum?id=BEyEziZ4R6} } ``` ## 📝 License -The package is released under MIT license. \ No newline at end of file +The package is released under [MIT license](../LICENSE). diff --git a/docs/notebooks/advanced_cifar10.ipynb b/docs/notebooks/advanced_cifar10.ipynb index f37b750..dca2a33 100644 --- a/docs/notebooks/advanced_cifar10.ipynb +++ b/docs/notebooks/advanced_cifar10.ipynb @@ -132,7 +132,7 @@ "source": [ "With many parameters, it can be interesting to use `local` strategy over `global`, since the effective noise growths as $\\mathcal{O}(\\sqrt{(D)})$ in `global` strategy. 
Since the privacy leakge is more important is `local` strategy, we compensate with high `noise_multiplier`.\n", "\n", - "![DP-SGD accountant](fig_accountant.png \"DP-SGD accountant\")" + "![DP-SGD accountant](../assets/fig_accountant.png \"DP-SGD accountant\")" ] }, { @@ -220,7 +220,7 @@ "source": [ "The MLP Mixer uses residual connections. Residuals connections are handled with the utility function `make_residuals` that wraps the layers inside a block that handles bounds propagation.\n", "\n", - "![Residuals Connections](residuals.png \"Residual Connections\")" + "![Residuals Connections](../assets/residuals.png \"Residual Connections\")" ] }, { diff --git a/mkdocs.yml b/mkdocs.yml index f4d8766..bca7e67 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: lipdp +site_name: lip-dp # Set navigation here nav: @@ -10,14 +10,14 @@ nav: - deel.lipdp.pipeline module: api/pipeline.md - deel.lipdp.sensitivity module: api/sensitivity.md - Tutorials: - - "Demo 1: basic use on MNIST": notebooks/basic_mnist.ipynb - - "Demo 2: advanced use on CIFAR10": notebooks/advanced_cifar10.ipynb + - "Basic use on MNIST": notebooks/basic_mnist.ipynb + - "Residuals and dynamic clipping on CIFAR10": notebooks/advanced_cifar10.ipynb - Contributing: CONTRIBUTING.md theme: name: "material" - logo: assets/logo.png - favicon: assets/logo.png + logo: assets/lipdp_logo.png + favicon: assets/lipdp_logo.png palette: - scheme: default primary: dark @@ -66,5 +66,5 @@ extra_javascript: - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js - js/custom.js -repo_name: "deel-ai/" -repo_url: "https://github.com/deel-ai/" +repo_name: "Algue-Rythme/lip-dp" +repo_url: "https://github.com/Algue-Rythme/lip-dp" diff --git a/setup.cfg b/setup.cfg index 84a9214..74a40e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,6 @@ [flake8] # Use black line length: +exclude = tests/* max-line-length = 88 extend-ignore = # See https://github.com/PyCQA/pycodestyle/issues/373 @@ -27,9 +28,9 @@ 
ignore_missing_imports = True ignore_missing_imports = True [tox:tox] -envlist = py36,py37,py38,py36-lint +envlist = py310-lint -[testenv:py36-lint] +[testenv:py310-lint] deps = black flake8 diff --git a/setup.py b/setup.py index 05f1a58..8c7a8b7 100644 --- a/setup.py +++ b/setup.py @@ -27,27 +27,38 @@ from setuptools import find_packages from setuptools import setup -this_directory = os.path.dirname(__file__) -req_path = os.path.join(this_directory, "requirements.txt") -req_dev_path = os.path.join(this_directory, "requirements_dev.txt") +REQ_PATH = "requirements.txt" +REQ_DEV_PATH = "requirements_dev.txt" install_requires = [] -if os.path.exists(req_path): - with open(req_path) as fp: +if os.path.exists(REQ_PATH): + print("Loading requirements") + with open(REQ_PATH, encoding="utf-8") as fp: install_requires = [line.strip() for line in fp] -if os.path.exists(req_dev_path): - with open(req_dev_path) as fp: - install_dev_requires = [line.strip() for line in fp] +dev_requires = [ + "setuptools", + "pre-commit", + "pytest", + "tox", + "black", + "pytest", + "pylint", + "mkdocs", + "mkdocs-material", + "mkdocstrings[python]", + "mknotebooks", + "bump2version", +] -readme_path = os.path.join(this_directory, "README.md") +README_PATH = "README.md" readme_contents = "" -if os.path.exists(readme_path): - with open(readme_path, encoding="utf8") as fp: +if os.path.exists(README_PATH): + with open(README_PATH, encoding="utf8") as fp: readme_contents = fp.read().strip() -with open(os.path.join(this_directory, "deel/lipdp/VERSION"), encoding="utf8") as f: +with open("deel/lipdp/VERSION", encoding="utf8") as f: version = f.read().strip() setup( @@ -57,7 +68,7 @@ version=version, # Find the package automatically (include everything): packages=find_namespace_packages(include=["deel.*"]), - package_data={'': ['VERSION']}, + package_data={"": ["VERSION"]}, include_package_data=True, # Author information: # Author information: @@ -71,12 +82,13 @@ classifiers=[ "Programming Language 
:: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], licence="MIT", install_requires=install_requires, extras_require={ - "dev": install_dev_requires, + "dev": dev_requires, }, ) diff --git a/tests/model_test.py b/tests/model_test.py index 7ba87a4..c9260a1 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -26,13 +26,14 @@ from deel.lipdp.dynamic import AdaptiveQuantileClipping from deel.lipdp.layers import * -from deel.lipdp.model import DP_Sequential, DPParameters -from deel.lipdp.pipeline import bound_normalize, load_and_prepare_images_data from deel.lipdp.losses import DP_TauCategoricalCrossentropy +from deel.lipdp.model import DP_Sequential +from deel.lipdp.model import DPParameters +from deel.lipdp.pipeline import bound_normalize +from deel.lipdp.pipeline import load_and_prepare_images_data class ModelTest(parameterized.TestCase): - def _get_mnist_cnn(self): ds_train, _, dataset_metadata = load_and_prepare_images_data( "mnist", @@ -62,13 +63,13 @@ def _get_mnist_cnn(self): DP_SpectralDense(10, use_bias=False, kernel_initializer="orthogonal"), DP_AddBias(norm_max=norm_max), DP_ClipGradient( - clip_value=2. 
** 0.5, + clip_value=2.0**0.5, mode="dynamic", ), ] dp_parameters = DPParameters( - noisify_strategy='per-layer', + noisify_strategy="per-layer", noise_multiplier=2.2, delta=1e-5, ) @@ -81,7 +82,7 @@ def _get_mnist_cnn(self): optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3) loss = DP_TauCategoricalCrossentropy( - tau=1., reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE + tau=1.0, reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE ) model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"]) @@ -97,9 +98,7 @@ def test_create_residuals(self): input_shape = (32, 32, 3) patch_size = 4 - seq_len = (input_shape[0] // patch_size) * ( - input_shape[1] // patch_size - ) + seq_len = (input_shape[0] // patch_size) * (input_shape[1] // patch_size) multiplier = 1 mlp_seq_dim = multiplier * seq_len @@ -139,7 +138,9 @@ def test_adaptive_clipping(self): ) adaptive.set_model(model) callbacks = [adaptive] - model.fit(ds_train, epochs=2, callbacks=callbacks, steps_per_epoch=num_steps_test_case) + model.fit( + ds_train, epochs=2, callbacks=callbacks, steps_per_epoch=num_steps_test_case + ) if __name__ == "__main__": diff --git a/tests/sensitivity_test.py b/tests/sensitivity_test.py index 5441596..e573fce 100644 --- a/tests/sensitivity_test.py +++ b/tests/sensitivity_test.py @@ -25,7 +25,12 @@ from deel.lipdp.dynamic import AdaptiveQuantileClipping from deel.lipdp.layers import * -from deel.lipdp.model import DP_Sequential, DPParameters, get_eps_delta, compute_gradient_bounds +from deel.lipdp.model import ( + DP_Sequential, + DPParameters, + get_eps_delta, + compute_gradient_bounds +) from deel.lipdp.pipeline import bound_normalize, load_and_prepare_images_data from deel.lipdp.losses import DP_TauCategoricalCrossentropy from deel.lipdp.sensitivity import get_max_epochs, gradient_norm_check @@ -82,8 +87,8 @@ def _get_small_mnist_cnn(self, dp_parameters, batch_size): return model, ds_train @parameterized.parameters( - ('per-layer', 0.8, 1e-5, 22.0, True), - 
('global', 1.2, 1e-6, 30.0, False), + ("per-layer", 0.8, 1e-5, 22.0, True), + ("global", 1.2, 1e-6, 30.0, False), ) def test_get_max_epochs(self, noisify_strategy, noise_multiplier, delta, epsilon_max, safe): dp_parameters = DPParameters( @@ -109,7 +114,7 @@ def test_get_max_epochs(self, noisify_strategy, noise_multiplier, delta, epsilon def test_gradient_bounds(self): dp_parameters = DPParameters( - noisify_strategy='per-layer', + noisify_strategy="per-layer", noise_multiplier=2.2, delta=1e-5, )