Merge pull request #92 from eth-cscs/release-0.6.2
Release 0.6.2
statrita2004 authored Apr 9, 2021
2 parents e8dd2be + 09caaa5 commit d0e6181
Showing 30 changed files with 11,377 additions and 500 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
@@ -11,6 +11,7 @@ addons:
- gfortran
- libboost-random-dev
- python3.7-dev
- python3.9-dev
- python3-numpy
- swig
- libmpich-dev
@@ -31,6 +32,10 @@ jobs:
python: "3.8"
env:
- UNIT_TEST=true
- name: "Python 3.9"
python: "3.9"
env:
- UNIT_TEST=true
# Test coverage, and a run without PyTorch, for a single version
- name: "Coverage"
python: "3.8"
@@ -61,6 +66,7 @@ jobs:

install:
- sudo apt-get install -y r-base # install R for testing example
- pip install cython
- pip install rpy2 # install the rpy2 library for testing example with R
- pip install -r requirements.txt
- pip install -r requirements/backend-spark.txt
9 changes: 7 additions & 2 deletions Makefile
@@ -4,10 +4,10 @@ MAKEDIRS=$(shell find examples -name Makefile -exec dirname {} \;)
whl_file = abcpy-${VERSION}-py3-none-any.whl

.DEFAULT: help
.PHONY: help clean doc doctest exampletest package test uninstall unittest unittest_mpi install reinstall $(MAKEDIRS)
.PHONY: help clean doc doctest exampletest exampletest_mpi package test uninstall unittest unittest_mpi install reinstall $(MAKEDIRS)

help:
@echo Targets are: clean, doc, doctest, exampletest, package, uninstall, unittest, unittest_mpi , test
@echo Targets are: clean, doc, doctest, exampletest, exampletest_mpi, package, uninstall, unittest, unittest_mpi, test

clean:
find . -name "*.pyc" -type f -delete
@@ -26,6 +26,11 @@ test: unittest unittest_mpi exampletest exampletest_mpi doctest
unittest:
@echo "Running standard unit tests.."
python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in standard unit tests."; exit 1)
@# remove temporary files created during testing
@if test -f net.pth; then rm net.pth; fi
@if test -f scaler.pkl; then rm scaler.pkl; fi
@if test -f tmp.jnl; then rm tmp.jnl; fi
@if test -f journal_tests_testfile.pkl; then rm journal_tests_testfile.pkl; fi

unittest_mpi:
@echo "Running MPI backend unit tests.."
91 changes: 57 additions & 34 deletions README.md
@@ -2,19 +2,50 @@

ABCpy is a scientific library written in Python for Bayesian uncertainty quantification in the
absence of a likelihood function, which parallelizes existing approximate Bayesian computation (ABC)
algorithms and other likelihood-free inference schemes. It presently includes:

* RejectionABC
* PMCABC (Population Monte Carlo ABC)
* SMCABC (Sequential Monte Carlo ABC)
* RSMCABC (Replenishment SMC-ABC)
* APMCABC (Adaptive Population Monte Carlo ABC)
* SABC (Simulated Annealing ABC)
* ABCsubsim (ABC using subset simulation)
* PMC (Population Monte Carlo) using approximations of likelihood functions
* Random Forest Model Selection Scheme
* Semi-automatic summary selection (with Neural networks)
* summary selection using distance learning (with Neural networks)
algorithms and other likelihood-free inference schemes.

# Content

ABCpy presently includes the following **ABC algorithms**:

* [RejectionABC](https://www.genetics.org/content/145/2/505)
* [PMCABC (Population Monte Carlo ABC)](https://www.annualreviews.org/doi/abs/10.1146/annurev-ecolsys-102209-144621)
* [SMCABC (Sequential Monte Carlo ABC)](https://link.springer.com/article/10.1007/s11222-011-9271-y)
* [RSMCABC (Replenishment SMC-ABC)](https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1541-0420.2010.01410.x)
* [APMCABC (Adaptive Population Monte Carlo ABC)](https://link.springer.com/article/10.1007/s00180-013-0428-3)
* [SABC (Simulated Annealing ABC)](https://link.springer.com/article/10.1007/s11222-014-9507-8)
* [ABCsubsim (ABC using subset simulation)](https://epubs.siam.org/doi/10.1137/130932831)

The above can be used with the following **distances** (a usage sketch follows this list):

* Euclidean Distance
* [Logistic Regression and Penalised Logistic Regression (classification accuracy)](https://link.springer.com/article/10.1007/s11222-017-9738-6)
* Divergences between datasets:
  * [Wasserstein Distance](https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/rssb.12312)
  * [Sliced Wasserstein Distance](https://ieeexplore.ieee.org/abstract/document/9054735)
  * [Gamma Divergence](http://proceedings.mlr.press/v130/fujisawa21a/fujisawa21a.pdf)
  * [Kullback-Leibler Divergence](http://proceedings.mlr.press/v84/jiang18a/jiang18a.pdf)
  * [Maximum Mean Discrepancy](http://proceedings.mlr.press/v51/park16.pdf)
  * [Energy Distance](https://arxiv.org/abs/1905.05884)
  * [Squared Hellinger Distance](https://arxiv.org/pdf/2006.14126.pdf)
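
As an illustration of how an algorithm and a distance fit together, here is a minimal sketch. It assumes the high-level v0.6.2 API (module paths such as `abcpy.inferences` and `abcpy.distances`) and a toy Gaussian model, so treat names and signatures as illustrative rather than authoritative:

```python
# Hedged sketch: RejectionABC with the Euclidean distance on a toy model.
# Module paths and call signatures are assumed from the v0.6.2 documentation.
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.distances import Euclidean
from abcpy.inferences import RejectionABC
from abcpy.statistics import Identity

mu = Uniform([[0], [10]], name="mu")        # prior on the mean
model = Normal([mu, 1.0], name="x")         # toy forward model
distance = Euclidean(Identity(degree=1))    # distance on raw (identity) statistics

y_obs = model.forward_simulate([5.0, 1.0], 100)  # stand-in for real observations
sampler = RejectionABC([model], [distance], BackendDummy(), seed=1)
journal = sampler.sample([y_obs], n_samples=100, n_samples_per_param=1, epsilon=0.5)
print(journal.posterior_mean())             # results live on the returned Journal
```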

Moreover, we provide the following methods for directly **approximating the likelihood function**:
* [Bayesian Synthetic Likelihood](https://www.tandfonline.com/doi/abs/10.1080/10618600.2017.1302882?journalCode=ucgs20)
* [Semiparametric Bayesian Synthetic Likelihood](https://link.springer.com/article/10.1007/s11222-019-09904-x)
* [Penalised Logistic Regression for Ratio Estimation](https://projecteuclid.org/journals/bayesian-analysis/advance-publication/Likelihood-Free-Inference-by-Ratio-Estimation/10.1214/20-BA1238.full)

The above likelihood approximation methods can be used with the following samplers (see the sketch after this list):

* [PMC (Population Monte Carlo)](https://www.tandfonline.com/doi/abs/10.1198/106186004X12803)
* Metropolis-Hastings MCMC (Markov Chain Monte Carlo)
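
A matching sketch, under the same assumptions as above, swaps the distance for a synthetic-likelihood approximation and samples it with PMC; `SynLikelihood` and the `PMC.sample` arguments are taken from the documented API and may differ in detail:

```python
# Hedged sketch: Bayesian synthetic likelihood sampled with PMC.
from abcpy.approx_lhd import SynLikelihood
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.inferences import PMC
from abcpy.statistics import Identity

mu = Uniform([[0], [10]], name="mu")
model = Normal([mu, 1.0], name="x")
approx_lhd = SynLikelihood(Identity(degree=2))   # Gaussian fit to the statistics
y_obs = model.forward_simulate([5.0, 1.0], 100)  # stand-in for real observations

sampler = PMC([model], [approx_lhd], BackendDummy(), seed=1)
journal = sampler.sample([y_obs], steps=3, n_samples=100, n_samples_per_param=50)
```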

Additional **features** are:
* plotting utilities for the obtained posterior
* several methods for summary selection (sketched after this list):
  * [Semi-automatic summary selection (with neural networks)](http://proceedings.mlr.press/v97/wiqvist19a/wiqvist19a.pdf)
  * [Summary selection using distance learning (with neural networks)](https://link.springer.com/article/10.1007/s13571-019-00208-8)
* [Random Forest Model Selection Scheme](https://academic.oup.com/bioinformatics/article/32/6/859/1744513)
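
For the summary-selection step, a hedged sketch under the same assumptions as the sketches above; `SemiautomaticNN` and `get_statistics` follow the documented statistics-learning API, and the training hyperparameters are placeholders:

```python
# Hedged sketch: learn summary statistics with the semi-automatic NN approach,
# then plug them into a distance usable by any of the ABC samplers above.
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.distances import Euclidean
from abcpy.statistics import Identity
from abcpy.statisticslearning import SemiautomaticNN

mu = Uniform([[0], [10]], name="mu")
model = Normal([mu, 1.0], name="x")

learner = SemiautomaticNN([model], Identity(degree=1), BackendDummy(),
                          n_samples=1000, n_samples_per_param=1, seed=1)
learned_statistics = learner.get_statistics()  # wraps the trained network
distance = Euclidean(learned_statistics)       # ready for RejectionABC, PMCABC, ...
```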


ABCpy addresses the needs of domain scientists and data
scientists by providing
@@ -27,9 +58,9 @@ scientists by providing
# Documentation
For more information, check out the

* [Documentation](http://abcpy.readthedocs.io/en/v0.6.1)
* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.6.1/examples) directory and
* [Reference](http://abcpy.readthedocs.io/en/v0.6.1/abcpy.html)
* [Documentation](http://abcpy.readthedocs.io/en/v0.6.2)
* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.6.2/examples) directory and
* [Reference](http://abcpy.readthedocs.io/en/v0.6.2/abcpy.html)


Further, we provide a
@@ -93,33 +124,25 @@ ABCpy for your publication, we would appreciate a citation. You can use

Publications in which ABCpy was applied:

* L. Pacchiardi, R. Dutta. "Generalized Bayesian Likelihood-Free Inference Using Scoring Rules Estimators", 2021, arXiv:2104.03889.

* L. Pacchiardi, R. Dutta. "Score Matched Conditional Exponential Families for Likelihood-Free Inference", 2020, arXiv:2012.10903.

* R. Dutta, K. Zouaoui-Boudjeltia, C. Kotsalos, A. Rousseau, D. Ribeiro de Sousa, J. M. Desmet,
A. Van Meerhaeghe, A. Mira, and B. Chopard. "Interpretable pathological test for Cardio-vascular
disease: Approximate Bayesian computation with distance learning.", 2020, arXiv:2010.06465.
* R. Dutta, K. Zouaoui-Boudjeltia, C. Kotsalos, A. Rousseau, D. Ribeiro de Sousa, J. M. Desmet, A. Van Meerhaeghe, A. Mira, and B. Chopard. "Interpretable pathological test for Cardio-vascular disease: Approximate Bayesian computation with distance learning.", 2020, arXiv:2010.06465.

* R. Dutta, S. Gomes, D. Kalise, L. Pacchiardi. "Using mobility data in the design of optimal
lockdown strategies for the COVID-19 pandemic in England.", 2020, arXiv:2006.16059.
* R. Dutta, S. Gomes, D. Kalise, L. Pacchiardi. "Using mobility data in the design of optimal lockdown strategies for the COVID-19 pandemic in England.", 2020, arXiv:2006.16059.

* L. Pacchiardi, P. Künzli, M. Schöngens, B. Chopard, R. Dutta, "Distance-Learning for
Approximate Bayesian Computation to Model a Volcanic Eruption", 2020, Sankhya B, ISSN 0976-8394,
[DOI: 10.1007/s13571-019-00208-8](https://doi.org/10.1007/s13571-019-00208-8).
* L. Pacchiardi, P. Künzli, M. Schöngens, B. Chopard, R. Dutta, "Distance-Learning for Approximate Bayesian Computation to Model a Volcanic Eruption", 2020, Sankhya B, 1-30.

* R. Dutta, J. P. Onnela, A. Mira, "Bayesian Inference of Spreading Processes
on Networks", 2018, Proc. R. Soc. A, 474(2215), 20180129.
* R. Dutta, J. P. Onnela, A. Mira, "Bayesian Inference of Spreading Processes on Networks", 2018, Proceedings of the Royal Society A, 474(2215), 20180129.

* R. Dutta, Z. Faidon Brotzakis and A. Mira, "Bayesian Calibration of
Force-fields from Experimental Data: TIP4P Water", 2018, Journal of Chemical Physics 149, 154110.
* R. Dutta, Z. Faidon Brotzakis and A. Mira, "Bayesian Calibration of Force-fields from Experimental Data: TIP4P Water", 2018, Journal of Chemical Physics 149, 154110.

* R. Dutta, B. Chopard, J. Lätt, F. Dubois, K. Zouaoui Boudjeltia and A. Mira,
"Parameter Estimation of Platelets Deposition: Approximate Bayesian
Computation with High Performance Computing", 2018, Frontiers in physiology, 9.
* R. Dutta, B. Chopard, J. Lätt, F. Dubois, K. Zouaoui Boudjeltia and A. Mira, "Parameter Estimation of Platelets Deposition: Approximate Bayesian Computation with High Performance Computing", 2018, Frontiers in Physiology, 9.

* A. Ebert, R. Dutta, P. Wu, K. Mengersen and A. Mira, "Likelihood-Free
Parameter Estimation for Dynamic Queueing Networks", 2018, arXiv:1804.02526.
* A. Ebert, R. Dutta, P. Wu, K. Mengersen and A. Mira, "Likelihood-Free Parameter Estimation for Dynamic Queueing Networks", 2018, arXiv:1804.02526, to appear in Journal of the Royal Statistical Society: Series C.

* R. Dutta, M. Schoengens, L. Pacchiardi, A. Ummadisingu, N. Widerman, J. P. Onnela, A. Mira, "ABCpy: A High-Performance Computing Perspective to Approximate Bayesian Computation", 2020, arXiv:1711.04694.
* R. Dutta, M. Schoengens, L. Pacchiardi, A. Ummadisingu, N. Widerman, P. Künzli, J. P. Onnela, A. Mira, "ABCpy: A High-Performance Computing Perspective to Approximate Bayesian Computation", 2018, arXiv:1711.04694, To appear in Journal of Statistical Software.

## License
ABCpy is published under the BSD 3-clause license, see [here](LICENSE).
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
0.6.1
0.6.2
13 changes: 7 additions & 6 deletions abcpy/NN_utilities/algorithms.py
@@ -19,7 +19,7 @@ def contrastive_training(samples, similarity_set, embedding_net, cuda, batch_siz
samples_val=None, similarity_set_val=None, early_stopping=False,
epochs_early_stopping_interval=1,
start_epoch_early_stopping=10, positive_weight=None, load_all_data_GPU=False, margin=1.,
lr=None, optimizer=None, scheduler=None, start_epoch_training=0,
lr=None, optimizer=None, scheduler=None, start_epoch_training=0, use_tqdm=True,
optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
""" Implements the algorithm for the contrastive distance learning training of a neural network; need to be
provided with a set of samples and the corresponding similarity matrix"""
@@ -84,7 +84,8 @@ def contrastive_training(samples, similarity_set, embedding_net, cuda, batch_siz
fit(pairs_train_loader, model_contrastive, loss_fn, optimizer, scheduler, n_epochs, cuda,
val_loader=pairs_train_loader_val,
early_stopping=early_stopping, start_epoch_early_stopping=start_epoch_early_stopping,
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training)
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training,
use_tqdm=use_tqdm)

return embedding_net

@@ -93,7 +94,7 @@ def triplet_training(samples, similarity_set, embedding_net, cuda, batch_size=16
samples_val=None, similarity_set_val=None, early_stopping=False, epochs_early_stopping_interval=1,
start_epoch_early_stopping=10,
load_all_data_GPU=False, margin=1., lr=None, optimizer=None, scheduler=None,
start_epoch_training=0,
start_epoch_training=0, use_tqdm=True,
optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
""" Implements the algorithm for the triplet distance learning training of a neural network; need to be
provided with a set of samples and the corresponding similarity matrix"""
@@ -157,15 +158,15 @@ def triplet_training(samples, similarity_set, embedding_net, cuda, batch_size=16
fit(triplets_train_loader, model_triplet, loss_fn, optimizer, scheduler, n_epochs, cuda,
val_loader=triplets_train_loader_val,
early_stopping=early_stopping, start_epoch_early_stopping=start_epoch_early_stopping,
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training)
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training, use_tqdm=use_tqdm)

return embedding_net


def FP_nn_training(samples, target, embedding_net, cuda, batch_size=1, n_epochs=50, samples_val=None, target_val=None,
early_stopping=False, epochs_early_stopping_interval=1, start_epoch_early_stopping=10,
load_all_data_GPU=False,
lr=1e-3, optimizer=None, scheduler=None, start_epoch_training=0, optimizer_kwargs={},
lr=1e-3, optimizer=None, scheduler=None, start_epoch_training=0, use_tqdm=True, optimizer_kwargs={},
scheduler_kwargs={}, loader_kwargs={}):
""" Implements the algorithm for the training of a neural network based on regressing the values of the parameters
on the corresponding simulation outcomes; it is effectively a training with a mean squared error loss. Needs to be
@@ -224,6 +225,6 @@ def FP_nn_training(samples, target, embedding_net, cuda, batch_size=1, n_epochs=
fit(data_loader_FP_nn, embedding_net, loss_fn, optimizer, scheduler, n_epochs, cuda,
val_loader=data_loader_FP_nn_val,
early_stopping=early_stopping, start_epoch_early_stopping=start_epoch_early_stopping,
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training)
epochs_early_stopping_interval=epochs_early_stopping_interval, start_epoch_training=start_epoch_training, use_tqdm=use_tqdm)

return embedding_net
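
The practical effect of the new `use_tqdm` flag is to let callers silence the per-epoch progress bar. Below is a hedged sketch calling one of these helpers directly; only `use_tqdm` comes from this diff, while the toy tensors, network, and the exact input types the helper expects are assumptions:

```python
# Hedged sketch: train the regression-based summary network without a progress
# bar. use_tqdm=False propagates to fit(), which passes disable=not use_tqdm
# to tqdm (see the trainer.py hunk below).
import torch
import torch.nn as nn

from abcpy.NN_utilities.algorithms import FP_nn_training

samples = torch.randn(100, 5)  # simulated outputs (n_samples x output_dim)
target = torch.randn(100, 2)   # parameter values to regress on (n_samples x n_params)
embedding_net = nn.Sequential(nn.Linear(5, 16), nn.ReLU(), nn.Linear(16, 2))

trained_net = FP_nn_training(samples, target, embedding_net, cuda=False,
                             batch_size=16, n_epochs=5, use_tqdm=False)
```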
4 changes: 2 additions & 2 deletions abcpy/NN_utilities/trainer.py
@@ -5,7 +5,7 @@


def fit(train_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, val_loader=None, early_stopping=False,
epochs_early_stopping_interval=1, start_epoch_early_stopping=10, start_epoch_training=0):
epochs_early_stopping_interval=1, start_epoch_early_stopping=10, start_epoch_training=0, use_tqdm=True):
"""
Basic function to train a neural network given a train_loader, a loss function and an optimizer.
@@ -26,7 +26,7 @@ def fit(train_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, val_
for epoch in range(0, start_epoch_training):
scheduler.step()

for epoch in tqdm(range(start_epoch_training, n_epochs)):
for epoch in tqdm(range(start_epoch_training, n_epochs), disable=not use_tqdm):
# Train stage
train_loss = train_epoch(train_loader, model, loss_fn, optimizer, cuda)
