From 5535fe53272bf4ea2ee2eb77d8ad46ff5afa1718 Mon Sep 17 00:00:00 2001
From: qubixes <44498096+qubixes@users.noreply.github.com>
Date: Thu, 26 Mar 2020 23:09:57 +0100
Subject: [PATCH] Add testing (#14)

---
 .github/workflows/ci-workflow.yml         | 42 ++++++++++++
 README.md                                 | 64 +++++++++---------
 asreviewcontrib/hyperopt/__init__.py      |  2 +-
 asreviewcontrib/hyperopt/active.py        | 52 ++++-----------
 asreviewcontrib/hyperopt/active_job.py    |  7 +-
 asreviewcontrib/hyperopt/cluster.py       | 53 ++++-----------
 asreviewcontrib/hyperopt/cluster_job.py   | 13 ++--
 asreviewcontrib/hyperopt/cluster_utils.py | 80 +++++++++++++++++++++++
 asreviewcontrib/hyperopt/job_utils.py     | 56 +++++++++++++++-
 asreviewcontrib/hyperopt/passive.py       | 54 ++++-----------
 asreviewcontrib/hyperopt/passive_job.py   | 13 ++--
 setup.py                                  | 12 +++-
 tests/data/embase_labelled.csv            |  7 ++
 tests/test_active.py                      | 69 +++++++++++++++++++
 tests/test_cluster.py                     | 65 ++++++++++++++++++
 tests/test_passive.py                     | 69 +++++++++++++++++++
 16 files changed, 486 insertions(+), 172 deletions(-)
 create mode 100644 .github/workflows/ci-workflow.yml
 create mode 100644 asreviewcontrib/hyperopt/cluster_utils.py
 create mode 100644 tests/data/embase_labelled.csv
 create mode 100644 tests/test_active.py
 create mode 100644 tests/test_cluster.py
 create mode 100644 tests/test_passive.py

diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml
new file mode 100644
index 0000000..5aa4e94
--- /dev/null
+++ b/.github/workflows/ci-workflow.yml
@@ -0,0 +1,42 @@
+name: test-suite
+on: [push, pull_request]
+jobs:
+  test-master:
+    name: pytest
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        path: asr-hyper
+    - uses: actions/setup-python@v1
+      with:
+        python-version: '3.6' # Version range or exact version of a Python version to use, using semvers version range syntax.
+        architecture: 'x64' # (x64 or x86)
+    - name: Install packages and run tests
+      run: |
+        pip install pytest
+        pip install --upgrade "setuptools>=41.0.0"  # quoted: a bare >= is parsed by the shell as a redirect to a file named "=41.0.0"
+        git clone https://github.com/asreview/asreview.git
+        pip install ./asreview[all]
+        pip install ./asr-hyper
+        pytest asr-hyper/tests
+     
+  #test-older:
+    #name: pytest
+    #runs-on: ubuntu-latest
+    #strategy:
+      #matrix:
+        #asr_versions: ['0.7.2']
+    #steps:
+    #- uses: actions/checkout@v2
+    #- uses: actions/setup-python@v1
+      #with:
+        #python-version: '3.6' # Version range or exact version of a Python version to use, using semvers version range syntax.
+        #architecture: 'x64' # (x64 or x86)
+    #- name: Install packages and run tests
+      #run: |
+        #pip install pytest
+        #pip install --upgrade "setuptools>=41.0.0"
+        #pip install asreview[all]==${{ matrix.asr_versions }}
+        #pip install .
+        #pytest tests
diff --git a/README.md b/README.md
index 317b9f3..edea615 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ## ASReview-hyperopt
 
-![Deploy and release](https://github.com/msdslab/asreview-hyperopt/workflows/Deploy%20and%20release/badge.svg)
+![Deploy and release](https://github.com/asreview/asreview-hyperopt/workflows/Deploy%20and%20release/badge.svg)![Build status](https://github.com/asreview/asreview-hyperopt/workflows/test-suite/badge.svg)
 
 Hyper parameter optimization extension for 
 [ASReview](https://github.com/asreview/asreview). It uses the 
@@ -11,7 +11,7 @@ automatically used for hyper parameter optimization.
 
 ### Installation
 
-The easiest way to install the visualization package is to use the command line:
+The easiest way to install the hyper parameter optimization package is to use the command line:
 
 ``` bash
 pip install asreview-hyperopt
@@ -45,15 +45,29 @@ asreview hyper-active --help
 Which results in the following options:
 
 ```bash
-usage: /Users/qubix/Library/Python/3.6/bin/asreview [-h] [-m MODEL]
-                                                    [-q QUERY_STRATEGY]
-                                                    [-b BALANCE_STRATEGY]
-                                                    [-e FEATURE_EXTRACTION]
-                                                    [-n N_ITER] [-d DATASETS]
-                                                    [--mpi]
+usage: hyper-active [-h] [-n N_ITER] [-r N_RUN] [-d DATASETS] [--mpi]
+                    [--data_dir DATA_DIR] [--output_dir OUTPUT_DIR]
+                    [--server_job] [-m MODEL] [-q QUERY_STRATEGY]
+                    [-b BALANCE_STRATEGY] [-e FEATURE_EXTRACTION]
 
 optional arguments:
   -h, --help            show this help message and exit
+  -n N_ITER, --n_iter N_ITER
+                        Number of iterations of Bayesian Optimization.
+  -r N_RUN, --n_run N_RUN
+                        Number of runs per dataset.
+  -d DATASETS, --datasets DATASETS
+                        Datasets to use in the hyper parameter optimization
+                        Separate by commas to use multiple at the same time
+                        [default: all].
+  --mpi                 Use the mpi implementation.
+  --data_dir DATA_DIR   Base directory with data files.
+  --output_dir OUTPUT_DIR
+                        Output directory for trials.
+  --server_job          Run job on the server. It will incur less overhead of
+                        used CPUs, but more latency of workers waiting for the
+                        server to finish its own job. Only makes sense in
+                        combination with the flag --mpi.
   -m MODEL, --model MODEL
                         Prediction model for active learning.
   -q QUERY_STRATEGY, --query_strategy QUERY_STRATEGY
@@ -62,22 +76,16 @@ optional arguments:
                         Balance strategy for active learning.
   -e FEATURE_EXTRACTION, --feature_extraction FEATURE_EXTRACTION
                         Feature extraction method.
-  -n N_ITER, --n_iter N_ITER
-                        Number of iterations of Bayesian Optimization.
-  -d DATASETS, --datasets DATASETS
-                        Datasets to use in the hyper parameter optimization
-                        Separate by commas to use multiple at the same time
-                        [default: all].
-  --mpi                 Use the mpi implementation.
 
 ```
 
 ### Data structure
 
-The extension will search for datasets in the `data` directory, relative to the current
-working directory, so put your datasets there.
+The extension will by default search for datasets in the `data` directory, relative to the current
+working directory. Either put your datasets there, or specify a different data directory with `--data_dir`.
 
-The output of the runs will be stored in the `output` directory, again relative to the current path.
+The output of the runs will by default be stored in the `output` directory, relative to
+the current path.
 
 An example of a structure that has been created:
 
@@ -161,20 +169,14 @@ The hyperopt extension has built-in support for MPI. MPI is used for paralleliza
 a local PC with an MPI-implementation (like OpenMPI) installed, one could run with 4 cores:
 
 ```bash
-mpirun -n 4 asreview hyper-active
+mpirun -n 4 asreview hyper-active --mpi
 ```
 
-On super computers one should sometimes replace `mpirun` with `srun`.
-
-
-### Time measurements:
+If you want to be slightly more efficient on a machine with a low number of cores, you can run
+jobs on the MPI server as well:
 
-#### inactive
+```bash
+mpirun -n 4 asreview hyper-active --mpi --server_job
+```
 
-nb, tfidf, double, max -> 53 seconds
-svm, tfidf, double, max -> 1940 seconds
-rf, tfidf, double, max -> 80 seconds
-logistic, tfidf, double, max -> 250 seconds /4
-dense_nn, tfidf, double, max -> ?
-dense_nn, doc2vec, double, max ->  2750 seconds /1, /2
-svm, doc2vec, ...
+On super computers one should sometimes replace `mpirun` with `srun`.
diff --git a/asreviewcontrib/hyperopt/__init__.py b/asreviewcontrib/hyperopt/__init__.py
index 6ca8b73..2404ed8 100644
--- a/asreviewcontrib/hyperopt/__init__.py
+++ b/asreviewcontrib/hyperopt/__init__.py
@@ -18,5 +18,5 @@
 from asreviewcontrib.hyperopt.show_trials import ShowTrialsEntryPoint
 from asreviewcontrib.hyperopt.create_config import CreateConfigEntryPoint
 
-__version__ = "0.1.4"
+__version__ = "0.2.0"
 __extension_name__ = "asreview-hyperopt"
diff --git a/asreviewcontrib/hyperopt/active.py b/asreviewcontrib/hyperopt/active.py
index 2f73691..f4d07b0 100644
--- a/asreviewcontrib/hyperopt/active.py
+++ b/asreviewcontrib/hyperopt/active.py
@@ -16,13 +16,13 @@
 import argparse
 import logging
 
-from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
-from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
+from asreview.entry_points import BaseEntryPoint
+
 from asreviewcontrib.hyperopt.serial_executor import serial_executor
 from asreviewcontrib.hyperopt.serial_executor import serial_hyper_optimize
-from asreviewcontrib.hyperopt.job_utils import get_data_names
+from asreviewcontrib.hyperopt.job_utils import get_data_names,\
+    _base_parse_arguments
 from asreviewcontrib.hyperopt.active_job import ActiveJobRunner
-from asreview.entry_points import BaseEntryPoint
 
 
 class HyperActiveEntryPoint(BaseEntryPoint):
@@ -43,7 +43,7 @@ def execute(self, argv):
 
 
 def _parse_arguments():
-    parser = argparse.ArgumentParser(prog=sys.argv[0])
+    parser = _base_parse_arguments(prog="hyper-active")
     parser.add_argument(
         "-m", "--model",
         type=str,
@@ -67,39 +67,6 @@ def _parse_arguments():
         type=str,
         default="tfidf",
         help="Feature extraction method.")
-    parser.add_argument(
-        "-n", "--n_iter",
-        type=int,
-        default=1,
-        help="Number of iterations of Bayesian Optimization."
-    )
-    parser.add_argument(
-        "-r", "--n_run",
-        type=int,
-        default=8,
-        help="Number of runs per dataset."
-    )
-    parser.add_argument(
-        "-d", "--datasets",
-        type=str,
-        default="all",
-        help="Datasets to use in the hyper parameter optimization "
-        "Separate by commas to use multiple at the same time [default: all].",
-    )
-    parser.add_argument(
-        "--mpi",
-        dest='use_mpi',
-        action='store_true',
-        help="Use the mpi implementation.",
-    )
-    parser.add_argument(
-        "--server_job",
-        dest='server_job',
-        action='store_true',
-        help='Run job on the server. It will incur less overhead of used CPUs,'
-        ' but more latency of workers waiting for the server to finish its own'
-        ' job. Only makes sense in combination with the flag --mpi.'
-    )
     return parser
 
 
@@ -115,9 +82,12 @@ def main(argv=sys.argv[1:]):
     use_mpi = args["use_mpi"]
     n_run = args["n_run"]
     server_job = args["server_job"]
+    data_dir = args["data_dir"]
+    output_dir = args["output_dir"]
 
-    data_names = get_data_names(datasets)
+    data_names = get_data_names(datasets, data_dir=data_dir)
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
         executor = mpi_executor
     else:
         executor = serial_executor
@@ -125,9 +95,11 @@ def main(argv=sys.argv[1:]):
     job_runner = ActiveJobRunner(
         data_names, model_name=model_name, query_name=query_name,
         balance_name=balance_name, feature_name=feature_name,
-        executor=executor, n_run=n_run, server_job=server_job)
+        executor=executor, n_run=n_run, server_job=server_job,
+        data_dir=data_dir, output_dir=output_dir)
 
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
         mpi_hyper_optimize(job_runner, n_iter)
     else:
         serial_hyper_optimize(job_runner, n_iter)
diff --git a/asreviewcontrib/hyperopt/active_job.py b/asreviewcontrib/hyperopt/active_job.py
index 2e73e94..6598815 100644
--- a/asreviewcontrib/hyperopt/active_job.py
+++ b/asreviewcontrib/hyperopt/active_job.py
@@ -40,12 +40,13 @@ class ActiveJobRunner():
     def __init__(self, data_names, model_name, query_name, balance_name,
                  feature_name, executor=serial_executor,
                  n_run=8, n_papers=1502, n_instances=50, n_included=1,
-                 n_excluded=1, server_job=False):
+                 n_excluded=1, server_job=False, data_dir="data",
+                 output_dir=None):
 
         self.trials_dir, self.trials_fp = get_trial_fp(
             data_names, model_name=model_name, balance_name=balance_name,
             query_name=query_name, feature_name=feature_name,
-            hyper_type="active")
+            hyper_type="active", output_dir=output_dir)
 
         self.feature_name = feature_name
         self.balance_name = balance_name
@@ -61,7 +62,7 @@ def __init__(self, data_names, model_name, query_name, balance_name,
         self.n_excluded = n_excluded
 
         self.server_job = server_job
-        self.data_dir = "data"
+        self.data_dir = data_dir
         self._cache = {data_name: {"priors": {}}
                        for data_name in data_names}
 
diff --git a/asreviewcontrib/hyperopt/cluster.py b/asreviewcontrib/hyperopt/cluster.py
index ed4b34e..ac48ca8 100644
--- a/asreviewcontrib/hyperopt/cluster.py
+++ b/asreviewcontrib/hyperopt/cluster.py
@@ -18,11 +18,10 @@
 
 from asreview.entry_points import BaseEntryPoint
 
-from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
-from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
 from asreviewcontrib.hyperopt.serial_executor import serial_executor
 from asreviewcontrib.hyperopt.serial_executor import serial_hyper_optimize
-from asreviewcontrib.hyperopt.job_utils import get_data_names
+from asreviewcontrib.hyperopt.job_utils import get_data_names,\
+    _base_parse_arguments
 from asreviewcontrib.hyperopt.cluster_job import ClusterJobRunner
 
 
@@ -43,46 +42,12 @@ def execute(self, argv):
 
 
 def _parse_arguments():
-    parser = argparse.ArgumentParser(prog=sys.argv[0])
+    parser = _base_parse_arguments("hyper-cluster")
     parser.add_argument(
         "-e", "--feature_extraction",
         type=str,
         default="doc2vec",
         help="Feature extraction method.")
-    parser.add_argument(
-        "-n", "--n_iter",
-        type=int,
-        default=1,
-        help="Number of iterations of Bayesian Optimization."
-    )
-    parser.add_argument(
-        "-d", "--datasets",
-        type=str,
-        default="all",
-        help="Datasets to use in the hyper parameter optimization "
-        "Separate by commas to use multiple at the same time [default: all].",
-    )
-    parser.add_argument(
-        "--mpi",
-        dest='use_mpi',
-        action='store_true',
-        help="Use the mpi implementation.",
-    )
-    parser.add_argument(
-        "-r", "--n_run",
-        type=int,
-        default=8,
-        help="Number of runs per dataset."
-    )
-    parser.add_argument(
-        "--server_job",
-        dest='server_job',
-        action='store_true',
-        help='Run job on the server. It will incur less overhead of used CPUs,'
-        ' but more latency of workers waiting for the server to finish its own'
-        ' job. Only makes sense in combination with the flag --mpi.'
-    )
-
     return parser
 
 
@@ -95,17 +60,23 @@ def main(argv=sys.argv[1:]):
     use_mpi = args["use_mpi"]
     n_run = args["n_run"]
     server_job = args["server_job"]
+    data_dir = args["data_dir"]
+    output_dir = args["output_dir"]
 
-    data_names = get_data_names(datasets)
+    data_names = get_data_names(datasets, data_dir=data_dir)
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
         executor = mpi_executor
     else:
         executor = serial_executor
 
-    job_runner = ClusterJobRunner(data_names, feature_name, executor=executor,
-                                  n_cluster_run=n_run, server_job=server_job)
+    job_runner = ClusterJobRunner(
+        data_names, feature_name, executor=executor,
+        n_cluster_run=n_run, server_job=server_job,
+        data_dir=data_dir, output_dir=output_dir)
 
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
         mpi_hyper_optimize(job_runner, n_iter)
     else:
         serial_hyper_optimize(job_runner, n_iter)
diff --git a/asreviewcontrib/hyperopt/cluster_job.py b/asreviewcontrib/hyperopt/cluster_job.py
index b05b1f4..2def93f 100644
--- a/asreviewcontrib/hyperopt/cluster_job.py
+++ b/asreviewcontrib/hyperopt/cluster_job.py
@@ -25,8 +25,8 @@
 
 from asreview import ASReviewData
 from asreview.feature_extraction.utils import get_feature_class
-from asreview.cluster import normalized_cluster_score
 
+from asreviewcontrib.hyperopt.cluster_utils import normalized_cluster_score
 from asreviewcontrib.hyperopt.job_utils import get_trial_fp
 from asreviewcontrib.hyperopt.job_utils import get_split_param
 from asreviewcontrib.hyperopt.job_utils import data_fp_from_name
@@ -37,10 +37,12 @@
 
 class ClusterJobRunner():
     def __init__(self, data_names, feature_name, executor=serial_executor,
-                 n_cluster_run=30, n_feature_run=1, server_job=False):
+                 n_cluster_run=30, n_feature_run=1, server_job=False,
+                 data_dir="data", output_dir=None):
 
         self.trials_dir, self.trials_fp = get_trial_fp(
-            data_names, feature_name=feature_name, hyper_type="cluster")
+            data_names, feature_name=feature_name, hyper_type="cluster",
+            output_dir=output_dir)
 
         self.feature_name = feature_name
         self.feature_class = get_feature_class(feature_name)
@@ -49,7 +51,7 @@ def __init__(self, data_names, feature_name, executor=serial_executor,
         self.executor = executor
         self.n_cluster_run = n_cluster_run
         self.n_feature_run = n_feature_run
-        self.data_dir = "data"
+        self.data_dir = data_dir
         self.server_job = server_job
         self._cache = {data_name: {}
                        for data_name in data_names}
@@ -162,7 +164,8 @@ def loss_from_files(data_fps, labels_fp):
             cur_scores.append(score)
         all_scores.append(cur_scores)
 
-    return -np.exp(np.average(np.log(all_scores)))
+    print(all_scores)
+    return -np.average(all_scores)
 
 
 def create_jobs(param, data_names, n_run):
diff --git a/asreviewcontrib/hyperopt/cluster_utils.py b/asreviewcontrib/hyperopt/cluster_utils.py
new file mode 100644
index 0000000..3e6ed7b
--- /dev/null
+++ b/asreviewcontrib/hyperopt/cluster_utils.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Copyright 2019 The ASReview Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from math import sqrt
+
+import numpy as np
+
+
+def simulate_score(one_dict, all_dict, n_run=10000):  # returns (mean, std) of cluster_score over n_run random draws
+    total_one = np.sum([x for x in one_dict.values()])
+    total = np.sum([x for x in all_dict.values()])
+    sim_scores = []
+    for _ in range(n_run):
+        one_idx = np.random.choice(range(total), total_one, replace=False)  # total_one distinct positions in [0, total)
+        one_idx = np.sort(one_idx)
+        new_one_dict = {}
+        cur_all_idx = 0
+        cur_one_idx = 0
+        for key in all_dict:
+            cur_all_idx += all_dict[key]  # exclusive upper bound of the positions assigned to this key
+            while cur_one_idx < len(one_idx) and one_idx[cur_one_idx] < cur_all_idx:
+                if key in new_one_dict:
+                    new_one_dict[key] += 1
+                else:
+                    new_one_dict[key] = 1
+                cur_one_idx += 1
+        try:
+            sim_scores.append(cluster_score(new_one_dict, all_dict))
+        except ZeroDivisionError:
+            print(new_one_dict, all_dict)  # dump the offending counts before re-raising
+            raise
+
+    return np.average(sim_scores), np.std(sim_scores)
+
+
+def cluster_score(one_dict, all_dict):
+    # one_dict / all_dict: counts per key for the "one" items and for all items.
+    n_one_total = np.sum(list(one_dict.values()))
+    tp = fn = fp = 0
+    for key, size in all_dict.items():
+        ones = one_dict.get(key, 0)
+        tp += ones*(ones - 1)/2  # pairs of ones sharing this key
+        fn += (size - ones)*ones  # one/non-one pairs sharing this key
+        fp += ones*(n_one_total - ones)  # ones here vs. ones elsewhere
+    # The leading 1 keeps the square-root argument at least 1.
+    denominator = sqrt(1+(tp+fn)*(tp+fp))
+    return tp/denominator
+
+
+def normalized_cluster_score(prediction, labels):
+    # Express the observed score in simulated standard deviations from the simulated mean.
+    counts = get_one_all_dict(prediction, labels)
+    mean, spread = simulate_score(*counts)
+    return (cluster_score(*counts) - mean)/spread
+
+
+def get_one_all_dict(prediction, labels):
+    """Count occurrences of each ``prediction`` value, overall and where ``labels == 1``.
+
+    Returns ``(one_dict, all_dict)``: counts per distinct ``prediction`` value.
+    """
+    unique, counts = np.unique(prediction, return_counts=True)
+    all_dict = dict(zip(unique, counts))
+
+    one_idx = np.where(labels == 1)[0]
+    unique, counts = np.unique(prediction[one_idx, ], return_counts=True)
+    return dict(zip(unique, counts)), all_dict
diff --git a/asreviewcontrib/hyperopt/job_utils.py b/asreviewcontrib/hyperopt/job_utils.py
index 648fe11..83bce35 100644
--- a/asreviewcontrib/hyperopt/job_utils.py
+++ b/asreviewcontrib/hyperopt/job_utils.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import argparse
 import logging
 import os
 from os.path import join, splitext
@@ -24,6 +25,55 @@ def empty_shared():
     }
 
 
+def _base_parse_arguments(prog="hyper-?"):  # parser with the options shared by the hyper-active/-cluster/-passive commands
+    parser = argparse.ArgumentParser(prog=prog)
+    parser.add_argument(
+        "-n", "--n_iter",
+        type=int,
+        default=1,
+        help="Number of iterations of Bayesian Optimization."
+    )
+    parser.add_argument(
+        "-r", "--n_run",
+        type=int,
+        default=8,
+        help="Number of runs per dataset."
+    )
+    parser.add_argument(
+        "-d", "--datasets",
+        type=str,
+        default="all",
+        help="Datasets to use in the hyper parameter optimization "
+        "Separate by commas to use multiple at the same time [default: all].",
+    )
+    parser.add_argument(
+        "--mpi",
+        dest='use_mpi',  # stored under args["use_mpi"], not args["mpi"]
+        action='store_true',
+        help="Use the mpi implementation.",
+    )
+    parser.add_argument(
+        "--data_dir",
+        type=str,
+        default="data",
+        help="Base directory with data files.",
+    )
+    parser.add_argument(
+        "--output_dir",
+        default=None,  # None makes get_trial_fp fall back to its own generated trials location
+        help="Output directory for trials."
+    )
+    parser.add_argument(
+        "--server_job",
+        dest='server_job',
+        action='store_true',
+        help='Run job on the server. It will incur less overhead of used CPUs,'
+        ' but more latency of workers waiting for the server to finish its own'
+        ' job. Only makes sense in combination with the flag --mpi.'
+    )
+    return parser  # callers add their command-specific options to this parser
+
+
 def quality(result_list, alpha=1):
     q = 0
     for _, rank in result_list:
@@ -33,7 +83,10 @@ def quality(result_list, alpha=1):
 
 
 def get_trial_fp(datasets, model_name=None, query_name=None, balance_name=None,
-                 feature_name=None, hyper_type="passive"):
+                 feature_name=None, hyper_type="passive", output_dir=None):
+
+    if output_dir is not None:
+        return output_dir, os.path.join(str(output_dir), "trials.pkl")
 
     name_list = [
         name for name in [model_name, query_name, balance_name, feature_name]
@@ -49,7 +102,6 @@ def get_trial_fp(datasets, model_name=None, query_name=None, balance_name=None,
 
 
 def get_data_names(datasets, data_dir="data"):
-    data_dir = "data"
     file_list = os.listdir(data_dir)
     file_list = [file_name for file_name in file_list
                  if file_name.endswith((".csv", ".xlsx", ".ris"))]
diff --git a/asreviewcontrib/hyperopt/passive.py b/asreviewcontrib/hyperopt/passive.py
index caded52..0759949 100644
--- a/asreviewcontrib/hyperopt/passive.py
+++ b/asreviewcontrib/hyperopt/passive.py
@@ -18,12 +18,11 @@
 
 from asreview.entry_points.base import BaseEntryPoint
 
-from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
-from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
 from asreviewcontrib.hyperopt.serial_executor import serial_executor
 from asreviewcontrib.hyperopt.serial_executor import serial_hyper_optimize
 from asreviewcontrib.hyperopt.passive_job import PassiveJobRunner
-from asreviewcontrib.hyperopt.job_utils import get_data_names
+from asreviewcontrib.hyperopt.job_utils import get_data_names,\
+    _base_parse_arguments
 
 
 class HyperPassiveEntryPoint(BaseEntryPoint):
@@ -43,7 +42,7 @@ def execute(self, argv):
 
 
 def _parse_arguments():
-    parser = argparse.ArgumentParser(prog=sys.argv[0])
+    parser = _base_parse_arguments("hyper-passive")
     parser.add_argument(
         "-m", "--model",
         type=str,
@@ -61,39 +60,6 @@ def _parse_arguments():
         type=str,
         default="doc2vec",
         help="Feature extraction method.")
-    parser.add_argument(
-        "-n", "--n_iter",
-        type=int,
-        default=1,
-        help="Number of iterations of Bayesian Optimization."
-    )
-    parser.add_argument(
-        "-d", "--datasets",
-        type=str,
-        default="all",
-        help="Datasets to use in the hyper parameter optimization "
-        "Separate by commas to use multiple at the same time [default: all].",
-    )
-    parser.add_argument(
-        "-r", "--n_run",
-        type=int,
-        default=8,
-        help="Number of runs per dataset."
-    )
-    parser.add_argument(
-        "--mpi",
-        dest='use_mpi',
-        action='store_true',
-        help="Use the mpi implementation.",
-    )
-    parser.add_argument(
-        "--server_job",
-        dest='server_job',
-        action='store_true',
-        help='Run job on the server. It will incur less overhead of used CPUs,'
-        ' but more latency of workers waiting for the server to finish its own'
-        ' job. Only makes sense in combination with the flag --mpi.'
-    )
     return parser
 
 
@@ -108,18 +74,24 @@ def main(argv=sys.argv[1:]):
     use_mpi = args["use_mpi"]
     n_run = args["n_run"]
     server_job = args["server_job"]
+    data_dir = args["data_dir"]
+    output_dir = args["output_dir"]
 
-    data_names = get_data_names(datasets)
+    data_names = get_data_names(datasets, data_dir=data_dir)
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_executor
         executor = mpi_executor
     else:
         executor = serial_executor
 
-    job_runner = PassiveJobRunner(data_names, model_name, balance_name,
-                                  feature_name, executor=executor, n_run=n_run,
-                                  server_job=server_job)
+    job_runner = PassiveJobRunner(
+        data_names, model_name, balance_name,
+        feature_name, executor=executor, n_run=n_run,
+        server_job=server_job, data_dir=data_dir,
+        output_dir=output_dir)
 
     if use_mpi:
+        from asreviewcontrib.hyperopt.mpi_executor import mpi_hyper_optimize
         mpi_hyper_optimize(job_runner, n_iter)
     else:
         serial_hyper_optimize(job_runner, n_iter)
diff --git a/asreviewcontrib/hyperopt/passive_job.py b/asreviewcontrib/hyperopt/passive_job.py
index 22a3a70..2b4cd24 100644
--- a/asreviewcontrib/hyperopt/passive_job.py
+++ b/asreviewcontrib/hyperopt/passive_job.py
@@ -39,11 +39,13 @@
 
 class PassiveJobRunner():
     def __init__(self, data_names, model_name, balance_name, feature_name,
-                 executor=serial_executor, n_run=10, server_job=False):
+                 executor=serial_executor, n_run=10, server_job=False,
+                 data_dir="data", output_dir=None):
 
         self.trials_dir, self.trials_fp = get_trial_fp(
             data_names, model_name=model_name, balance_name=balance_name,
-            feature_name=feature_name, hyper_type="passive")
+            feature_name=feature_name, hyper_type="passive",
+            output_dir=output_dir)
 
         self.model_name = model_name
         self.balance_name = balance_name
@@ -57,7 +59,7 @@ def __init__(self, data_names, model_name, balance_name, feature_name,
         self.data_names = data_names
         self.executor = executor
         self.n_run = n_run
-        self.data_dir = "data"
+        self.data_dir = data_dir
         self._cache = {data_name: {"train_idx": {}}
                        for data_name in data_names}
 
@@ -222,8 +224,9 @@ def compute_train_idx(y, seed):
     one_idx = np.where(y == 1)[0]
     zero_idx = np.where(y == 0)[0]
 
-    n_zero_train = round(0.75*len(zero_idx))
-    n_one_train = round(0.75*len(one_idx))
+    n_zero_train = min(len(zero_idx)-1, max(1, round(0.75*len(zero_idx))))
+    n_one_train = min(len(one_idx)-1, max(1, round(0.75*len(one_idx))))
+
     train_one_idx = np.random.choice(one_idx, n_one_train, replace=False)
     train_zero_idx = np.random.choice(zero_idx, n_zero_train, replace=False)
     train_idx = np.append(train_one_idx, train_zero_idx)
diff --git a/setup.py b/setup.py
index 61ec1fd..69a9a3c 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,13 @@
         exec(line)
         break
 
+DEPS = {
+    'mpi': ['mpi4py'],
+}
+
+DEPS['all'] = DEPS['mpi']
+
+
 setup(
     name='asreview-hyperopt',
     version=__version__,  # noqa
@@ -42,11 +49,10 @@
     packages=find_namespace_packages(include=['asreviewcontrib.*']),
     namespace_package=["asreview"],
     install_requires=[
-        "asreview>=0.7.0", "numpy", "tqdm", "hyperopt", "sklearn"
+        "asreview>=0.7.0", "numpy", "tqdm", "hyperopt", "sklearn",
     ],
 
-    extras_require={
-    },
+    extras_require=DEPS,
 
     entry_points={
         "asreview.entry_points": [
diff --git a/tests/data/embase_labelled.csv b/tests/data/embase_labelled.csv
new file mode 100644
index 0000000..797a1ef
--- /dev/null
+++ b/tests/data/embase_labelled.csv
@@ -0,0 +1,7 @@
+Title,Original Title,Author Names,Correspondence Address,Editors,Source,Abstract,Original Abstract,Emtree Drug Index Terms (Major Focus),Emtree Drug Index Terms,Emtree Medical Index Terms (Major Focus),Emtree Medical Index Terms,keywords,Drug Tradenames,Drug Manufacturer,Device Tradenames,Device Manufacturer,Clinical Trial Numbers,Open URL Link,Copyright,included
+"Duplication cysts: Diagnosis, management, and the role of endoscopic ultrasound",,"Liu R., Adler D.G.","D.G. Adler, Department of Gastroenterology and Hepatology, University of Utah School of Medicine, Huntsman Cancer Center, Salt Lake City, UT, United States. Email: douglas.adler@hsc.utah.edu",,Endoscopic Ultrasound (2014) 3:3 (152-160). Date of Publication: 2014,"Gastrointestinal tract duplication cysts are rare congenital gastrointestinal malformation in young patients and adults. They consist of foregut duplication cysts, small bowel duplication cysts, and large bowel duplication cysts. Endoscopic ultrasound (EUS) has been widely used as a modality for the evaluation and diagnosis of duplication cysts. EUS is the diagnostic tool of choice to investigate duplication cysts since it can distinguish between solid and cystic lesions. The question of whether or not to perform EUS-fine needle aspiration (EUS-FNA) on a lesion suspected of being a duplication cyst is controversial as these lesions can become infected with significant consequences, although EUS-FNA is often required to obtain a definitive diagnosis and to rule out more ominous lesions. 
This manuscript will review the literature on duplication cysts throughout the body and will also focus on the role of EUS and FNA with regards to these lesions.",,,,"endoscopic ultrasonography, gastrointestinal malformation (diagnosis), gastrointestinal tract duplication cyst (diagnosis)","article, bronchogenic duplication cyst (diagnosis, surgery), colonic duplication cyst (diagnosis, surgery), colonoscopy, computer assisted tomography, cytology, duodenal duplication cyst (diagnosis, surgery), endoscopic ultrasound guided fine needle biopsy, enucleation, esophageal duplication cyst (diagnosis, surgery), gastric duplication cyst (diagnosis), gastrointestinal mucosa, hemicolectomy, histology, human, ilial duplication cyst (diagnosis), intestine surgery, jejunal duplication cyst (diagnosis, surgery), symptomatology","article, bronchogenic duplication cyst (diagnosis, surgery), colonic duplication cyst (diagnosis, surgery), colonoscopy, computer assisted tomography, cytology, duodenal duplication cyst (diagnosis, surgery), endoscopic ultrasound guided fine needle biopsy, enucleation, esophageal duplication cyst (diagnosis, surgery), gastric duplication cyst (diagnosis), gastrointestinal mucosa, hemicolectomy, histology, human, ilial duplication cyst (diagnosis), intestine surgery, jejunal duplication cyst (diagnosis, surgery), symptomatology",,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=22267190&id=doi:10.4103%2F2303-9027.138783&atitle=Duplication+cysts%3A+Diagnosis%2C+management%2C+and+the+role+of+endoscopic+ultrasound&stitle=Endoscopic+Ultrasound&title=Endoscopic+Ultrasound&volume=3&issue=3&spage=152&epage=160&aulast=Liu&aufirst=Roy&auinit=R.&aufull=Liu+R.&coden=&isbn=&pages=152-160&date=2014&auinit1=R&auinitm=,"Copyright 2014 Elsevier B.V., All rights reserved.",0
+Endoscopic ultrasound-guided fine-needle aspiration in the diagnosis of foregut duplication cysts: The value of demonstrating detached ciliary tufts in cyst fluid,,"Eloubeidi M.A., Cohn M., Cerfolio R.J., Chhieng D.C., Jhala N., Jhala D., Eltoum I.A.","M.A. Eloubeidi, Endoscopic Ultrasound Program, Div. of Gastroenterol. and Hepatol., University of Alabama-Birmingham, 1530 3rd Avenue South-ZRB 633, Birmingham, AL 35294-0007, United States. Email: meloubeidi@uabmc.edu",,Cancer (2004) 102:4 (253-258). Date of Publication: 25 Aug 2004,,,,,"cyst fluid, endoscopic ultrasonography, fine needle aspiration biopsy, foregut duplication cyst (diagnosis), mediastinum cyst (diagnosis)","adult, aged, article, cancer cell, cancer cytodiagnosis, ciliary tuft, computer assisted tomography, conservative treatment, diagnostic accuracy, diagnostic value, electron microscopy, evaluation study, female, histopathology, human, major clinical study, male, mediastinum mass, priority journal, risk assessment, solid malignant neoplasm, symptomatology","adult, aged, article, cancer cell, cancer cytodiagnosis, ciliary tuft, computer assisted tomography, conservative treatment, diagnostic accuracy, diagnostic value, electron microscopy, evaluation study, female, histopathology, human, major clinical study, male, mediastinum mass, priority journal, risk assessment, solid malignant neoplasm, symptomatology",,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=0008543X&id=doi:10.1002%2Fcncr.20369&atitle=Endoscopic+ultrasound-guided+fine-needle+aspiration+in+the+diagnosis+of+foregut+duplication+cysts%3A+The+value+of+demonstrating+detached+ciliary+tufts+in+cyst+fluid&stitle=Cancer&title=Cancer&volume=102&issue=4&spage=253&epage=258&aulast=Eloubeidi&aufirst=Mohamad+A.&auinit=M.A.&aufull=Eloubeidi+M.A.&coden=CANCA&isbn=&pages=253-258&date=2004&auinit1=M&auinitm=A,"Copyright 2009 Elsevier B.V., All rights reserved.",1
+A case of completely isolated advanced enteric duplication cyst cancer performed partial pancreatectomy,,"Nakashima S., Yamada T., Sato G., Sakai T., Chinen Y., Itakura H., Kato R., Ueda M., Tsuda Y., Ohta K., Matsuyama J., Ikenaga M.","T. Yamada, Department of Gastroenterological Surgery, Higashiosaka City Medical Center, Nishiiwata 3-4-5, Higashiosaka, Japan. Email: yamada-t@higashiosaka-hosp.jp",,International Journal of Surgery Case Reports (2019) 54 (83-86). Date of Publication: 1 Jan 2019,"Introduction: Enteric duplication cysts are rare and, in addition, isolated enteric duplication cysts are lower morbidity prevalence rate. These cysts lack a connection to the gastrointestinal tract or the adjacent mesenteric vasculature and have only been reported in 10 case reports. In these reports, only two reports were cases with malignant transformation. Our case was a report for the advanced cancer of the isolated enteric duplication cyst. Case presentation: The patient was a 43 year-old woman with slightly abdominal pain and mass formation. The abdominal contrast-enhanced computed tomography showed 130 × 100 × 90 mm huge cystic mass existed in right upper peritoneal cavity. The cystic mass had thickened wall and many enhanced nodules. As these imaging findings suggested a tumor originated from pancreas and the preoperative diagnose was suspect of mucinous cystic neoplasm. In operative findings, the tumor originated from pancreatic head and did not attach to gastrointestinal tract. Final pathology indicated the cyst was an isolated advanced enteric duplication cyst cancer and not originated from pancreas. Conclusion: We experienced an extremely rare case of completely isolated advanced enteric duplication cyst cancer. Unique to this case, the preoperative diagnosis was suspect of mucinous cystic neoplasm arising from pancreas head and partial pancreatectomy was performed. 
However, in the pathological findings, this cyst diagnosed advanced enteric duplication cyst cancer.",,,"cytokeratin 20 (endogenous compound), cytokeratin 7 (endogenous compound), gadolinium, gimeracil plus oteracil potassium plus tegafur (drug therapy), transcription factor Cdx2 (endogenous compound)","advanced cancer (diagnosis, drug therapy, surgery), cyst (diagnosis, drug therapy, surgery), intestine duplication (diagnosis, surgery)","abdominal pain, abdominal tenderness, abdominal tumor, adjuvant chemotherapy, adult, article, cancer diagnosis, cancer surgery, case report, clinical article, clinical examination, computer assisted tomography, consensus, contrast enhancement, differential diagnosis, diffusion weighted imaging, enteric duplication cyst (drug therapy), female, human, human tissue, immunohistochemistry, lymph node dissection, lymphatic system, malignant transformation, mucinous cystic neoplasm (diagnosis), nervous system, pancreatectomy, pancreaticoduodenectomy, pathology, peritoneal cavity, physical examination, practice guideline, priority journal, tumor invasion, upper abdominal pain, venous circulation","abdominal pain, abdominal tenderness, abdominal tumor, adjuvant chemotherapy, adult, article, cancer diagnosis, cancer surgery, case report, clinical article, clinical examination, computer assisted tomography, consensus, contrast enhancement, differential diagnosis, diffusion weighted imaging, enteric duplication cyst (drug therapy), female, human, human tissue, immunohistochemistry, lymph node dissection, lymphatic system, malignant transformation, mucinous cystic neoplasm (diagnosis), nervous system, pancreatectomy, pancreaticoduodenectomy, pathology, peritoneal cavity, physical examination, practice guideline, priority journal, tumor invasion, upper abdominal pain, venous circulation",s 
1,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=22102612&id=doi:10.1016%2Fj.ijscr.2018.11.060&atitle=A+case+of+completely+isolated+advanced+enteric+duplication+cyst+cancer+performed+partial+pancreatectomy&stitle=Int.+J.+Surg.+Case+Rep.&title=International+Journal+of+Surgery+Case+Reports&volume=54&issue=&spage=83&epage=86&aulast=Nakashima&aufirst=Shinsuke&auinit=S.&aufull=Nakashima+S.&coden=&isbn=&pages=83-86&date=2019&auinit1=S&auinitm=,"Copyright 2018 Elsevier B.V., All rights reserved.",0
+,,"Quintanilla-Dieck L., Penn E.B.","L. Quintanilla-Dieck, Department of Otolaryngology Head and Neck Surgery, Oregon Health & Science University, 3181 Southwest Sam Jackson Park Road, PV-01, Portland, United States. Email: quintani@ohsu.edu",,Clinics in Perinatology (2018) 45:4 (769-785). Date of Publication: 1 Dec 2018,"Congenital neck masses can be a developmental anomaly of cystic, solid, or vascular origin. They can also constitute neoplasms, including malignancies, although this is rare in the pediatric population. The history and examination can help quickly narrow the differential diagnosis. Imaging also plays an essential role in defining the characteristics and likely cause of neck masses. The most common neck masses in young children are thyroglossal duct cysts, branchial cleft anomalies, and dermoid cysts. Also important to consider in the differential diagnosis are solid tumors, such as teratomas, or vascular lesions, such as hemangiomas.",,,"antineoplastic agent (drug therapy), bleomycin (drug therapy), doxycycline (drug therapy), propranolol (drug therapy), tetradecyl sulfate sodium (drug therapy), thyroid hormone (drug therapy)","congenital tumor (congenital disorder), neck tumor (congenital disorder)","ablation therapy, branchial defect (congenital disorder, surgery), cancer chemotherapy, cancer radiotherapy, cancer surgery, capillary hemangioma (congenital disorder, diagnosis, drug therapy, surgery), carbon dioxide laser, congenital blood vessel malformation (congenital disorder, surgery), cyst (congenital disorder, surgery), differential diagnosis, ectopic thyroid gland (congenital disorder, drug therapy, surgery, therapy), epidermoid cyst (congenital disorder, surgery), foregut duplication cyst (congenital disorder, surgery), head and neck tumor (congenital disorder, diagnosis), hormone substitution, human, lung cyst (congenital disorder), lymphatic malformation (congenital disorder, diagnosis, drug therapy, surgery, therapy), prenatal 
diagnosis, priority journal, respiration control, review, rhabdomyosarcoma (congenital disorder, drug therapy, radiotherapy, surgery), sclerotherapy, Sistrunk procedure, surgical approach, surgical technique, teratoma (congenital disorder, surgery), thymus cyst (congenital disorder, surgery), thyroglossal duct cyst (congenital disorder, surgery), thyroidectomy","ablation therapy, branchial defect (congenital disorder, surgery), cancer chemotherapy, cancer radiotherapy, cancer surgery, capillary hemangioma (congenital disorder, diagnosis, drug therapy, surgery), carbon dioxide laser, congenital blood vessel malformation (congenital disorder, surgery), cyst (congenital disorder, surgery), differential diagnosis, ectopic thyroid gland (congenital disorder, drug therapy, surgery, therapy), epidermoid cyst (congenital disorder, surgery), foregut duplication cyst (congenital disorder, surgery), head and neck tumor (congenital disorder, diagnosis), hormone substitution, human, lung cyst (congenital disorder), lymphatic malformation (congenital disorder, diagnosis, drug therapy, surgery, therapy), prenatal diagnosis, priority journal, respiration control, review, rhabdomyosarcoma (congenital disorder, drug therapy, radiotherapy, surgery), sclerotherapy, Sistrunk procedure, surgical approach, surgical technique, teratoma (congenital disorder, surgery), thymus cyst (congenital disorder, surgery), thyroglossal duct cyst (congenital disorder, surgery), thyroidectomy",,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=15579840&id=doi:10.1016%2Fj.clp.2018.07.012&atitle=Congenital+Neck+Masses&stitle=Clin.+Perinatol.&title=Clinics+in+Perinatology&volume=45&issue=4&spage=769&epage=785&aulast=Quintanilla-Dieck&aufirst=Lourdes&auinit=L.&aufull=Quintanilla-Dieck+L.&coden=CLPED&isbn=&pages=769-785&date=2018&auinit1=L&auinitm=,"Copyright 2018 Elsevier B.V., All rights reserved.",1
+Foregut duplication cysts: A report of two cases with emphasis on embryogenesis,,"Khoury T., Rivera L.","T. Khoury, Department of Pathology, Roswell Park Cancer Institute, Elm and Carlton Streets, Buffalo, NY 14263, United States. Email: thaer.khoury@roswellpark.org",,"World Journal of Gastroenterology (2011) 17:1 (130-134). Date of Publication: January 7, 2011","Duplication cyst of the stomach with a pseudostratified columnar ciliated epithelium is extremely rare. We describe two cases of these cysts, with emphasis on their immunophenotype and embryogenesis. The first patient was a 29-year-old man who presented with cramping abdominal pain in his left lower quadrant. The second patient was a 26-year-old woman who had a history, over several years, of chronic epigastric abdominal pain radiating to her back. Both lesions were surgically removed. They showed the same histomorphology. The cysts were lined by a pseudostratified respiratory epithelium with ciliated cells. The first cyst was connected to the stomach, while the second cyst was not connected. Both cysts expressed thyroid transcription factor-1 (TTF-1) and surfactant. In this report, we explore the possible embryogenesis of these lesions in the light of TTF-1 and surfactant expression. © 2011 Baishideng. 
All rights reserved.",,,"homeobox protein Nkx 2.1 (endogenous compound), surfactant (endogenous compound)","cyst (diagnosis, surgery), gastric duplication cyst (diagnosis, surgery), gastrointestinal duplication cyst (diagnosis, surgery)","abdominal pain, adult, article, case report, computer assisted tomography, embryo development, endoscopic ultrasonography, epigastric pain, female, human, immunophenotyping, male, partial gastrectomy, protein expression","abdominal pain, adult, article, case report, computer assisted tomography, embryo development, endoscopic ultrasonography, epigastric pain, female, human, immunophenotyping, male, partial gastrectomy, protein expression",,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=10079327&id=doi:10.3748%2Fwjg.v17.i1.130&atitle=Foregut+duplication+cysts%3A+A+report+of+two+cases+with+emphasis+on+embryogenesis&stitle=World+J.+Gastroenterol.&title=World+Journal+of+Gastroenterology&volume=17&issue=1&spage=130&epage=134&aulast=Khoury&aufirst=Thaer&auinit=T.&aufull=Khoury+T.&coden=WJGAF&isbn=&pages=130-134&date=2011&auinit1=T&auinitm=,"Copyright 2011 Elsevier B.V., All rights reserved.",0
+Completely Isolated Retroperitoneal Enteric Duplication Cyst with Adenocarcinoma Transformation Managed with Robotic Radical Nephrectomy,,"Faraj K., Edwards L., Gupta A., Seifman B.","B. Seifman, Michigan Institute of Urology, 1701 E South Boulevard, Rochester Hills, United States. Email: seifmanb@michiganurology.com",,Journal of Endourology Case Reports (2017) 3:1 (31-33). Date of Publication: 1 Mar 2017,"Background: Enteric duplication cysts are congenital malformations that typically affect children in infancy, but can also affect adults. Rarely, these cysts can be complicated by malignancy. We present the first case of retroperitoneal duplication cyst that was complicated by malignancy transformation and managed by robot-assisted excision. Case presentation: A 64-year-old female with a history of a left-sided renal cyst presented with a 4-month history of abdominal pain and fatigue. MRI revealed a bilobed cyst, with components measuring 6.9 × 6.6 and 6.1 × 6.9 cm, which had grown since previous imaging, and hemorrhage in some portions of the cysts, as well as cystic wall enhancement, suggesting a possible malignancy. The patient consented to a robot-assisted partial (possible radical) nephrectomy. During the procedure, the cystic structure appeared to have grown since imaging, was intimately associated with the hilum, and had a complex vasculature, which prompted us to perform a radical nephrectomy. Grossly, the specimen consisted of a 14.8 cm cystic structure at the superior portion of the kidney, but was not contained within the renal parenchyma. Histologically, the internal mucosa of the cyst showed columnar epithelium with high-grade dysplasia and carcinoma in situ with focal individual cell infiltration into the superficial portion of the inferior part of the cyst. The patient saw a medical oncologist and was instructed to follow up with quarterly imaging to assess for disease progression. 
Conclusion: Enteric duplication cysts are uncommon entities that can occur in various locations in the body, causing a wide spectrum of symptoms, and are rarely complicated by malignancy transformation. Robot-assisted surgical resection is an option that we have shown to be effective in managing these patients.",,,"2 methylacyl coenzyme A racemase (endogenous compound), cytokeratin 20 (endogenous compound), cytokeratin 7 (endogenous compound), kidney injury molecule 1 (endogenous compound), protein p53 (endogenous compound), transcription factor Cdx2 (endogenous compound), transcription factor PAX8 (endogenous compound)","colloid carcinoma (diagnosis), cyst (congenital disorder, diagnosis, surgery), enteric duplication cyst (congenital disorder, diagnosis, surgery), malignant transformation, radical nephrectomy, robotic surgical procedure","abdominal pain, adult, artery ligation, article, bleeding (diagnosis), cancer risk, case report, cell infiltration, clinical article, columnar epithelium, computer assisted tomography, cyst hemorrhage (diagnosis), disease exacerbation, fatigue, female, follow up, general anesthesia, histopathology, human, human tissue, middle aged, nuclear magnetic resonance imaging, priority journal, renal artery ligation, vascularization","abdominal pain, adult, artery ligation, article, bleeding (diagnosis), cancer risk, case report, cell infiltration, clinical article, columnar epithelium, computer assisted tomography, cyst hemorrhage (diagnosis), disease exacerbation, fatigue, female, follow up, general anesthesia, histopathology, human, human tissue, middle aged, nuclear magnetic resonance imaging, priority journal, renal artery ligation, 
vascularization",,,,,,http://sfx.library.uu.nl/utrecht?sid=EMBASE&issn=23799889&id=doi:10.1089%2Fcren.2017.0016&atitle=Completely+Isolated+Retroperitoneal+Enteric+Duplication+Cyst+with+Adenocarcinoma+Transformation+Managed+with+Robotic+Radical+Nephrectomy&stitle=J.+Endourol.+Case+Rep.&title=Journal+of+Endourology+Case+Reports&volume=3&issue=1&spage=31&epage=33&aulast=Faraj&aufirst=Kassem&auinit=K.&aufull=Faraj+K.&coden=&isbn=&pages=31-33&date=2017&auinit1=K&auinitm=,"Copyright 2018 Elsevier B.V., All rights reserved.",0
diff --git a/tests/test_active.py b/tests/test_active.py
new file mode 100644
index 0000000..229c469
--- /dev/null
+++ b/tests/test_active.py
@@ -0,0 +1,69 @@
+import os
+from os.path import join
+from pytest import mark
+from pathlib import Path
+
+import numpy as np
+
+from asreviewcontrib.hyperopt.active import main
+from asreviewcontrib.hyperopt.show_trials import load_trials
+
+
+def remove_dir(output_dir):
+    """Delete the files and folders a hyperopt test run leaves behind.
+
+    Best effort: anything already missing is skipped, and a directory
+    that still holds other files is left in place.
+    """
+    state_dirs = [join(output_dir, state, "embase_labelled")
+                  for state in ("best", "current")]
+    names = ("results_0.h5", "results_1.h5")
+    leftovers = [join(state_dir, name)
+                 for state_dir in state_dirs for name in names]
+    leftovers.append(join(output_dir, "trials.pkl"))
+
+    for path in leftovers:
+        try:
+            os.remove(path)
+        except FileNotFoundError:
+            pass
+
+    # Deepest directories first, since os.rmdir only removes empty ones.
+    parents = [join(output_dir, "best"), join(output_dir, "current")]
+    for folder in state_dirs + parents + [output_dir]:
+        try:
+            os.rmdir(folder)
+        except OSError:
+            pass
+
+
+@mark.parametrize(
+    "model,feature_extraction,query_strategy,balance_strategy", [
+        ("nb", "tfidf", "max", "simple"),
+        ("rf", "doc2vec", "max_random", "double"),
+        ("logistic", "doc2vec", "cluster", "triple"),
+        ("nn-2-layer", "doc2vec", "uncertainty", "undersample"),
+        ("svm", "tfidf", "uncertainty_max", "simple"),
+    ])
+def test_active(request, model, feature_extraction, query_strategy,
+                balance_strategy):
+    """Run the active optimizer with ``--n_iter 2`` and check the trials."""
+    tests_root = Path(request.fspath.dirname)
+    data_dir = tests_root / "data"
+    output_dir = str(tests_root / "temp" / f"active_{model}")
+    cli_args = [
+        "--model", model,
+        "--feature_extraction", feature_extraction,
+        "--query_strategy", query_strategy,
+        "--balance_strategy", balance_strategy,
+        "--data_dir", str(data_dir),
+        "--n_run", "2",
+        "--output_dir", output_dir,
+        "--n_iter", "2",
+    ]
+    remove_dir(output_dir)  # clear leftovers from an earlier run
+    main(cli_args)
+    trials = load_trials(join(output_dir, "trials.pkl"))
+    n_values = [len(values) for values in trials["values"].values()]
+    assert np.all(np.array(n_values) == 2)
+    remove_dir(output_dir)
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
new file mode 100644
index 0000000..aa80bd5
--- /dev/null
+++ b/tests/test_cluster.py
@@ -0,0 +1,65 @@
+import os
+from os.path import join
+from pytest import mark
+from pathlib import Path
+
+import numpy as np
+
+from asreviewcontrib.hyperopt.cluster import main
+from asreviewcontrib.hyperopt.show_trials import load_trials
+
+
+def remove_dir(output_dir):
+    """Delete the files and folders a hyperopt test run leaves behind.
+
+    Only the listed output files are removed. Cleanup is best effort:
+    anything already missing is skipped, and a directory that still
+    holds other files is left in place.
+    """
+    state_dirs = [join(output_dir, state, "embase_labelled")
+                  for state in ("best", "current")]
+    names = ("labels.json", "results_0.json", "results_1.json")
+    leftovers = [join(state_dir, name)
+                 for state_dir in state_dirs for name in names]
+    # The trials file sits directly in the output directory.
+    leftovers.append(join(output_dir, "trials.pkl"))
+
+    for path in leftovers:
+        try:
+            os.remove(path)
+        except FileNotFoundError:
+            pass
+
+    # Deepest directories first, since os.rmdir only removes empty ones.
+    parents = [join(output_dir, "best"), join(output_dir, "current")]
+    for folder in state_dirs + parents + [output_dir]:
+        try:
+            os.rmdir(folder)
+        except OSError:
+            pass
+
+
+@mark.parametrize("feature_extraction", ["tfidf", "doc2vec"])
+def test_cluster(request, feature_extraction):
+    """Run the cluster optimizer briefly and check the saved trials.
+
+    ``main`` is called with ``--n_iter 2`` and ``--n_run 2`` on the
+    ``data`` directory next to this file; every entry under ``"values"``
+    in the resulting ``trials.pkl`` must then hold exactly two items.
+    """
+    tests_root = Path(request.fspath.dirname)
+    data_dir = tests_root / "data"
+    output_dir = str(tests_root / "temp" / f"fex_{feature_extraction}")
+    cli_args = [
+        "--feature_extraction", feature_extraction,
+        "--data_dir", str(data_dir),
+        "--n_run", "2",
+        "--output_dir", output_dir,
+        "--n_iter", "2",
+    ]
+    remove_dir(output_dir)  # clear leftovers from an earlier run
+    main(cli_args)
+    trials = load_trials(join(output_dir, "trials.pkl"))
+    n_values = [len(values) for values in trials["values"].values()]
+    assert np.all(np.array(n_values) == 2)
+    remove_dir(output_dir)
diff --git a/tests/test_passive.py b/tests/test_passive.py
new file mode 100644
index 0000000..ac8d4b9
--- /dev/null
+++ b/tests/test_passive.py
@@ -0,0 +1,69 @@
+import os
+from os.path import join
+from pytest import mark
+from pathlib import Path
+
+import numpy as np
+
+from asreviewcontrib.hyperopt.passive import main
+from asreviewcontrib.hyperopt.show_trials import load_trials
+
+
+def remove_dir(output_dir):
+    """Delete the files and folders a hyperopt test run leaves behind.
+
+    Only the listed output files are removed. Cleanup is best effort:
+    anything already missing is skipped, and a directory that still
+    holds other files is left in place.
+    """
+    state_dirs = [join(output_dir, state, "embase_labelled")
+                  for state in ("best", "current")]
+    names = ("labels.json", "results_0.json", "results_1.json")
+    leftovers = [join(state_dir, name)
+                 for state_dir in state_dirs for name in names]
+    # The trials file sits directly in the output directory.
+    leftovers.append(join(output_dir, "trials.pkl"))
+
+    for path in leftovers:
+        try:
+            os.remove(path)
+        except FileNotFoundError:
+            pass
+
+    # Deepest directories first, since os.rmdir only removes empty ones.
+    parents = [join(output_dir, "best"), join(output_dir, "current")]
+    for folder in state_dirs + parents + [output_dir]:
+        try:
+            os.rmdir(folder)
+        except OSError:
+            pass
+
+
+@mark.parametrize(
+    "model,feature_extraction,balance_strategy", [
+        ("nb", "tfidf", "simple"),
+        ("rf", "doc2vec", "double"),
+        ("logistic", "doc2vec", "triple"),
+        ("nn-2-layer", "doc2vec", "undersample"),
+        ("svm", "tfidf", "simple"),
+    ])
+def test_passive(request, model, feature_extraction, balance_strategy):
+    """Run the passive optimizer with ``--n_iter 2`` and check the trials."""
+    tests_root = Path(request.fspath.dirname)
+    data_dir = tests_root / "data"
+    output_dir = str(tests_root / "temp" / f"passive_{model}")
+    cli_args = [
+        "--model", model,
+        "--feature_extraction", feature_extraction,
+        "--balance_strategy", balance_strategy,
+        "--data_dir", str(data_dir),
+        "--n_run", "2",
+        "--output_dir", output_dir,
+        "--n_iter", "2",
+    ]
+    remove_dir(output_dir)  # clear leftovers from an earlier run
+    main(cli_args)
+    trials = load_trials(join(output_dir, "trials.pkl"))
+    n_values = [len(values) for values in trials["values"].values()]
+    assert np.all(np.array(n_values) == 2)
+    remove_dir(output_dir)