diff --git a/.travis.yml b/.travis.yml
index bdd5a23..0674d0c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -25,6 +25,8 @@ script:
   # Build documentation
   - sphinx-build docs/source docs/build
   - touch docs/build/.nojekyll
+  # Black lint checking
+  - black --check scripts/ tests/
 after_success:
   # update coveralls report
   - coveralls
diff --git a/README.rst b/README.rst
index 0cb34c2..46b4056 100644
--- a/README.rst
+++ b/README.rst
@@ -6,6 +6,8 @@
    :target: https://sdevenes.github.io/M05_MiniProject/index.html
 .. image:: https://img.shields.io/badge/github-project-0000c0.svg
    :target: https://github.com/sdevenes/M05_MiniProject
+.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
+   :target: https://github.com/psf/black
 
 ===============================================================
 Human Activity Recognition from Continuous Ambient Sensor Data
diff --git a/build-requirements.txt b/build-requirements.txt
index 31e60be..c362dd0 100644
--- a/build-requirements.txt
+++ b/build-requirements.txt
@@ -3,3 +3,4 @@ sphinx_rtd_theme
 nose
 coverage
 coveralls
+black
diff --git a/scripts/algorithm.py b/scripts/algorithm.py
index 80413a6..3df618b 100644
--- a/scripts/algorithm.py
+++ b/scripts/algorithm.py
@@ -3,12 +3,13 @@
 import numpy as np
 import logging
 
+
 logger = logging.getLogger()
 
 
 class Model:
-    def __init__(self, nb_tree_per_forest=50, max_depth=10):
-        """Create a new ML model (Random forest classifier from scikitlearn)
+    def __init__(self, nb_tree_per_forest=50, max_depth=10):
+        """Create a new ML model (Random forest classifier from scikitlearn)
 
         Args:
             nb_tree_per_forest: number of decision trees in the forest
@@ -18,13 +19,13 @@ def __init__(self, nb_tree_per_forest=50, max_depth=10):
         Raises:
             None
         """
-        # Create a random forest model
-        self.model = RandomForestClassifier(n_estimators=nb_tree_per_forest, max_depth=max_depth,
-                                            random_state=0)
-
+        # Create a random forest model
+        self.model = RandomForestClassifier(
+            n_estimators=nb_tree_per_forest, max_depth=max_depth, random_state=0
+        )
 
-    def train(self, X, y):
-        """Train the model using the given data
+    def train(self, X, y):
+        """Train the model using the given data
 
         Args:
             X (numpy.ndarray):A NxM 2D-array where each row corresponds to a sample
                 and each column to a feature
@@ -34,11 +35,10 @@ def train(self, X, y):
         Raises:
             None
         """
-        self.model.fit(X, y)
-
+        self.model.fit(X, y)
 
-    def predict(self, X):
-        """Make a prediction on the data using the trained model
+    def predict(self, X):
+        """Make a prediction on the data using the trained model
 
         Args:
             X (numpy.ndarray):A NxM 2D-array where each row corresponds to a sample
                 and each column to a feature
@@ -49,6 +49,6 @@ def predict(self, X):
         Raises:
             None
         """
-        prediction = self.model.predict(X)
+        prediction = self.model.predict(X)
 
-        return prediction
+        return prediction
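The reformatting of scripts/algorithm.py above is layout-only, so the Model API is unchanged. As a reference point for reviewers, a minimal sketch of the intended call pattern, using made-up toy arrays rather than the project's dataset (assumes the scripts package is importable from the repository root):

    import numpy as np

    from scripts.algorithm import Model

    # Hypothetical toy data: 8 samples, 3 features, 2 classes (not from the repository)
    X = np.random.rand(8, 3)
    y = np.array([0, 1, 0, 1, 0, 1, 0, 1])

    model = Model(nb_tree_per_forest=5, max_depth=3)  # defaults remain 50 trees, depth 10
    model.train(X, y)  # fits the underlying RandomForestClassifier
    predictions = model.predict(X)  # one predicted label per row of X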
diff --git a/scripts/analysis.py b/scripts/analysis.py
index c0e9a64..a5509b4 100644
--- a/scripts/analysis.py
+++ b/scripts/analysis.py
@@ -3,6 +3,7 @@
 import numpy as np
 import itertools
 
+
 def get_confusion_matrix(prediction_label, true_label):
     """Get the confusion matrix given the predicted and true labels
 
@@ -16,7 +17,10 @@ def get_confusion_matrix(prediction_label, true_label):
     """
     return confusion_matrix(true_label, prediction_label)
 
-def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', file_name="graph"):
+
+def plot_confusion_matrix(
+    cm, classes, normalize=False, title="Confusion matrix", file_name="graph"
+):
     """Generate a plot of the given confusion matrix using plotly express
 
     Args:
@@ -35,13 +39,14 @@ def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix'
         cm = cm / np.sum(cm)
 
     # Create confusion matrix graph
-    fig = px.imshow(cm,
-                    labels=dict(x="Predicted label", y="True label", color="value"),
-                    x=classes,
-                    y=classes,
-                    color_continuous_scale='Blues',
-                    title=title
-                    )
-    #fig.show()
+    fig = px.imshow(
+        cm,
+        labels=dict(x="Predicted label", y="True label", color="value"),
+        x=classes,
+        y=classes,
+        color_continuous_scale="Blues",
+        title=title,
+    )
+    # fig.show()
 
     # Export graph
-    fig.write_html("{}.html".format(file_name))
\ No newline at end of file
+    fig.write_html("{}.html".format(file_name))
diff --git a/scripts/config.py b/scripts/config.py
index ccb7b25..0ffb049 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -1,3 +1,3 @@
-data_path = '../data/csh101/csh101.ann.features.csv'
+data_path = "../data/csh101/csh101.ann.features.csv"
 nb_trees_experiment = {"nb_trees": (1, 2), "tree_depth": 10}
 tree_depth_experiment = {"nb_trees": 10, "tree_depth": (1, 2)}
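Both helpers in scripts/analysis.py keep their signatures; only call layout and quote style change. A minimal usage sketch with invented labels (the two activity names are taken from database.CLASSES, the data itself is made up):

    from scripts import analysis

    predicted = ["Sleep", "Eat", "Sleep", "Sleep"]  # hypothetical model output
    true = ["Sleep", "Eat", "Eat", "Sleep"]  # hypothetical ground truth

    cm = analysis.get_confusion_matrix(predicted, true)
    # classes must match the matrix dimensions; the plot is written to graph.html
    analysis.plot_confusion_matrix(
        cm, classes=["Eat", "Sleep"], normalize=True, file_name="graph"
    )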
diff --git a/scripts/database.py b/scripts/database.py
index e116983..854ac71 100644
--- a/scripts/database.py
+++ b/scripts/database.py
@@ -4,95 +4,91 @@
 from sklearn.model_selection import train_test_split
 
 PROTOCOLS = {
-    'proto1': {'train': 0.8, 'test': 0.2, 'random': 1},
-    'proto2': {'train': 0.8, 'test': 0.2, 'random': 2},
+    "proto1": {"train": 0.8, "test": 0.2, "random": 1},
+    "proto2": {"train": 0.8, "test": 0.2, "random": 2},
 }
 
-SUBSETS = [
-    'train',
-    'validation',
-    'test'
-]
+SUBSETS = ["train", "validation", "test"]
 
 CLASSES = [
-    'Other_Activity',
-    'Watch_TV',
-    'Sleep_Out_Of_Bed',
-    'Bathe',
-    'Cook_Breakfast',
-    'Dress',
-    'Toilet',
-    'Personal_Hygiene',
-    'Sleep',
-    'Read',
-    'Relax',
-    'Cook_Dinner',
-    'Drink',
-    'Eat_Breakfast',
-    'Morning_Meds',
-    'Evening_Meds',
-    'Wash_Breakfast_Dishes',
-    'Cook_Lunch',
-    'Wash_Dishes',
-    'Leave_Home',
-    'Cook',
-    'Enter_Home',
-    'Entertain_Guests',
-    'Wash_Dinner_Dishes',
-    'Phone',
-    'Groom',
-    'Step_Out',
-    'Eat_Dinner',
-    'Eat_Lunch',
-    'Wash_Lunch_Dishes',
-    'Bed_Toilet_Transition',
-    'Eat',
-    'Go_To_Sleep',
-    'Wake_Up',
-    'Work_At_Table'
+    "Other_Activity",
+    "Watch_TV",
+    "Sleep_Out_Of_Bed",
+    "Bathe",
+    "Cook_Breakfast",
+    "Dress",
+    "Toilet",
+    "Personal_Hygiene",
+    "Sleep",
+    "Read",
+    "Relax",
+    "Cook_Dinner",
+    "Drink",
+    "Eat_Breakfast",
+    "Morning_Meds",
+    "Evening_Meds",
+    "Wash_Breakfast_Dishes",
+    "Cook_Lunch",
+    "Wash_Dishes",
+    "Leave_Home",
+    "Cook",
+    "Enter_Home",
+    "Entertain_Guests",
+    "Wash_Dinner_Dishes",
+    "Phone",
+    "Groom",
+    "Step_Out",
+    "Eat_Dinner",
+    "Eat_Lunch",
+    "Wash_Lunch_Dishes",
+    "Bed_Toilet_Transition",
+    "Eat",
+    "Go_To_Sleep",
+    "Wake_Up",
+    "Work_At_Table",
 ]
 
 VARIABLES = [
-    'lastSensorEventHours',
-    'lastSensorEventSeconds',
-    'lastSensorDayOfWeek',
-    'windowDuration',
-    'timeSinceLastSensorEvent',
-    'prevDominantSensor1',
-    'prevDominantSensor2',
-    'lastSensorID',
-    'lastSensorLocation',
-    'lastMotionLocation',
-    'complexity',
-    'activityChange',
-    'areaTransitions',
-    'numDistinctSensors',
-    'sensorCount-Bathroom',
-    'sensorCount-Bedroom',
-    'sensorCount-Chair',
-    'sensorCount-DiningRoom',
-    'sensorCount-Hall',
-    'sensorCount-Ignore',
-    'sensorCount-Kitchen',
-    'sensorCount-LivingRoom',
-    'sensorCount-Office',
-    'sensorCount-OutsideDoor',
-    'sensorCount-WorkArea',
-    'sensorElTime-Bathroom',
-    'sensorElTime-Bedroom',
-    'sensorElTime-Chair',
-    'sensorElTime-DiningRoom',
-    'sensorElTime-Hall',
-    'sensorElTime-Ignore',
-    'sensorElTime-Kitchen',
-    'sensorElTime-LivingRoom',
-    'sensorElTime-Office',
-    'sensorElTime-OutsideDoor',
-    'sensorElTime-WorkArea'
+    "lastSensorEventHours",
+    "lastSensorEventSeconds",
+    "lastSensorDayOfWeek",
+    "windowDuration",
+    "timeSinceLastSensorEvent",
+    "prevDominantSensor1",
+    "prevDominantSensor2",
+    "lastSensorID",
+    "lastSensorLocation",
+    "lastMotionLocation",
+    "complexity",
+    "activityChange",
+    "areaTransitions",
+    "numDistinctSensors",
+    "sensorCount-Bathroom",
+    "sensorCount-Bedroom",
+    "sensorCount-Chair",
+    "sensorCount-DiningRoom",
+    "sensorCount-Hall",
+    "sensorCount-Ignore",
+    "sensorCount-Kitchen",
+    "sensorCount-LivingRoom",
+    "sensorCount-Office",
+    "sensorCount-OutsideDoor",
+    "sensorCount-WorkArea",
+    "sensorElTime-Bathroom",
+    "sensorElTime-Bedroom",
+    "sensorElTime-Chair",
+    "sensorElTime-DiningRoom",
+    "sensorElTime-Hall",
+    "sensorElTime-Ignore",
+    "sensorElTime-Kitchen",
+    "sensorElTime-LivingRoom",
+    "sensorElTime-Office",
+    "sensorElTime-OutsideDoor",
+    "sensorElTime-WorkArea",
 ]
 
 
-def load(filepath='./data/csh101/csh101.ann.features.csv'):
+def load(filepath="./data/csh101/csh101.ann.features.csv"):
     """Loads the dataset
 
     Args:
@@ -105,8 +101,8 @@ def load(filepath="./data/csh101/csh101.ann.features.csv"):
     """
     x = []
    y = []
-    with open(filepath, 'rt') as f:
-        reader = csv.reader(f, delimiter=',')
+    with open(filepath, "rt") as f:
+        reader = csv.reader(f, delimiter=",")
         for k, row in enumerate(reader):
             if not k:
                 continue
@@ -129,16 +125,25 @@ def split_data(x, y, subset, splits):
     Raises:
         None
     """
-    x_train, x_test, y_train, y_test = train_test_split(x, y,
-                                                        test_size=splits['test'],
-                                                        train_size=splits['train'],
-                                                        random_state=splits['random'],
-                                                        stratify=y)
-    (x_split, y_split) = (x_train, y_train) if subset == 'train' else (x_test, y_test)
+    x_train, x_test, y_train, y_test = train_test_split(
+        x,
+        y,
+        test_size=splits["test"],
+        train_size=splits["train"],
+        random_state=splits["random"],
+        stratify=y,
+    )
+    (x_split, y_split) = (x_train, y_train) if subset == "train" else (x_test, y_test)
     return x_split, y_split
 
 
-def get(protocol, subset, classes=CLASSES, variables=VARIABLES, filepath='./data/csh101/csh101.ann.features.csv'):
+def get(
+    protocol,
+    subset,
+    classes=CLASSES,
+    variables=VARIABLES,
+    filepath="./data/csh101/csh101.ann.features.csv",
+):
     """Get the desired subset
 
     Args:
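scripts/database.py likewise keeps the load / split_data / get split; Black only reflows the long literal lists and calls. A minimal sketch of the get entry point (the variable selection is illustrative, the path is the function's own default):

    from scripts import database

    # "proto1" and "proto2" share the 0.8/0.2 split and differ only in random seed
    x_train, y_train = database.get(
        "proto1",
        "train",
        classes=database.CLASSES,
        variables=["lastSensorEventHours", "complexity"],
        filepath="./data/csh101/csh101.ann.features.csv",
    )
    # x_train has one column per requested variable, y_train one activity label per sample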
diff --git a/scripts/download_data.py b/scripts/download_data.py
index 62d5e90..7a5e977 100644
--- a/scripts/download_data.py
+++ b/scripts/download_data.py
@@ -1,5 +1,5 @@
-import requests
-import sys
+import requests
+import sys
 import zipfile
 
 
@@ -16,21 +16,21 @@ def download_url(url, save_path):
         None
     """
     with open(save_path, "wb") as f:
-        print("Downloading {} from {}".format(save_path, url))
-        response = requests.get(url, stream=True)
-        total_length = response.headers.get('content-length')
-
-        if total_length is None: # no content length header
-            f.write(response.content)
-        else:
-            dl = 0
-            total_length = int(total_length)
-            for data in response.iter_content(chunk_size=4096):
-                dl += len(data)
-                f.write(data)
-                done = int(50 * dl / total_length)
-                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) )
-                sys.stdout.flush()
+        print("Downloading {} from {}".format(save_path, url))
+        response = requests.get(url, stream=True)
+        total_length = response.headers.get("content-length")
+
+        if total_length is None:  # no content length header
+            f.write(response.content)
+        else:
+            dl = 0
+            total_length = int(total_length)
+            for data in response.iter_content(chunk_size=4096):
+                dl += len(data)
+                f.write(data)
+                done = int(50 * dl / total_length)
+                sys.stdout.write("\r[%s%s]" % ("=" * done, " " * (50 - done)))
+                sys.stdout.flush()
     print()
@@ -47,16 +47,16 @@ def unzip_file(path_to_zip_file, directory_to_extract_to):
         None
     """
     print("Unzip files..")
-    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
+    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
         zip_ref.extractall(directory_to_extract_to)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00506/casas-dataset.zip"
-    url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/00405/Postures.zip" # Smaller zip to test
+    url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/00405/Postures.zip"  # Smaller zip to test
     save_path = "../data/casas-dataset.zip"
     # Download zip file
     download_url(url, save_path)
     # Unzip it
     unzip_file(save_path, "../data_test/")
-    print("Done")
\ No newline at end of file
+    print("Done")
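The download helpers behave as before. A minimal sketch of fetching and unpacking an archive, reusing the smaller test URL already present in the script's __main__ block (the save path is hypothetical and its directory must already exist):

    from scripts import download_data

    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00405/Postures.zip"
    save_path = "../data/postures.zip"  # hypothetical target path

    download_data.download_url(url, save_path)  # streams in 4096-byte chunks with a progress bar
    download_data.unzip_file(save_path, "../data/")  # raises zipfile.BadZipFile on a corrupt archive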
diff --git a/scripts/main.py b/scripts/main.py
index 3d2c5e3..1889345 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -3,6 +3,7 @@
 from scripts import algorithm, database, analysis, config
 import numpy as np
 
+
 def base_experiment(protocol, variables, filepath, nb_tree_per_forest=50, max_depth=10):
     """Basic test for the random forest classifier
 
@@ -17,10 +18,14 @@ def base_experiment(protocol, variables, filepath, nb_tree_per_forest=50, max_de
     Raises:
         None
     """
-    x_train, y_train = database.get(protocol, 'train', database.CLASSES, variables, filepath)
+    x_train, y_train = database.get(
+        protocol, "train", database.CLASSES, variables, filepath
+    )
     model = algorithm.Model(nb_tree_per_forest, max_depth)
     model.train(x_train, y_train)
-    x_test, y_test = database.get(protocol, 'test', database.CLASSES, variables, filepath)
+    x_test, y_test = database.get(
+        protocol, "test", database.CLASSES, variables, filepath
+    )
     test_predictions = model.predict(x_test)
     cm = analysis.get_confusion_matrix(test_predictions, y_test)
     return cm
@@ -37,8 +42,9 @@ def pretty_confusion_matrix(cm):
         None
     """
     classes = np.array([database.CLASSES])
-    table = tabulate(np.vstack((np.hstack(([[""]], classes)),
-                                np.hstack((classes.T, cm)))))
+    table = tabulate(
+        np.vstack((np.hstack(([[""]], classes)), np.hstack((classes.T, cm))))
+    )
     return table
@@ -57,16 +63,20 @@ def experiment_impact_nb_trees(tabnum, filepath, nb_trees, max_depth):
     print("\nImpact of number of trees per forest")
     for n, p in enumerate(database.PROTOCOLS):
         for m, nb_tree_per_forest in enumerate(nb_trees):
-            print("\nTable {table_number}: Confusion matrix with {nb_trees} tree(s) for Protocol `{protocol}`".format(
-                table_number=(n * len(nb_trees)) + m + tabnum,
-                protocol=p,
-                nb_trees=nb_tree_per_forest)
+            print(
+                "\nTable {table_number}: Confusion matrix with {nb_trees} tree(s) for Protocol `{protocol}`".format(
+                    table_number=(n * len(nb_trees)) + m + tabnum,
+                    protocol=p,
+                    nb_trees=nb_tree_per_forest,
+                )
+            )
+            cm = base_experiment(
+                p,
+                database.VARIABLES,
+                nb_tree_per_forest=nb_tree_per_forest,
+                max_depth=max_depth,
+                filepath=filepath,
             )
-            cm = base_experiment(p,
-                                 database.VARIABLES,
-                                 nb_tree_per_forest=nb_tree_per_forest,
-                                 max_depth=max_depth,
-                                 filepath=filepath)
             print(pretty_confusion_matrix(cm))
@@ -88,25 +98,32 @@ def experiment_impact_tree_depth(tabnum, filepath, nb_trees, max_depths):
                 "\nTable {table_number}: Confusion matrix with trees maximum depth of {max_depth} for Protocol `{protocol}`".format(
                     table_number=(n * len(max_depths)) + m + tabnum,
                     protocol=p,
-                    max_depth=max_depth)
+                    max_depth=max_depth,
+                )
+            )
+            cm = base_experiment(
+                p,
+                database.VARIABLES,
+                nb_tree_per_forest=nb_trees,
+                max_depth=max_depth,
+                filepath=filepath,
            )
-            cm = base_experiment(p,
-                                 database.VARIABLES,
-                                 nb_tree_per_forest=nb_trees,
-                                 max_depth=max_depth,
-                                 filepath=filepath)
             print(pretty_confusion_matrix(cm))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     print("Main script for Human Activity Recognition with Random Forest classifier")
     tabnum = 1
-    experiment_impact_nb_trees(tabnum,
-                               filepath=config.data_path,
-                               nb_trees=config.nb_trees_experiment['nb_trees'],
-                               max_depth=config.nb_trees_experiment['tree_depth'])
-    tabnum += len(config.nb_trees_experiment['nb_trees'])*len(database.PROTOCOLS)
-    experiment_impact_tree_depth(tabnum,
-                                 filepath=config.data_path,
-                                 nb_trees=config.tree_depth_experiment['nb_trees'],
-                                 max_depths=config.tree_depth_experiment['tree_depth'])
+    experiment_impact_nb_trees(
+        tabnum,
+        filepath=config.data_path,
+        nb_trees=config.nb_trees_experiment["nb_trees"],
+        max_depth=config.nb_trees_experiment["tree_depth"],
+    )
+    tabnum += len(config.nb_trees_experiment["nb_trees"]) * len(database.PROTOCOLS)
+    experiment_impact_tree_depth(
+        tabnum,
+        filepath=config.data_path,
+        nb_trees=config.tree_depth_experiment["nb_trees"],
+        max_depths=config.tree_depth_experiment["tree_depth"],
+    )
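For a one-off run outside the two experiment loops, base_experiment can also be called directly; a short sketch with illustrative hyperparameters (the defaults stay at 50 trees and depth 10):

    from scripts import config, database, main

    cm = main.base_experiment(
        "proto1",
        database.VARIABLES,
        filepath=config.data_path,
        nb_tree_per_forest=20,  # illustrative value
        max_depth=5,  # illustrative value
    )
    print(main.pretty_confusion_matrix(cm))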
diff --git a/tests/test.py b/tests/test.py
index 7ee65bc..586c43e 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -15,26 +15,41 @@ def test_unzip_file_not_a_zip():
     path_to_zip_file = base_path + "/inputs/not_a_zip.txt"  # not a zip
     directory_to_extract_to = base_path  # extract here
 
-    nose.tools.assert_raises(zipfile.BadZipFile, download_data.unzip_file, path_to_zip_file, directory_to_extract_to)
+    nose.tools.assert_raises(
+        zipfile.BadZipFile,
+        download_data.unzip_file,
+        path_to_zip_file,
+        directory_to_extract_to,
+    )
 
 
 def test_unzip_file_invalid_zip_path():
     path_to_zip_file = base_path + "/simpleZip.zip"  # Invalid path
     directory_to_extract_to = base_path  # extract here
 
-    nose.tools.assert_raises(FileNotFoundError, download_data.unzip_file, path_to_zip_file, directory_to_extract_to)
+    nose.tools.assert_raises(
+        FileNotFoundError,
+        download_data.unzip_file,
+        path_to_zip_file,
+        directory_to_extract_to,
+    )
 
 
 def test_unzip_file():
     path_to_zip_file = base_path + "/inputs/simpleZip.zip"
     directory_to_extract_to = base_path + "/output/"  # extract here
 
     download_data.unzip_file(path_to_zip_file, directory_to_extract_to)
-    nose.tools.ok_(os.path.isfile(base_path + "/output/f1.txt"), msg="SimpleZip not correctly unzipped")
+    nose.tools.ok_(
+        os.path.isfile(base_path + "/output/f1.txt"),
+        msg="SimpleZip not correctly unzipped",
+    )
 
 
 def test_download_url_invalid_url():
     url = "https://invalid_url.zip"
     save_path = base_path + "/output/invalid_dl.zip"
 
-    nose.tools.assert_raises(requests.exceptions.ConnectionError, download_data.download_url, url, save_path)
+    nose.tools.assert_raises(
+        requests.exceptions.ConnectionError, download_data.download_url, url, save_path
+    )
 
 
 def test_download_url():
@@ -53,83 +68,98 @@ def test_get_confusion_matrix():
 
 
 def test_load():
-    datapath = base_path + '/inputs/test_set.csv'
+    datapath = base_path + "/inputs/test_set.csv"
     data_len = 1225
     num_columns = len(database.VARIABLES)
     expected_x_shape = (data_len, num_columns)
     expected_y_shape = (data_len,)
     x, y = database.load(datapath)
-    nose.tools.ok_(x.shape == expected_x_shape,
-                   msg='x.shape != {}'.format(expected_x_shape))
-    nose.tools.ok_(y.shape == expected_y_shape,
-                   msg='y.shape != {}'.format(expected_y_shape))
+    nose.tools.ok_(
+        x.shape == expected_x_shape, msg="x.shape != {}".format(expected_x_shape)
+    )
+    nose.tools.ok_(
+        y.shape == expected_y_shape, msg="y.shape != {}".format(expected_y_shape)
+    )
 
 
 def test_split_data_test():
-    datapath = base_path + '/inputs/test_set.csv'
+    datapath = base_path + "/inputs/test_set.csv"
     data_len = 1225
     num_columns = len(database.VARIABLES)
-    protocol = 'proto1'
-    subset = 'test'
+    protocol = "proto1"
+    subset = "test"
     expected_test_len = np.ceil(database.PROTOCOLS[protocol][subset] * data_len)
     x, y = database.load(datapath)
     x_test, y_test = database.split_data(x, y, subset, database.PROTOCOLS[protocol])
-    nose.tools.ok_(x_test.shape[0] == expected_test_len,
-                   msg='x_test.shape[0] != {}'.format(expected_test_len))
-    nose.tools.ok_(y_test.shape[0] == expected_test_len,
-                   msg='y_test.shape[0] != {}'.format(expected_test_len))
+    nose.tools.ok_(
+        x_test.shape[0] == expected_test_len,
+        msg="x_test.shape[0] != {}".format(expected_test_len),
+    )
+    nose.tools.ok_(
+        y_test.shape[0] == expected_test_len,
+        msg="y_test.shape[0] != {}".format(expected_test_len),
+    )
 
 
 def test_split_data_train():
-    datapath = base_path + '/inputs/test_set.csv'
+    datapath = base_path + "/inputs/test_set.csv"
     data_len = 1225
-    protocol = 'proto1'
-    subset = 'train'
+    protocol = "proto1"
+    subset = "train"
     expected_train_len = np.floor(database.PROTOCOLS[protocol][subset] * data_len)
     x, y = database.load(datapath)
     x_train, y_train = database.split_data(x, y, subset, database.PROTOCOLS[protocol])
-    nose.tools.ok_(x_train.shape[0] == expected_train_len,
-                   msg='x_test.shape[0] != {}'.format(expected_train_len))
-    nose.tools.ok_(y_train.shape[0] == expected_train_len,
-                   msg='y_test.shape[0] != {}'.format(expected_train_len))
+    nose.tools.ok_(
+        x_train.shape[0] == expected_train_len,
+        msg="x_test.shape[0] != {}".format(expected_train_len),
+    )
+    nose.tools.ok_(
+        y_train.shape[0] == expected_train_len,
+        msg="y_test.shape[0] != {}".format(expected_train_len),
+    )
 
 
 def test_get_default():
-    datapath = base_path + '/inputs/test_set.csv'
-    protocol = 'proto1'
-    subset = 'test'
+    datapath = base_path + "/inputs/test_set.csv"
+    protocol = "proto1"
+    subset = "test"
     data_len = 1225
     num_columns = len(database.VARIABLES)
     expected_len = np.ceil(database.PROTOCOLS[protocol][subset] * data_len)
     expected_x_shape = (expected_len, num_columns)
     expected_y_shape = (expected_len,)
     x, y = database.get(protocol, subset, filepath=datapath)
-    nose.tools.ok_(x.shape == expected_x_shape,
-                   msg='x.shape != {}'.format(expected_x_shape))
-    nose.tools.ok_(y.shape == expected_y_shape,
-                   msg='y.shape != {}'.format(expected_y_shape))
+    nose.tools.ok_(
+        x.shape == expected_x_shape, msg="x.shape != {}".format(expected_x_shape)
+    )
+    nose.tools.ok_(
+        y.shape == expected_y_shape, msg="y.shape != {}".format(expected_y_shape)
+    )
 
 
 def test_get_limited_variables():
-    datapath = base_path + '/inputs/test_set.csv'
-    protocol = 'proto1'
-    subset = 'test'
-    variables = ['lastSensorEventHours', 'complexity']
+    datapath = base_path + "/inputs/test_set.csv"
+    protocol = "proto1"
+    subset = "test"
+    variables = ["lastSensorEventHours", "complexity"]
     variables_indices = [0, 10]
     x_full, y_full = database.get(protocol, subset, filepath=datapath)
     x, y = database.get(protocol, subset, variables=variables, filepath=datapath)
-    nose.tools.ok_(np.array_equal(x, x_full[:, variables_indices]),
-                   msg="x != x_full[:, ('lastSensorEventHours', 'complexity')]")
+    nose.tools.ok_(
+        np.array_equal(x, x_full[:, variables_indices]),
+        msg="x != x_full[:, ('lastSensorEventHours', 'complexity')]",
+    )
 
 
 def test_get_limited_classes():
-    datapath = base_path + '/inputs/test_set.csv'
-    protocol = 'proto1'
-    subset = 'test'
-    classes = ['Other_Activity', 'Work_At_Table']
+    datapath = base_path + "/inputs/test_set.csv"
+    protocol = "proto1"
+    subset = "test"
+    classes = ["Other_Activity", "Work_At_Table"]
     other_classes = database.CLASSES[1:-1]
     x, y = database.get(protocol, subset, classes=classes, filepath=datapath)
-    nose.tools.ok_(not np.isin(y, other_classes).any(),
-                   msg='y contains other classes than: {}'.format(classes))
-    nose.tools.ok_(x.shape[0] == y.shape[0],
-                   msg='x.shape[0] != y.shape[0]')
+    nose.tools.ok_(
+        not np.isin(y, other_classes).any(),
+        msg="y contains other classes than: {}".format(classes),
+    )
+    nose.tools.ok_(x.shape[0] == y.shape[0], msg="x.shape[0] != y.shape[0]")
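The expected lengths asserted in these tests follow directly from the protocol ratios: scikit-learn's train_test_split rounds the test split up and the train split down, which is why the tests pair np.ceil with "test" and np.floor with "train". A short check of the arithmetic for proto1's 0.8/0.2 split over the 1225-row fixture:

    import numpy as np

    data_len = 1225
    expected_test_len = np.ceil(0.2 * data_len)  # 245.0
    expected_train_len = np.floor(0.8 * data_len)  # 980.0
    assert expected_test_len + expected_train_len == data_len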