Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Simpler Pipeline evaluation function #1093

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion fedot/core/optimisers/objective/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .data_objective_eval import PipelineObjectiveEvaluate, DataSource
from .data_objective_eval import PipelineObjectiveEvaluate, get_pipeline_evaluator, get_pipeline_fitness
from .data_source_splitter import DataSource
from .metrics_objective import MetricsObjective
from .objective_serialization import init_backward_serialize_compat

Expand Down
51 changes: 48 additions & 3 deletions fedot/core/optimisers/objective/data_objective_eval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import traceback
from datetime import timedelta
from typing import Callable, Iterable, Optional, Tuple
from typing import Optional, Union, Iterable

import numpy as np
from golem.core.log import default_log
Expand All @@ -12,11 +12,12 @@
from fedot.core.caching.preprocessing_cache import PreprocessingCache
from fedot.core.data.data import InputData
from fedot.core.operations.model import Model
from fedot.core.optimisers.objective.metrics_objective import MetricsObjective
from fedot.core.optimisers.objective.data_source_splitter import DataSource, DataSourceSplitter
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.quality_metrics_repository import MetricType
from fedot.utilities.debug import is_recording_mode, is_test_session, save_debug_info_for_pipeline

DataSource = Callable[[], Iterable[Tuple[InputData, InputData]]]


class PipelineObjectiveEvaluate(ObjectiveEvaluate[Pipeline]):
"""
Expand Down Expand Up @@ -150,3 +151,47 @@ def evaluate_intermediate_metrics(self, graph: Pipeline):
@property
def input_data(self):
return self._data_producer.args[0]


def get_pipeline_evaluator(metrics: Union[MetricType, Iterable[MetricType]],
                           data: InputData,
                           cv_folds: Optional[int] = None,
                           validation_blocks: Optional[int] = None) -> PipelineObjectiveEvaluate:
    """Helper function that builds a ready-to-use Pipeline evaluator.

    Args:
        metrics: one or many metrics to be evaluated and included in the Fitness object.
        data: data for evaluation.
        cv_folds: number of folds for cross validation of the Pipeline, optional.
        validation_blocks: number of validation blocks, optional, used only for time series validation.

    Returns:
        PipelineObjectiveEvaluate: callable object that can evaluate Pipelines.
    """
    objective = MetricsObjective(metrics)
    # Produces train/test splits of `data` according to the cv settings.
    data_producer = DataSourceSplitter(cv_folds, validation_blocks).build(data)
    # NOTE(review): do_unfit=False presumably leaves the pipeline fitted after
    # evaluation so callers can reuse it without re-fitting — confirm.
    objective_evaluate = PipelineObjectiveEvaluate(objective,
                                                   data_producer,
                                                   validation_blocks=validation_blocks,
                                                   do_unfit=False)
    return objective_evaluate


def get_pipeline_fitness(pipeline: Pipeline,
                         metrics: Union[MetricType, Iterable[MetricType]],
                         data: InputData,
                         cv_folds: Optional[int] = None,
                         validation_blocks: Optional[int] = None) -> Fitness:
    """Helper function for simplifying Pipeline evaluation.

    Args:
        pipeline: Pipeline for evaluation
        metrics: one or many metrics to be evaluated and included in Fitness object.
        data: data for evaluation.
        cv_folds: number of folds for cross validation of Pipeline, optional.
        validation_blocks: Number of validation blocks, optional, used only for time series validation.

    Returns:
        Fitness object
    """
    evaluator = get_pipeline_evaluator(metrics, data, cv_folds, validation_blocks)
    return evaluator.evaluate(pipeline)
5 changes: 3 additions & 2 deletions fedot/core/optimisers/objective/data_source_splitter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import partial
from typing import Optional
from typing import Optional, Callable, Iterable, Tuple

from golem.core.log import default_log

Expand All @@ -8,11 +8,12 @@
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.optimisers.objective.data_objective_advisor import DataObjectiveAdvisor
from fedot.core.optimisers.objective.data_objective_eval import DataSource
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.core.validation.split import tabular_cv_generator, ts_cv_generator
from fedot.remote.remote_evaluator import RemoteEvaluator, init_data_for_remote_execution

DataSource = Callable[[], Iterable[Tuple[InputData, InputData]]]


class DataSourceSplitter:
"""
Expand Down
3 changes: 1 addition & 2 deletions test/unit/optimizer/test_pipeline_objective_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import numpy as np
import pytest

from golem.core.optimisers.fitness import SingleObjFitness

from fedot.core.data.data import InputData
Expand Down Expand Up @@ -157,9 +156,9 @@ def test_pipeline_objective_evaluate_with_invalid_metrics(classification_dataset
@pytest.mark.parametrize('folds, actual_value', [(2, 9.8965), (3, 38.624)])
def test_pipeline_objective_evaluate_for_timeseries_cv(folds, actual_value):
forecast_len, validation_blocks, time_series = configure_experiment()
simple_pipeline = get_simple_ts_pipeline()
objective = MetricsObjective(RegressionMetricsEnum.MSE)
data_producer = DataSourceSplitter(folds, validation_blocks).build(time_series)
simple_pipeline = get_simple_ts_pipeline()
objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks)
metric_value = objective_evaluate.evaluate(simple_pipeline).value
assert np.isclose(metric_value, actual_value)
21 changes: 4 additions & 17 deletions test/unit/pipelines/tuning/test_tuner_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from datetime import timedelta
from typing import Optional

import numpy as np
import pytest
Expand All @@ -9,32 +8,20 @@
from hyperopt import tpe, rand

from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER
from fedot.core.data.data import InputData
from fedot.core.optimisers.objective import PipelineObjectiveEvaluate
from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter
from fedot.core.optimisers.objective.metrics_objective import MetricsObjective
from fedot.core.optimisers.objective import get_pipeline_evaluator
from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricType
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from test.unit.optimizer.test_pipeline_objective_eval import pipeline_first_test
from test.unit.pipelines.tuning.test_pipeline_tuning import get_not_default_search_space
from test.unit.validation.test_table_cv import get_classification_data


def get_objective_evaluate(metric: MetricType, data: InputData,
                           cv_folds: Optional[int] = None, validation_blocks: Optional[int] = None) \
        -> PipelineObjectiveEvaluate:
    """Build a PipelineObjectiveEvaluate for *metric* over *data* split by the given cv settings."""
    producer = DataSourceSplitter(cv_folds, validation_blocks).build(data)
    return PipelineObjectiveEvaluate(MetricsObjective(metric), producer,
                                     validation_blocks=validation_blocks)


def test_tuner_builder_with_default_params():
data = get_classification_data()
pipeline = pipeline_first_test()
tuner = TunerBuilder(data.task).build(data)
objective_evaluate = get_objective_evaluate(ClassificationMetricsEnum.ROCAUC_penalty, data)
objective_evaluate = get_pipeline_evaluator(ClassificationMetricsEnum.ROCAUC_penalty, data)
assert isinstance(tuner, HyperoptTuner)
assert np.isclose(tuner.objective_evaluate(pipeline).value, objective_evaluate.evaluate(pipeline).value)
assert isinstance(tuner.search_space, PipelineSearchSpace)
Expand All @@ -51,7 +38,7 @@ def test_tuner_builder_with_custom_params(tuner_class):
cv_folds = 3
validation_blocks = 2

objective_evaluate = get_objective_evaluate(metric, data, cv_folds, validation_blocks)
objective_evaluate = get_pipeline_evaluator(metric, data, cv_folds, validation_blocks)
timeout = timedelta(minutes=2)
early_stopping = 100
iterations = 10
Expand Down
18 changes: 6 additions & 12 deletions test/unit/validation/test_table_cv.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,25 @@
import logging
import os
from datetime import timedelta
from functools import partial

import pytest
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from sklearn.metrics import roc_auc_score as roc_auc
from sklearn.model_selection import KFold, StratifiedKFold

from fedot.api.main import Fedot
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.optimisers.objective import PipelineObjectiveEvaluate
from fedot.core.optimisers.objective import get_pipeline_fitness
from fedot.core.optimisers.objective.data_objective_advisor import DataObjectiveAdvisor
from fedot.core.optimisers.objective.metrics_objective import MetricsObjective
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.validation.split import tabular_cv_generator
from test.unit.api.test_api_cli_params import project_root_path
from test.unit.models.test_model import classification_dataset
from test.unit.tasks.test_classification import get_iris_data, pipeline_simple
Expand All @@ -33,8 +29,8 @@

def sample_pipeline():
return Pipeline(PipelineNode(operation_type='logit',
nodes_from=[PipelineNode(operation_type='rf'),
PipelineNode(operation_type='scaling')]))
nodes_from=[PipelineNode(operation_type='rf'),
PipelineNode(operation_type='scaling')]))


def get_classification_data():
Expand All @@ -46,13 +42,11 @@ def get_classification_data():
def test_cv_multiple_metrics_evaluated_correct(classification_dataset):
pipeline = sample_pipeline()

cv_folds = partial(tabular_cv_generator, classification_dataset, folds=5)
metrics = [ClassificationMetricsEnum.ROCAUC_penalty,
ClassificationMetricsEnum.accuracy,
ClassificationMetricsEnum.logloss]
objective_eval = PipelineObjectiveEvaluate(MetricsObjective(metrics), cv_folds)
actual_values = objective_eval(pipeline).values
all_metrics_correct = all(0 < abs(x) <= 1 for x in actual_values)
fitness = get_pipeline_fitness(pipeline, metrics, classification_dataset, cv_folds=5)
all_metrics_correct = all(0 < abs(x) <= 1 for x in fitness.values)

assert all_metrics_correct

Expand Down