From 4a2950d0089d2532b7ae15a0926ad6207c5f573a Mon Sep 17 00:00:00 2001 From: Grigorii Kirgizov Date: Wed, 3 May 2023 10:14:04 +0300 Subject: [PATCH 1/4] Move useful function from tests --- fedot/core/optimisers/objective/__init__.py | 3 +- .../objective/data_objective_eval.py | 29 +++++++++++++++++-- .../objective/data_source_splitter.py | 5 ++-- .../pipelines/tuning/test_tuner_builder.py | 20 ++++--------- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/fedot/core/optimisers/objective/__init__.py b/fedot/core/optimisers/objective/__init__.py index 11a17fc5e5..0185526390 100644 --- a/fedot/core/optimisers/objective/__init__.py +++ b/fedot/core/optimisers/objective/__init__.py @@ -1,4 +1,5 @@ -from .data_objective_eval import PipelineObjectiveEvaluate, DataSource +from .data_objective_eval import PipelineObjectiveEvaluate +from .data_source_splitter import DataSource from .metrics_objective import MetricsObjective from .objective_serialization import init_backward_serialize_compat diff --git a/fedot/core/optimisers/objective/data_objective_eval.py b/fedot/core/optimisers/objective/data_objective_eval.py index 93b066f128..dac31baf4b 100644 --- a/fedot/core/optimisers/objective/data_objective_eval.py +++ b/fedot/core/optimisers/objective/data_objective_eval.py @@ -1,6 +1,6 @@ import traceback from datetime import timedelta -from typing import Callable, Iterable, Optional, Tuple +from typing import Optional, Union, Iterable import numpy as np from golem.core.log import default_log @@ -12,11 +12,13 @@ from fedot.core.caching.preprocessing_cache import PreprocessingCache from fedot.core.data.data import InputData from fedot.core.operations.model import Model +from fedot.core.optimisers.objective import MetricsObjective +from fedot.core.optimisers.objective.metrics_objective import MetricsObjective +from fedot.core.optimisers.objective.data_source_splitter import DataSource, DataSourceSplitter from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.repository.quality_metrics_repository import MetricType from fedot.utilities.debug import is_recording_mode, is_test_session, save_debug_info_for_pipeline -DataSource = Callable[[], Iterable[Tuple[InputData, InputData]]] - class PipelineObjectiveEvaluate(ObjectiveEvaluate[Pipeline]): """ @@ -150,3 +152,24 @@ def evaluate_intermediate_metrics(self, graph: Pipeline): @property def input_data(self): return self._data_producer.args[0] + + +def get_pipeline_evaluator(metrics: Union[MetricType, Iterable[MetricType]], + data: InputData, + cv_folds: Optional[int] = None, + validation_blocks: Optional[int] = None) -> PipelineObjectiveEvaluate: + """Helper function for simplifying Pipeline evaluation. + + Args: + metrics: one or many metrics to be evaluated and included in Fitness object. + data: data for evaluation. + cv_folds: number of folds for cross validation of Pipeline, optional. + validation_blocks: Number of validation blocks, optional, used only for time series validation. + + Returns: + Callable object that can evaluate Pipelines + """ + objective = MetricsObjective(metrics) + data_producer = DataSourceSplitter(cv_folds, validation_blocks).build(data) + objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks) + return objective_evaluate diff --git a/fedot/core/optimisers/objective/data_source_splitter.py b/fedot/core/optimisers/objective/data_source_splitter.py index ef8e590388..f16ba7db8a 100644 --- a/fedot/core/optimisers/objective/data_source_splitter.py +++ b/fedot/core/optimisers/objective/data_source_splitter.py @@ -1,5 +1,5 @@ from functools import partial -from typing import Optional +from typing import Optional, Callable, Iterable, Tuple from golem.core.log import default_log @@ -8,11 +8,12 @@ from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.multi_modal import MultiModalData from fedot.core.optimisers.objective.data_objective_advisor import DataObjectiveAdvisor -from fedot.core.optimisers.objective.data_objective_eval import DataSource from fedot.core.repository.tasks import TaskTypesEnum from fedot.core.validation.split import tabular_cv_generator, ts_cv_generator from fedot.remote.remote_evaluator import RemoteEvaluator, init_data_for_remote_execution +DataSource = Callable[[], Iterable[Tuple[InputData, InputData]]] + class DataSourceSplitter: """ diff --git a/test/unit/pipelines/tuning/test_tuner_builder.py b/test/unit/pipelines/tuning/test_tuner_builder.py index db1e1a34b1..31ff44c5f8 100644 --- a/test/unit/pipelines/tuning/test_tuner_builder.py +++ b/test/unit/pipelines/tuning/test_tuner_builder.py @@ -1,8 +1,9 @@ from datetime import timedelta -from typing import Optional +from typing import Optional, Union, Iterable import numpy as np import pytest +from golem.core.optimisers.fitness import Fitness from golem.core.tuning.sequential import SequentialTuner from golem.core.tuning.simultaneous import SimultaneousTuner from golem.core.tuning.tuner_interface import HyperoptTuner @@ -10,9 +11,7 @@ from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER from fedot.core.data.data import InputData -from fedot.core.optimisers.objective import PipelineObjectiveEvaluate -from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter -from fedot.core.optimisers.objective.metrics_objective import MetricsObjective +from fedot.core.optimisers.objective.data_objective_eval import get_pipeline_evaluator from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricType @@ -21,20 +20,11 @@ from test.unit.validation.test_table_cv import get_classification_data -def get_objective_evaluate(metric: MetricType, data: InputData, - cv_folds: Optional[int] = None, validation_blocks: Optional[int] = None) \ - -> PipelineObjectiveEvaluate: - objective = MetricsObjective(metric) - data_producer = DataSourceSplitter(cv_folds, validation_blocks).build(data) - objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks) - return objective_evaluate - - def test_tuner_builder_with_default_params(): data = get_classification_data() pipeline = pipeline_first_test() tuner = TunerBuilder(data.task).build(data) - objective_evaluate = get_objective_evaluate(ClassificationMetricsEnum.ROCAUC_penalty, data) + objective_evaluate = get_pipeline_evaluator(ClassificationMetricsEnum.ROCAUC_penalty, data) assert isinstance(tuner, HyperoptTuner) assert np.isclose(tuner.objective_evaluate(pipeline).value, objective_evaluate.evaluate(pipeline).value) assert isinstance(tuner.search_space, PipelineSearchSpace) @@ -51,7 +41,7 @@ def test_tuner_builder_with_custom_params(tuner_class): cv_folds = 3 validation_blocks = 2 - objective_evaluate = get_objective_evaluate(metric, data, cv_folds, validation_blocks) + objective_evaluate = get_pipeline_evaluator(metric, data, cv_folds, validation_blocks) timeout = timedelta(minutes=2) early_stopping = 100 iterations = 10 From 068095adf823aa88633e7bc87522e838ac024421 Mon Sep 17 00:00:00 2001 From: Grigorii Kirgizov Date: Wed, 3 May 2023 10:31:48 +0300 Subject: [PATCH 2/4] Fix imports and tests --- fedot/core/optimisers/objective/__init__.py | 2 +- .../objective/data_objective_eval.py | 22 ++++++++++++++++++- .../optimizer/test_pipeline_objective_eval.py | 4 ++-- .../pipelines/tuning/test_tuner_builder.py | 5 +---- test/unit/validation/test_table_cv.py | 14 ++++-------- 5 files changed, 29 insertions(+), 18 deletions(-) diff --git a/fedot/core/optimisers/objective/__init__.py b/fedot/core/optimisers/objective/__init__.py index 0185526390..0f9e570562 100644 --- a/fedot/core/optimisers/objective/__init__.py +++ b/fedot/core/optimisers/objective/__init__.py @@ -1,4 +1,4 @@ -from .data_objective_eval import PipelineObjectiveEvaluate +from .data_objective_eval import PipelineObjectiveEvaluate, get_pipeline_evaluator, get_pipeline_fitness from .data_source_splitter import DataSource from .metrics_objective import MetricsObjective from .objective_serialization import init_backward_serialize_compat diff --git a/fedot/core/optimisers/objective/data_objective_eval.py b/fedot/core/optimisers/objective/data_objective_eval.py index dac31baf4b..570118e066 100644 --- a/fedot/core/optimisers/objective/data_objective_eval.py +++ b/fedot/core/optimisers/objective/data_objective_eval.py @@ -12,7 +12,6 @@ from fedot.core.caching.preprocessing_cache import PreprocessingCache from fedot.core.data.data import InputData from fedot.core.operations.model import Model -from fedot.core.optimisers.objective import MetricsObjective from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.optimisers.objective.data_source_splitter import DataSource, DataSourceSplitter from fedot.core.pipelines.pipeline import Pipeline @@ -173,3 +172,24 @@ def get_pipeline_evaluator(metrics: Union[MetricType, Iterable[MetricType]], data_producer = DataSourceSplitter(cv_folds, validation_blocks).build(data) objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks) return objective_evaluate + + +def get_pipeline_fitness(pipeline: Pipeline, + metrics: Union[MetricType, Iterable[MetricType]], + data: InputData, + cv_folds: Optional[int] = None, + validation_blocks: Optional[int] = None) -> Fitness: + """Helper function for simplifying Pipeline evaluation. + + Args: + pipeline: Pipeline for evaluation + metrics: one or many metrics to be evaluated and included in Fitness object. + data: data for evaluation. + cv_folds: number of folds for cross validation of Pipeline, optional. + validation_blocks: Number of validation blocks, optional, used only for time series validation. + + Returns: + Fitness object + """ + fitness = get_pipeline_evaluator(metrics, data, cv_folds, validation_blocks).evaluate(pipeline) + return fitness diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index 97a92fd743..2611495f42 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -9,7 +9,7 @@ from fedot.core.data.data import InputData from fedot.core.data.supplementary_data import SupplementaryData -from fedot.core.optimisers.objective import PipelineObjectiveEvaluate +from fedot.core.optimisers.objective import PipelineObjectiveEvaluate, get_pipeline_fitness from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.pipelines.pipeline import Pipeline @@ -157,9 +157,9 @@ def test_pipeline_objective_evaluate_with_invalid_metrics(classification_dataset @pytest.mark.parametrize('folds, actual_value', [(2, 9.8965), (3, 38.624)]) def test_pipeline_objective_evaluate_for_timeseries_cv(folds, actual_value): forecast_len, validation_blocks, time_series = configure_experiment() + simple_pipeline = get_simple_ts_pipeline() objective = MetricsObjective(RegressionMetricsEnum.MSE) data_producer = DataSourceSplitter(folds, validation_blocks).build(time_series) - simple_pipeline = get_simple_ts_pipeline() objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks) metric_value = objective_evaluate.evaluate(simple_pipeline).value assert np.isclose(metric_value, actual_value) diff --git a/test/unit/pipelines/tuning/test_tuner_builder.py b/test/unit/pipelines/tuning/test_tuner_builder.py index 31ff44c5f8..34c9cf5ba5 100644 --- a/test/unit/pipelines/tuning/test_tuner_builder.py +++ b/test/unit/pipelines/tuning/test_tuner_builder.py @@ -1,17 +1,14 @@ from datetime import timedelta -from typing import Optional, Union, Iterable import numpy as np import pytest -from golem.core.optimisers.fitness import Fitness from golem.core.tuning.sequential import SequentialTuner from golem.core.tuning.simultaneous import SimultaneousTuner from golem.core.tuning.tuner_interface import HyperoptTuner from hyperopt import tpe, rand from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER -from fedot.core.data.data import InputData -from fedot.core.optimisers.objective.data_objective_eval import get_pipeline_evaluator +from fedot.core.optimisers.objective import get_pipeline_evaluator from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricType diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index 3fdde5a895..95428fce3f 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -1,12 +1,9 @@ import logging import os from datetime import timedelta -from functools import partial import pytest from golem.core.tuning.simultaneous import SimultaneousTuner - -from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from sklearn.metrics import roc_auc_score as roc_auc from sklearn.model_selection import KFold, StratifiedKFold @@ -14,16 +11,15 @@ from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup -from fedot.core.optimisers.objective import PipelineObjectiveEvaluate +from fedot.core.optimisers.objective import get_pipeline_fitness from fedot.core.optimisers.objective.data_objective_advisor import DataObjectiveAdvisor -from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.operation_types_repository import OperationTypesRepository from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum -from fedot.core.validation.split import tabular_cv_generator from test.unit.api.test_api_cli_params import project_root_path from test.unit.models.test_model import classification_dataset from test.unit.tasks.test_classification import get_iris_data, pipeline_simple @@ -46,13 +42,11 @@ def get_classification_data(): def test_cv_multiple_metrics_evaluated_correct(classification_dataset): pipeline = sample_pipeline() - cv_folds = partial(tabular_cv_generator, classification_dataset, folds=5) metrics = [ClassificationMetricsEnum.ROCAUC_penalty, ClassificationMetricsEnum.accuracy, ClassificationMetricsEnum.logloss] - objective_eval = PipelineObjectiveEvaluate(MetricsObjective(metrics), cv_folds) - actual_values = objective_eval(pipeline).values - all_metrics_correct = all(0 < abs(x) <= 1 for x in actual_values) + fitness = get_pipeline_fitness(pipeline, metrics, classification_dataset, cv_folds=5) + all_metrics_correct = all(0 < abs(x) <= 1 for x in fitness.values) assert all_metrics_correct From d2ba85af4a8b620b7f7daeefb0c3c8f3e7976623 Mon Sep 17 00:00:00 2001 From: Grigorii Kirgizov Date: Wed, 3 May 2023 10:38:21 +0300 Subject: [PATCH 3/4] Pep8 fixes --- test/unit/optimizer/test_pipeline_objective_eval.py | 3 +-- test/unit/pipelines/tuning/test_tuner_builder.py | 2 +- test/unit/validation/test_table_cv.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index 2611495f42..943faa84cb 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -4,12 +4,11 @@ import numpy as np import pytest - from golem.core.optimisers.fitness import SingleObjFitness from fedot.core.data.data import InputData from fedot.core.data.supplementary_data import SupplementaryData -from fedot.core.optimisers.objective import PipelineObjectiveEvaluate, get_pipeline_fitness +from fedot.core.optimisers.objective import PipelineObjectiveEvaluate from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.pipelines.pipeline import Pipeline diff --git a/test/unit/pipelines/tuning/test_tuner_builder.py b/test/unit/pipelines/tuning/test_tuner_builder.py index 34c9cf5ba5..b737fea6c4 100644 --- a/test/unit/pipelines/tuning/test_tuner_builder.py +++ b/test/unit/pipelines/tuning/test_tuner_builder.py @@ -11,7 +11,7 @@ from fedot.core.optimisers.objective import get_pipeline_evaluator from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder -from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricType +from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum from test.unit.optimizer.test_pipeline_objective_eval import pipeline_first_test from test.unit.pipelines.tuning.test_pipeline_tuning import get_not_default_search_space from test.unit.validation.test_table_cv import get_classification_data diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index 95428fce3f..85d28bfdb8 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -29,8 +29,8 @@ def sample_pipeline(): return Pipeline(PipelineNode(operation_type='logit', - nodes_from=[PipelineNode(operation_type='rf'), - PipelineNode(operation_type='scaling')])) + nodes_from=[PipelineNode(operation_type='rf'), + PipelineNode(operation_type='scaling')])) def get_classification_data(): From 952e5a3dafc4c6c70f4656c2250ebe887272a311 Mon Sep 17 00:00:00 2001 From: Grigorii Kirgizov Date: Thu, 4 May 2023 14:12:31 +0300 Subject: [PATCH 4/4] Don't unfit Pipeline in get_pipeline_fitness --- fedot/core/optimisers/objective/data_objective_eval.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fedot/core/optimisers/objective/data_objective_eval.py b/fedot/core/optimisers/objective/data_objective_eval.py index 570118e066..4221738a9b 100644 --- a/fedot/core/optimisers/objective/data_objective_eval.py +++ b/fedot/core/optimisers/objective/data_objective_eval.py @@ -170,7 +170,9 @@ def get_pipeline_evaluator(metrics: Union[MetricType, Iterable[MetricType]], """ objective = MetricsObjective(metrics) data_producer = DataSourceSplitter(cv_folds, validation_blocks).build(data) - objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, validation_blocks=validation_blocks) + objective_evaluate = PipelineObjectiveEvaluate(objective, + data_producer, validation_blocks=validation_blocks, + do_unfit=False) return objective_evaluate