From b56b841391cc0a5fe3d12bf3ae3953ac41bb6133 Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 13:02:55 +0200 Subject: [PATCH 01/46] removing python 3.8 and 3.9 support --- .github/workflows/pre-commit.yaml | 2 +- .github/workflows/tests.yaml | 2 +- pyproject.toml | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 2eb93db1..ed323cbb 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.10 - run: pip install pre-commit - run: pre-commit install - run: pre-commit run --all-files diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index dd60a230..0ec1b3ed 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] os: [ubuntu-latest, macos-latest, windows-latest] defaults: run: diff --git a/pyproject.toml b/pyproject.toml index 06b4baa4..70bde8f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,6 @@ classifiers = [ "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", @@ -48,7 +46,7 @@ packages = [{ include = "neps" }, { include = "neps_examples" }] [tool.poetry.dependencies] -python = ">=3.8,<3.12" +python = ">=3.10,<3.12" ConfigSpace = "^0.7" grakel = "^0.1" numpy = "^1" @@ -274,7 +272,7 @@ markers = [ filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] -python_version = "3.8" # Match minimum supported version +python_version = "3.10" # Match minimum supported version packages = ["neps"] show_error_codes = true From 78cf9009a04279da86250acfccf0a8f7c2ce9397 Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 13:18:18 +0200 Subject: [PATCH 02/46] fixing version in pre-commit.yaml --- .github/workflows/pre-commit.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index ed323cbb..ace7ab76 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: 3.10 + python-version: '3.10' - run: pip install pre-commit - run: pre-commit install - run: pre-commit run --all-files From 752f9945fa3bc49dd1a9077b68705504e85c4c3a Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 14:27:11 +0200 Subject: [PATCH 03/46] fixes --- neps/env.py | 3 ++- neps/plot/tensorboard_eval.py | 3 ++- neps/runtime.py | 5 +---- neps/search_spaces/hyperparameters/categorical.py | 11 +++++------ neps/search_spaces/hyperparameters/float.py | 3 ++- neps/search_spaces/hyperparameters/integer.py | 3 ++- neps/search_spaces/hyperparameters/numerical.py | 3 ++- neps/search_spaces/parameter.py | 3 ++- neps/search_spaces/search_space.py | 11 +++++++---- neps/state/_eval.py | 3 ++- neps/state/filebased.py | 3 ++- neps/state/neps_state.py | 14 +++++++++----- neps/state/optimizer.py | 3 ++- neps/state/protocols.py | 3 ++- neps/state/seed_snapshot.py | 11 +++++------ neps/state/trial.py | 3 ++- neps/utils/_locker.py | 3 ++- neps/utils/common.py | 5 +++-- neps/utils/data_loading.py | 3 ++- neps/utils/files.py | 3 ++- neps/utils/run_args.py | 7 ++++--- neps/utils/types.py | 8 ++++---- neps/utils/validation.py | 3 ++- pyproject.toml | 2 +- 24 files changed, 69 insertions(+), 50 deletions(-) diff --git a/neps/env.py b/neps/env.py index 155c3d32..256a5415 100644 --- a/neps/env.py +++ b/neps/env.py @@ -3,7 +3,8 @@ from __future__ import annotations import os -from typing import Any, Callable, TypeVar +from collections.abc import Callable +from typing import Any, TypeVar T = TypeVar("T") V = TypeVar("V") diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py index e77329b4..a6c27450 100644 --- a/neps/plot/tensorboard_eval.py +++ b/neps/plot/tensorboard_eval.py @@ -3,8 +3,9 @@ from __future__ import annotations import math +from collections.abc import Mapping from pathlib import Path -from typing import Any, ClassVar, Mapping +from typing import Any, ClassVar from typing_extensions import override import numpy as np diff --git a/neps/runtime.py b/neps/runtime.py index 5cf0f29f..99773afa 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -7,18 +7,15 @@ import os import shutil import time +from collections.abc import Callable, Iterable, Iterator, Mapping from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path from typing import ( TYPE_CHECKING, Any, - Callable, Generic, - Iterable, - Iterator, Literal, - Mapping, TypeVar, ) diff --git a/neps/search_spaces/hyperparameters/categorical.py b/neps/search_spaces/hyperparameters/categorical.py index 39694a19..a349f898 100644 --- a/neps/search_spaces/hyperparameters/categorical.py +++ b/neps/search_spaces/hyperparameters/categorical.py @@ -2,16 +2,15 @@ from __future__ import annotations +from collections.abc import Iterable, Mapping from typing import ( TYPE_CHECKING, Any, ClassVar, - Iterable, Literal, - Mapping, - Union, + TypeAlias, ) -from typing_extensions import Self, TypeAlias, override +from typing_extensions import Self, override import numpy as np import numpy.typing as npt @@ -22,7 +21,7 @@ if TYPE_CHECKING: from neps.utils.types import f64 -CategoricalTypes: TypeAlias = Union[float, int, str] +CategoricalTypes: TypeAlias = float | int | str class CategoricalParameter( @@ -81,7 +80,7 @@ def __init__( super().__init__(value=None, is_fidelity=False, default=default) for choice in choices: - if not isinstance(choice, (float, int, str)): + if not isinstance(choice, float | int | str): raise TypeError( f'Choice "{choice}" is not of a valid type (float, int, str)' ) diff --git a/neps/search_spaces/hyperparameters/float.py b/neps/search_spaces/hyperparameters/float.py index b780f3ff..4e4016bd 100644 --- a/neps/search_spaces/hyperparameters/float.py +++ b/neps/search_spaces/hyperparameters/float.py @@ -3,7 +3,8 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, Literal from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/hyperparameters/integer.py b/neps/search_spaces/hyperparameters/integer.py index 6462cc63..7f816588 100644 --- a/neps/search_spaces/hyperparameters/integer.py +++ b/neps/search_spaces/hyperparameters/integer.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, Literal from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/hyperparameters/numerical.py b/neps/search_spaces/hyperparameters/numerical.py index 9aaaf6d1..a97473e1 100644 --- a/neps/search_spaces/hyperparameters/numerical.py +++ b/neps/search_spaces/hyperparameters/numerical.py @@ -22,8 +22,9 @@ from __future__ import annotations +from collections.abc import Mapping from functools import lru_cache -from typing import TYPE_CHECKING, Any, ClassVar, Literal, Mapping, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/parameter.py b/neps/search_spaces/parameter.py index a2f6c09c..27684a2d 100644 --- a/neps/search_spaces/parameter.py +++ b/neps/search_spaces/parameter.py @@ -23,7 +23,8 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, ClassVar, Generic, Mapping, TypeVar, runtime_checkable +from collections.abc import Mapping +from typing import Any, ClassVar, Generic, TypeVar, runtime_checkable from typing_extensions import Protocol, Self ValueT = TypeVar("ValueT") diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 3f0d6703..85cef066 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -7,9 +7,10 @@ import logging import operator import pprint +from collections.abc import Hashable, Iterator, Mapping from itertools import product from pathlib import Path -from typing import TYPE_CHECKING, Any, Hashable, Iterator, Literal, Mapping +from typing import TYPE_CHECKING, Any, Literal from typing_extensions import Self import ConfigSpace as CS @@ -111,7 +112,7 @@ def pipeline_space_from_yaml( # noqa: C901 format, contents, or if the dictionary is invalid. """ try: - if isinstance(config, (str, Path)): + if isinstance(config, str | Path): # try to load the YAML file try: yaml_file_path = Path(config) @@ -559,7 +560,7 @@ def get_vectorial_dim(self) -> dict[Literal["continuous", "categorical"], int] | The vectorial dimension """ if not any( - isinstance(hp, (NumericalParameter, CategoricalParameter, ConstantParameter)) + isinstance(hp, NumericalParameter | CategoricalParameter | ConstantParameter) for hp in self.values() ): return None @@ -657,7 +658,9 @@ def get_search_space_grid( SearchSpace( **{ name: ConstantParameter(value=value) # type: ignore - for name, value in zip(self.hyperparameters.keys(), config_values) + for name, value in zip( + self.hyperparameters.keys(), config_values, strict=False + ) } ) for config_values in full_grid diff --git a/neps/state/_eval.py b/neps/state/_eval.py index 0d08dfdd..48c7e960 100644 --- a/neps/state/_eval.py +++ b/neps/state/_eval.py @@ -4,8 +4,9 @@ import logging import time import traceback +from collections.abc import Callable, Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, TypeVar +from typing import TYPE_CHECKING, Any, Literal, TypeVar from neps.exceptions import NePSError diff --git a/neps/state/filebased.py b/neps/state/filebased.py index 360364cf..3e68b165 100644 --- a/neps/state/filebased.py +++ b/neps/state/filebased.py @@ -21,10 +21,11 @@ import json import logging import pprint +from collections.abc import Iterable, Iterator from contextlib import contextmanager from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import ClassVar, Iterable, Iterator, TypeVar +from typing import ClassVar, TypeVar from typing_extensions import override from uuid import uuid4 diff --git a/neps/state/neps_state.py b/neps/state/neps_state.py index 8afaee62..5d2af554 100644 --- a/neps/state/neps_state.py +++ b/neps/state/neps_state.py @@ -12,8 +12,9 @@ import logging import time +from collections.abc import Callable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Callable, Generic, TypeVar, overload +from typing import TYPE_CHECKING, Generic, TypeVar, overload from more_itertools import take @@ -88,10 +89,13 @@ def sample_trial( Returns: The new trial. """ - with self._optimizer_state.acquire() as ( - opt_state, - put_opt, - ), self._seed_state.acquire() as (seed_state, put_seed_state): + with ( + self._optimizer_state.acquire() as ( + opt_state, + put_opt, + ), + self._seed_state.acquire() as (seed_state, put_seed_state), + ): trials: dict[Trial.ID, Trial] = {} for trial_id, shared_trial in self._trials.all().items(): trial = shared_trial.synced() diff --git a/neps/state/optimizer.py b/neps/state/optimizer.py index f4000b07..3b3019f0 100644 --- a/neps/state/optimizer.py +++ b/neps/state/optimizer.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping +from typing import Any @dataclass diff --git a/neps/state/protocols.py b/neps/state/protocols.py index 78fcee0d..de2a4819 100644 --- a/neps/state/protocols.py +++ b/neps/state/protocols.py @@ -6,10 +6,11 @@ from __future__ import annotations import logging +from collections.abc import Callable, Iterable, Iterator from contextlib import contextmanager from copy import deepcopy from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, ClassVar, Generic, Iterable, Iterator, TypeVar +from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar from typing_extensions import Protocol, Self from neps.exceptions import ( diff --git a/neps/state/seed_snapshot.py b/neps/state/seed_snapshot.py index 0f9fad87..4a26370b 100644 --- a/neps/state/seed_snapshot.py +++ b/neps/state/seed_snapshot.py @@ -5,18 +5,17 @@ import contextlib import random from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, List, Tuple, Union -from typing_extensions import TypeAlias +from typing import TYPE_CHECKING, Any, TypeAlias import numpy as np if TYPE_CHECKING: import torch - NP_RNG_STATE: TypeAlias = Tuple[str, np.ndarray, int, int, float] - PY_RNG_STATE: TypeAlias = Tuple[int, Tuple[int, ...], Union[int, None]] + NP_RNG_STATE: TypeAlias = tuple[str, np.ndarray, int, int, float] + PY_RNG_STATE: TypeAlias = tuple[int, tuple[int, ...], int | None] TORCH_RNG_STATE: TypeAlias = torch.Tensor - TORCH_CUDA_RNG_STATE: TypeAlias = List[torch.Tensor] + TORCH_CUDA_RNG_STATE: TypeAlias = list[torch.Tensor] @dataclass @@ -105,7 +104,7 @@ def __eq__(self, other: Any, /) -> bool: # noqa: PLR0911 if not all( torch.equal(a, b) - for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng) + for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng, strict=False) ): return False diff --git a/neps/state/trial.py b/neps/state/trial.py index 862e2bbb..ddf956b9 100644 --- a/neps/state/trial.py +++ b/neps/state/trial.py @@ -3,9 +3,10 @@ from __future__ import annotations import logging +from collections.abc import Callable, Mapping from dataclasses import asdict, dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Literal from typing_extensions import Self import numpy as np diff --git a/neps/utils/_locker.py b/neps/utils/_locker.py index 0b9a92d5..f2d430f8 100644 --- a/neps/utils/_locker.py +++ b/neps/utils/_locker.py @@ -1,8 +1,9 @@ from __future__ import annotations +from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path -from typing import IO, Iterator +from typing import IO import portalocker as pl diff --git a/neps/utils/common.py b/neps/utils/common.py index 2a9ca586..fec76b57 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -3,9 +3,10 @@ from __future__ import annotations import inspect +from collections.abc import Iterable, Mapping, Sequence from functools import partial from pathlib import Path -from typing import Any, Iterable, Mapping, Sequence +from typing import Any import torch import yaml @@ -236,7 +237,7 @@ def get_value(obj: Any) -> Any: """Honestly, don't know why you would use this. Please try not to.""" if obj is None: return None - if isinstance(obj, (str, int, float, bool)): + if isinstance(obj, str | int | float | bool): return obj if isinstance(obj, dict): return {key: get_value(value) for key, value in obj.items()} diff --git a/neps/utils/data_loading.py b/neps/utils/data_loading.py index a0f86210..d3b08c15 100644 --- a/neps/utils/data_loading.py +++ b/neps/utils/data_loading.py @@ -5,10 +5,11 @@ import json import os import re +from collections.abc import Mapping from dataclasses import asdict from itertools import chain from pathlib import Path -from typing import Any, Mapping, TypedDict +from typing import Any, TypedDict import numpy as np import yaml diff --git a/neps/utils/files.py b/neps/utils/files.py index ddb0627c..cfd49963 100644 --- a/neps/utils/files.py +++ b/neps/utils/files.py @@ -3,9 +3,10 @@ from __future__ import annotations import dataclasses +from collections.abc import Iterable, Mapping from enum import Enum from pathlib import Path -from typing import Any, Iterable, Mapping +from typing import Any import yaml diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 9d7f6445..7279f625 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -7,8 +7,9 @@ import importlib.util import logging import sys +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any import yaml @@ -273,7 +274,7 @@ def process_searcher(key: str, special_configs: dict, settings: dict) -> None: settings[SEARCHER_KWARGS] = searcher searcher = load_and_return_object(path, name, key) - elif isinstance(searcher, (str, Path)): + elif isinstance(searcher, str | Path): pass else: raise TypeError( @@ -428,7 +429,7 @@ def check_run_args(settings: dict) -> None: if not all(callable(item) for item in value): raise TypeError("All items in 'pre_load_hooks' must be callable.") elif param == SEARCHER: - if not (isinstance(value, (str, dict)) or issubclass(value, BaseOptimizer)): + if not (isinstance(value, str | dict) or issubclass(value, BaseOptimizer)): raise TypeError( "Parameter 'searcher' must be a string or a class that is a subclass " "of BaseOptimizer." diff --git a/neps/utils/types.py b/neps/utils/types.py index a6b6c540..03ca7385 100644 --- a/neps/utils/types.py +++ b/neps/utils/types.py @@ -2,9 +2,9 @@ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, Literal, Mapping, Union -from typing_extensions import TypeAlias +from typing import TYPE_CHECKING, Any, Literal, TypeAlias import numpy as np @@ -15,10 +15,10 @@ # TODO(eddiebergman): We can turn this to an enum at some # point to prevent having to isinstance and str match ERROR: TypeAlias = Literal["error"] -Number: TypeAlias = Union[int, float, np.number] +Number: TypeAlias = int | float | np.number ConfigID: TypeAlias = str RawConfig: TypeAlias = Mapping[str, Any] -Metadata: TypeAlias = Dict[str, Any] +Metadata: TypeAlias = dict[str, Any] ResultDict: TypeAlias = Mapping[str, Any] # NOTE(eddiebergman): Getting types for scipy distributions sucks diff --git a/neps/utils/validation.py b/neps/utils/validation.py index 884df0c5..0f8281cd 100644 --- a/neps/utils/validation.py +++ b/neps/utils/validation.py @@ -3,7 +3,8 @@ from __future__ import annotations import inspect -from typing import Any, Callable +from collections.abc import Callable +from typing import Any from neps.exceptions import NePSError diff --git a/pyproject.toml b/pyproject.toml index 70bde8f5..d8d57fca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,7 @@ build-backend = "poetry.core.masonry.api" # "tests", # "neps_examples", [tool.ruff] -target-version = "py38" +target-version = "py310" output-format = "full" line-length = 90 src = ["neps"] From 33bacf0cd042a5c6ac00303a289351dc6d498a35 Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 13:58:55 +0200 Subject: [PATCH 04/46] removing from __future__ import annotations; removing typing_extensions import --- docs/_code/api_generator.py | 2 +- docs/_code/example_generator.py | 2 - docs/doc_yamls/architecture_search_space.py | 2 +- neps/api.py | 2 +- neps/optimizers/__init__.py | 2 +- neps/optimizers/base_optimizer.py | 2 +- .../acquisition_functions/__init__.py | 2 +- .../acquisition_functions/ei.py | 3 +- .../acquisition_samplers/base_acq_sampler.py | 2 +- .../freeze_thaw_sampler.py | 6 +-- .../acquisition_samplers/mutation_sampler.py | 10 ++--- .../acquisition_samplers/random_sampler.py | 2 - .../bayesian_optimization/cost_cooling.py | 7 +--- .../bayesian_optimization/kernels/__init__.py | 2 +- .../kernels/get_kernels.py | 2 - .../bayesian_optimization/kernels/utils.py | 2 - .../bayesian_optimization/mf_tpe.py | 7 +--- .../bayesian_optimization/models/deepGP.py | 2 - .../bayesian_optimization/optimizer.py | 5 +-- neps/optimizers/grid_search/optimizer.py | 5 +-- neps/optimizers/info.py | 2 - neps/optimizers/multi_fidelity/_dyhpo.py | 5 +-- neps/optimizers/multi_fidelity/dyhpo.py | 5 +-- neps/optimizers/multi_fidelity/hyperband.py | 6 +-- neps/optimizers/multi_fidelity/mf_bo.py | 2 +- .../multi_fidelity/promotion_policy.py | 6 +-- .../multi_fidelity/sampling_policy.py | 2 - .../multi_fidelity/successive_halving.py | 10 ++--- neps/optimizers/multi_fidelity/utils.py | 2 - .../multi_fidelity_prior/async_priorband.py | 4 +- .../multi_fidelity_prior/priorband.py | 6 +-- neps/optimizers/multi_fidelity_prior/utils.py | 2 - .../prototype_optimizer.py | 5 +-- neps/optimizers/random_search/optimizer.py | 4 +- .../regularized_evolution/optimizer.py | 14 +++---- neps/runtime.py | 1 - neps/search_spaces/architecture/api.py | 2 +- neps/search_spaces/architecture/cfg.py | 2 +- .../cfg_variants/constrained_cfg.py | 2 +- .../architecture/core_graph_grammar.py | 2 +- .../architecture/graph_grammar.py | 40 +++++++++++-------- .../hyperparameters/categorical.py | 8 +--- neps/search_spaces/parameter.py | 4 +- neps/search_spaces/yaml_search_space_utils.py | 2 - neps/state/neps_state.py | 1 + neps/state/protocols.py | 4 +- neps/state/trial.py | 4 +- neps/utils/types.py | 1 - neps_examples/basic_usage/architecture.py | 2 +- .../experimental/hierarchical_architecture.py | 2 - ...erarchical_architecture_hierarchical_GP.py | 2 - tests/regression_objectives.py | 5 +-- tests/regression_runner.py | 2 - tests/test_neps_api/test_api.py | 2 - .../test_default_report_values.py | 2 - .../test_error_handling_strategies.py | 2 - tests/test_runtime/test_stopping_criterion.py | 2 - tests/test_state/test_filebased_neps_state.py | 2 - tests/test_state/test_neps_state.py | 2 - tests/test_state/test_rng.py | 2 - tests/test_state/test_synced.py | 7 +--- tests/test_state/test_trial.py | 5 ++- 62 files changed, 84 insertions(+), 172 deletions(-) diff --git a/docs/_code/api_generator.py b/docs/_code/api_generator.py index 1b9951bf..b19f40a2 100644 --- a/docs/_code/api_generator.py +++ b/docs/_code/api_generator.py @@ -2,7 +2,7 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations + import logging from pathlib import Path diff --git a/docs/_code/example_generator.py b/docs/_code/example_generator.py index ca866a0e..6452bbda 100644 --- a/docs/_code/example_generator.py +++ b/docs/_code/example_generator.py @@ -2,7 +2,6 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations import logging from pathlib import Path @@ -16,7 +15,6 @@ EXAMPLE_FOLDER = ROOT / "neps_examples" TAB = " " - if not SRCDIR.exists(): raise FileNotFoundError( f"{SRCDIR} does not exist, make sure you are running this from the root of the repository." diff --git a/docs/doc_yamls/architecture_search_space.py b/docs/doc_yamls/architecture_search_space.py index 36f8bb38..cdac0da0 100644 --- a/docs/doc_yamls/architecture_search_space.py +++ b/docs/doc_yamls/architecture_search_space.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from torch import nn import neps from neps.search_spaces.architecture import primitives as ops diff --git a/neps/api.py b/neps/api.py index 6be520ad..74754d61 100644 --- a/neps/api.py +++ b/neps/api.py @@ -1,6 +1,6 @@ """API for the neps package.""" -from __future__ import annotations + import inspect import logging diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 31cb4c4a..1fdae197 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from functools import partial from typing import Callable, Mapping diff --git a/neps/optimizers/base_optimizer.py b/neps/optimizers/base_optimizer.py index 34804626..c5b5f83f 100644 --- a/neps/optimizers/base_optimizer.py +++ b/neps/optimizers/base_optimizer.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import logging from abc import abstractmethod diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index 89cfb4fb..3b36cd89 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from functools import partial from typing import Callable diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py index ba5eb38b..07a2f20b 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from typing import TYPE_CHECKING, Sequence, Union import numpy as np import torch @@ -10,6 +8,7 @@ if TYPE_CHECKING: from neps.search_spaces import SearchSpace + class ComprehensiveExpectedImprovement(BaseAcquisition): def __init__( self, diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py index adf47b82..2a6e508f 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from abc import abstractmethod from typing import TYPE_CHECKING, Sequence, Callable diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index 89b7d9d3..b9dd5ddc 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -1,8 +1,7 @@ # type: ignore -from __future__ import annotations + import warnings -from copy import deepcopy import numpy as np import pandas as pd @@ -13,7 +12,6 @@ class FreezeThawSampler(AcquisitionSampler): - SAMPLES_TO_DRAW = 100 # number of random samples to draw at lowest fidelity def __init__(self, **kwargs): @@ -27,7 +25,7 @@ def __init__(self, **kwargs): self.sample_full_table = None self.set_sample_full_tabular(True) # sets flag that samples full table - def set_sample_full_tabular(self, flag: bool=False): + def set_sample_full_tabular(self, flag: bool = False): if self.is_tabular: self.sample_full_table = flag diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py index 4c6b17df..ee243e43 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py @@ -1,11 +1,8 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Callable, Sequence +from typing import TYPE_CHECKING, Callable, Sequence, override import numpy as np import torch from more_itertools import first -from typing_extensions import override from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( AcquisitionSampler, @@ -109,8 +106,9 @@ def create_pool( n_best = len(self.x) if len(self.x) < self.n_best else self.n_best best_configs = [ - x for (_, x) in sorted(zip(self.y, self.x), key=lambda pair: pair[0]) - ][:n_best] + x for (_, x) in + sorted(zip(self.y, self.x), key=lambda pair: pair[0]) + ][:n_best] seen: set[int] = set() diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py index e3b75515..5d783a3e 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from ....search_spaces.search_space import SearchSpace from .base_acq_sampler import AcquisitionSampler diff --git a/neps/optimizers/bayesian_optimization/cost_cooling.py b/neps/optimizers/bayesian_optimization/cost_cooling.py index f2878fe9..b602d9d3 100644 --- a/neps/optimizers/bayesian_optimization/cost_cooling.py +++ b/neps/optimizers/bayesian_optimization/cost_cooling.py @@ -1,7 +1,4 @@ -from __future__ import annotations - -from typing import Any -from typing_extensions import override +from typing import Any, override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult @@ -236,7 +233,7 @@ def load_optimization_state( self.acquisition.set_state( self.surrogate_model, alpha=1 - - (budget_info.used_cost_budget / budget_info.max_cost_budget), + - (budget_info.used_cost_budget / budget_info.max_cost_budget), cost_model=self.cost_model, ) self.acquisition_sampler.set_state(x=train_x, y=train_y) diff --git a/neps/optimizers/bayesian_optimization/kernels/__init__.py b/neps/optimizers/bayesian_optimization/kernels/__init__.py index 8d11ea81..7217957b 100644 --- a/neps/optimizers/bayesian_optimization/kernels/__init__.py +++ b/neps/optimizers/bayesian_optimization/kernels/__init__.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from functools import partial from typing import Callable diff --git a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py b/neps/optimizers/bayesian_optimization/kernels/get_kernels.py index f606f442..927e23c2 100644 --- a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py +++ b/neps/optimizers/bayesian_optimization/kernels/get_kernels.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from neps.utils.common import instance_from_map from ....search_spaces.architecture.core_graph_grammar import CoreGraphGrammar from ....search_spaces.hyperparameters.categorical import CategoricalParameter diff --git a/neps/optimizers/bayesian_optimization/kernels/utils.py b/neps/optimizers/bayesian_optimization/kernels/utils.py index 92ee1817..9736586b 100644 --- a/neps/optimizers/bayesian_optimization/kernels/utils.py +++ b/neps/optimizers/bayesian_optimization/kernels/utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from typing import TYPE_CHECKING, Tuple import networkx as nx diff --git a/neps/optimizers/bayesian_optimization/mf_tpe.py b/neps/optimizers/bayesian_optimization/mf_tpe.py index 45e4adc4..61092c80 100644 --- a/neps/optimizers/bayesian_optimization/mf_tpe.py +++ b/neps/optimizers/bayesian_optimization/mf_tpe.py @@ -1,13 +1,10 @@ -from __future__ import annotations - import random from copy import deepcopy -from typing import Any, Iterable +from typing import Any, Iterable, Literal, override import numpy as np import torch from scipy.stats import spearmanr -from typing_extensions import Literal, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig @@ -225,7 +222,7 @@ def _enhance_priors(self): def _get_rung_maps(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" eta = round(1 / self.good_fraction) - new_min_budget = self.min_fidelity * (1 / eta**s) + new_min_budget = self.min_fidelity * (1 / eta ** s) nrungs = ( np.floor(np.log(self.max_fidelity / new_min_budget) / np.log(eta)).astype(int) + 1 diff --git a/neps/optimizers/bayesian_optimization/models/deepGP.py b/neps/optimizers/bayesian_optimization/models/deepGP.py index d5145043..c3522e73 100644 --- a/neps/optimizers/bayesian_optimization/models/deepGP.py +++ b/neps/optimizers/bayesian_optimization/models/deepGP.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import os from copy import deepcopy diff --git a/neps/optimizers/bayesian_optimization/optimizer.py b/neps/optimizers/bayesian_optimization/optimizer.py index 9fc3aeae..337d849b 100644 --- a/neps/optimizers/bayesian_optimization/optimizer.py +++ b/neps/optimizers/bayesian_optimization/optimizer.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import random -from typing import Any, TYPE_CHECKING, Literal -from typing_extensions import override +from typing import Any, TYPE_CHECKING, Literal, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 4f5ff24e..8b367d65 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import random -from typing import Any -from typing_extensions import override +from typing import Any, override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/info.py b/neps/optimizers/info.py index 7088f341..40b08174 100644 --- a/neps/optimizers/info.py +++ b/neps/optimizers/info.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import os import yaml diff --git a/neps/optimizers/multi_fidelity/_dyhpo.py b/neps/optimizers/multi_fidelity/_dyhpo.py index da3e36bf..1a0ce0e5 100644 --- a/neps/optimizers/multi_fidelity/_dyhpo.py +++ b/neps/optimizers/multi_fidelity/_dyhpo.py @@ -1,7 +1,4 @@ -from __future__ import annotations - -from typing import Any, List, Union -from typing_extensions import override +from typing import Any, List, Union, override import numpy as np diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 59804637..0baeb596 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -1,7 +1,4 @@ -from __future__ import annotations - -from typing import Any -from typing_extensions import override +from typing import Any, override import numpy as np diff --git a/neps/optimizers/multi_fidelity/hyperband.py b/neps/optimizers/multi_fidelity/hyperband.py index dde96c56..6ac3f529 100644 --- a/neps/optimizers/multi_fidelity/hyperband.py +++ b/neps/optimizers/multi_fidelity/hyperband.py @@ -1,11 +1,8 @@ -from __future__ import annotations - import typing from copy import deepcopy -from typing import Any +from typing import Any, Literal, override import numpy as np -from typing_extensions import Literal, override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig @@ -537,5 +534,4 @@ def __init__( sh.model_policy = self.model_policy sh.sample_new_config = self.sample_new_config - # TODO: TrulyAsyncHyperband diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index a24c9d1b..904d79e8 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -1,5 +1,5 @@ # type: ignore -from __future__ import annotations + from copy import deepcopy diff --git a/neps/optimizers/multi_fidelity/promotion_policy.py b/neps/optimizers/multi_fidelity/promotion_policy.py index 41b25176..102b7f82 100644 --- a/neps/optimizers/multi_fidelity/promotion_policy.py +++ b/neps/optimizers/multi_fidelity/promotion_policy.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from abc import ABC, abstractmethod import numpy as np @@ -104,6 +102,6 @@ def retrieve_promotions(self) -> dict: top_k = len(self.rung_members_performance[rung]) // self.eta _ordered_idx = np.argsort(self.rung_members_performance[rung]) self.rung_promotions[rung] = np.array(self.rung_members[rung])[_ordered_idx][ - :top_k - ].tolist() + :top_k + ].tolist() return self.rung_promotions diff --git a/neps/optimizers/multi_fidelity/sampling_policy.py b/neps/optimizers/multi_fidelity/sampling_policy.py index 9321633c..4ae6118d 100644 --- a/neps/optimizers/multi_fidelity/sampling_policy.py +++ b/neps/optimizers/multi_fidelity/sampling_policy.py @@ -1,6 +1,4 @@ # mypy: disable-error-code = assignment -from __future__ import annotations - import logging from abc import ABC, abstractmethod from typing import Any diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index 6d2ed8ef..2813285e 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -1,14 +1,12 @@ # type: ignore -from __future__ import annotations - import random import typing from copy import deepcopy import numpy as np import pandas as pd -from typing_extensions import Literal, override +from typing import Literal, override from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces import ( @@ -178,7 +176,7 @@ def get_incumbent_score(self): def _get_rung_map(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" assert s <= self.stopping_rate_limit - new_min_budget = self.min_budget * (self.eta**s) + new_min_budget = self.min_budget * (self.eta ** s) nrungs = ( np.floor(np.log(self.max_budget / new_min_budget) / np.log(self.eta)).astype( int @@ -199,7 +197,7 @@ def _get_rung_map(self, s: int = 0) -> dict: def _get_config_map(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to the number of configs for each fidelity""" assert s <= self.stopping_rate_limit - new_min_budget = self.min_budget * (self.eta**s) + new_min_budget = self.min_budget * (self.eta ** s) nrungs = ( np.floor(np.log(self.max_budget / new_min_budget) / np.log(self.eta)).astype( int @@ -209,7 +207,7 @@ def _get_config_map(self, s: int = 0) -> dict: s_max = self.stopping_rate_limit + 1 _s = self.stopping_rate_limit - s # L2 from Alg 1 in https://arxiv.org/pdf/1603.06560.pdf - _n_config = np.floor(s_max / (_s + 1)) * self.eta**_s + _n_config = np.floor(s_max / (_s + 1)) * self.eta ** _s config_map = dict() for i in range(nrungs): config_map[i + s] = int(_n_config) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index dd36e489..85f1bb69 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -1,6 +1,4 @@ # type: ignore -from __future__ import annotations - from typing import Any, Sequence import numpy as np diff --git a/neps/optimizers/multi_fidelity_prior/async_priorband.py b/neps/optimizers/multi_fidelity_prior/async_priorband.py index 40f6cb29..0b933276 100644 --- a/neps/optimizers/multi_fidelity_prior/async_priorband.py +++ b/neps/optimizers/multi_fidelity_prior/async_priorband.py @@ -1,9 +1,7 @@ -from __future__ import annotations - import typing import numpy as np -from typing_extensions import Literal, override +from typing import Literal, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/multi_fidelity_prior/priorband.py b/neps/optimizers/multi_fidelity_prior/priorband.py index 614ad4b0..be7b3151 100644 --- a/neps/optimizers/multi_fidelity_prior/priorband.py +++ b/neps/optimizers/multi_fidelity_prior/priorband.py @@ -1,9 +1,7 @@ -from __future__ import annotations - import typing +from typing import Literal import numpy as np -from typing_extensions import Literal from neps.utils.types import RawConfig from neps.search_spaces.search_space import SearchSpace @@ -146,7 +144,7 @@ def calc_sampling_args(self, rung) -> dict: # scales weight of prior by eta raised to the current rung level # at the base rung thus w_prior = w_random # at the max rung r, w_prior = eta^r * w_random - _w_prior = (self.eta**rung) * _w_random + _w_prior = (self.eta ** rung) * _w_random elif self.prior_weight_type == "linear": _w_random = 1 w_prior_min_rung = 1 * _w_random diff --git a/neps/optimizers/multi_fidelity_prior/utils.py b/neps/optimizers/multi_fidelity_prior/utils.py index edbbadc7..9f4c1a47 100644 --- a/neps/optimizers/multi_fidelity_prior/utils.py +++ b/neps/optimizers/multi_fidelity_prior/utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import numpy as np import pandas as pd import scipy diff --git a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py index 845552ea..d14657bf 100644 --- a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py +++ b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import logging -from typing import Any -from typing_extensions import override +from typing import Any, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/random_search/optimizer.py b/neps/optimizers/random_search/optimizer.py index 5aeaff33..094ca87d 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -1,6 +1,4 @@ -from __future__ import annotations -from typing import Any -from typing_extensions import override +from typing import Any, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/regularized_evolution/optimizer.py b/neps/optimizers/regularized_evolution/optimizer.py index 0860ba1c..a7ce782b 100644 --- a/neps/optimizers/regularized_evolution/optimizer.py +++ b/neps/optimizers/regularized_evolution/optimizer.py @@ -1,11 +1,8 @@ -from __future__ import annotations - import math import os import random from pathlib import Path -from typing import Any, Callable -from typing_extensions import override +from typing import Any, Callable, override import numpy as np import yaml @@ -68,7 +65,7 @@ def load_optimization_state( self.population = [ (x, y) for x, y in zip( - train_x[-self.population_size :], train_y[-self.population_size :] + train_x[-self.population_size:], train_y[-self.population_size:] ) ] self.pending_evaluations = [el for el in pending_evaluations.values()] @@ -85,12 +82,13 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: for _ in range(cur_population_size * 2) ] if self.assisted_zero_cost_proxy is not None: - zero_cost_proxy_values = self.assisted_zero_cost_proxy(x=configs) # type: ignore[misc] + zero_cost_proxy_values = self.assisted_zero_cost_proxy( + x=configs) # type: ignore[misc] else: raise Exception("Zero cost proxy function is not defined!") indices = np.argsort(zero_cost_proxy_values)[-cur_population_size:][ - ::-1 - ] + ::-1 + ] for idx, config_idx in enumerate(indices): filename = str(idx).zfill( int(math.log10(cur_population_size)) + 1 diff --git a/neps/runtime.py b/neps/runtime.py index 99773afa..5e864159 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -48,7 +48,6 @@ def _default_worker_name() -> str: Loc = TypeVar("Loc") - # NOTE: As each NEPS process is only ever evaluating a single trial, this global can # be retrieved in NePS and refers to what this process is currently evaluating. # Note that before `_set_in_progress_trial` is called, this should be cleared diff --git a/neps/search_spaces/architecture/api.py b/neps/search_spaces/architecture/api.py index 98af2f14..a3af1510 100644 --- a/neps/search_spaces/architecture/api.py +++ b/neps/search_spaces/architecture/api.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import inspect from typing import Callable diff --git a/neps/search_spaces/architecture/cfg.py b/neps/search_spaces/architecture/cfg.py index f7815f6d..7e4aa453 100644 --- a/neps/search_spaces/architecture/cfg.py +++ b/neps/search_spaces/architecture/cfg.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import itertools import math import sys diff --git a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py index a79ce212..dda20458 100644 --- a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py +++ b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import itertools import math diff --git a/neps/search_spaces/architecture/core_graph_grammar.py b/neps/search_spaces/architecture/core_graph_grammar.py index 277ae9fc..17323b48 100644 --- a/neps/search_spaces/architecture/core_graph_grammar.py +++ b/neps/search_spaces/architecture/core_graph_grammar.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import collections import inspect diff --git a/neps/search_spaces/architecture/graph_grammar.py b/neps/search_spaces/architecture/graph_grammar.py index e21e94d8..74ba84f9 100644 --- a/neps/search_spaces/architecture/graph_grammar.py +++ b/neps/search_spaces/architecture/graph_grammar.py @@ -1,12 +1,10 @@ -from __future__ import annotations - from abc import abstractmethod from collections import OrderedDict from copy import deepcopy from functools import partial -from typing import Any, ClassVar, Mapping -from typing_extensions import override, Self -from neps.utils.types import NotSet, _NotSet +from typing import Any, ClassVar, Mapping, Self +from typing_extensions import override +from neps.utils.types import NotSet import networkx as nx import numpy as np @@ -46,22 +44,26 @@ class GraphParameter(ParameterWithPrior[nx.DiGraph, str], MutatableParameter): @property @abstractmethod - def id(self) -> str: ... + def id(self) -> str: + ... # NOTE(eddiebergman): Unlike traditional parameters, it seems @property @abstractmethod - def value(self) -> nx.DiGraph: ... + def value(self) -> nx.DiGraph: + ... # NOTE(eddiebergman): This is a function common to the three graph # parameters that is used for `load_from` @abstractmethod - def create_from_id(self, value: str) -> None: ... + def create_from_id(self, value: str) -> None: + ... # NOTE(eddiebergman): Function shared between graph parameters. # Used to `set_value()` @abstractmethod - def reset(self) -> None: ... + def reset(self) -> None: + ... @override def __eq__(self, other: Any) -> bool: @@ -71,7 +73,8 @@ def __eq__(self, other: Any) -> bool: return self.id == other.id @abstractmethod - def compute_prior(self, normalized_value: float) -> float: ... + def compute_prior(self, normalized_value: float) -> float: + ... @override def set_value(self, value: str | None) -> None: @@ -137,7 +140,9 @@ def load_from(self, value: str | Self) -> None: self.create_from_id(value) @abstractmethod - def mutate(self, parent: Self | None = None, *, mutation_strategy: str = "bananas") -> Self: ... + def mutate(self, parent: Self | None = None, *, + mutation_strategy: str = "bananas") -> Self: + ... @abstractmethod def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: @@ -154,7 +159,7 @@ def normalized_to_value(self, normalized_value: float) -> nx.DiGraph: @override def clone(self) -> Self: - new_self = self.__class__(**self.input_kwargs) + new_self = self.__class__(**self.input_kwargs) # HACK(eddiebergman): It seems the subclasses all have these and # so we just copy over those attributes, deepcloning anything that is mutable @@ -178,6 +183,7 @@ def clone(self) -> Self: return new_self + class GraphGrammar(GraphParameter, CoreGraphGrammar): hp_name = "graph_grammar" @@ -227,7 +233,8 @@ def __init__( def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.reset() - copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[0] + copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[ + 0] _ = copy_self.value # required for checking if graph is valid! return copy_self @@ -507,7 +514,8 @@ def crossover( def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.reset() - copy_self.string_tree_list = [grammar.sampler(1)[0] for grammar in copy_self.grammars] + copy_self.string_tree_list = [grammar.sampler(1)[0] for grammar in + copy_self.grammars] copy_self.string_tree = copy_self.assemble_trees( copy_self.string_tree_list[0], copy_self.string_tree_list[1:], @@ -1029,8 +1037,8 @@ def recursive_worker( [ grammar.compute_space_size for grammar, n_grammar in zip( - self.grammars, self.number_of_repetitive_motifs_per_grammar - ) + self.grammars, self.number_of_repetitive_motifs_per_grammar + ) for _ in range(n_grammar) ] ) diff --git a/neps/search_spaces/hyperparameters/categorical.py b/neps/search_spaces/hyperparameters/categorical.py index a349f898..bc1f9423 100644 --- a/neps/search_spaces/hyperparameters/categorical.py +++ b/neps/search_spaces/hyperparameters/categorical.py @@ -3,13 +3,7 @@ from __future__ import annotations from collections.abc import Iterable, Mapping -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Literal, - TypeAlias, -) +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeAlias from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/parameter.py b/neps/search_spaces/parameter.py index 27684a2d..277b8ca7 100644 --- a/neps/search_spaces/parameter.py +++ b/neps/search_spaces/parameter.py @@ -24,8 +24,8 @@ from abc import ABC, abstractmethod from collections.abc import Mapping -from typing import Any, ClassVar, Generic, TypeVar, runtime_checkable -from typing_extensions import Protocol, Self +from typing import Any, ClassVar, Generic, Protocol, TypeVar, runtime_checkable +from typing_extensions import Self ValueT = TypeVar("ValueT") SerializedT = TypeVar("SerializedT") diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index 8b25b1b0..ff6d72ad 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import re from typing import Literal, overload diff --git a/neps/state/neps_state.py b/neps/state/neps_state.py index 5d2af554..dd7d9279 100644 --- a/neps/state/neps_state.py +++ b/neps/state/neps_state.py @@ -199,6 +199,7 @@ def get_errors(self) -> ErrDump: @overload def get_next_pending_trial(self) -> Trial | None: ... + @overload def get_next_pending_trial(self, n: int | None = None) -> list[Trial]: ... diff --git a/neps/state/protocols.py b/neps/state/protocols.py index de2a4819..6ee54be4 100644 --- a/neps/state/protocols.py +++ b/neps/state/protocols.py @@ -10,8 +10,8 @@ from contextlib import contextmanager from copy import deepcopy from dataclasses import dataclass -from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar -from typing_extensions import Protocol, Self +from typing import TYPE_CHECKING, ClassVar, Generic, Protocol, TypeVar +from typing_extensions import Self from neps.exceptions import ( LockFailedError, diff --git a/neps/state/trial.py b/neps/state/trial.py index ddf956b9..7efd2d1e 100644 --- a/neps/state/trial.py +++ b/neps/state/trial.py @@ -12,12 +12,10 @@ import numpy as np from neps.exceptions import NePSError -from neps.utils.types import ConfigResult +from neps.utils.types import ERROR, ConfigResult, RawConfig if TYPE_CHECKING: from neps.search_spaces import SearchSpace - from neps.utils.types import ERROR, RawConfig - logger = logging.getLogger(__name__) diff --git a/neps/utils/types.py b/neps/utils/types.py index 03ca7385..867ff8f7 100644 --- a/neps/utils/types.py +++ b/neps/utils/types.py @@ -33,7 +33,6 @@ def __repr__(self) -> str: NotSet = _NotSet() - f64 = np.float64 i64 = np.int64 diff --git a/neps_examples/basic_usage/architecture.py b/neps_examples/basic_usage/architecture.py index 57ac74cc..5d43efe7 100644 --- a/neps_examples/basic_usage/architecture.py +++ b/neps_examples/basic_usage/architecture.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import logging diff --git a/neps_examples/experimental/hierarchical_architecture.py b/neps_examples/experimental/hierarchical_architecture.py index db101be4..6751cc7a 100644 --- a/neps_examples/experimental/hierarchical_architecture.py +++ b/neps_examples/experimental/hierarchical_architecture.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging from torch import nn diff --git a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py b/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py index 3db93bde..c79a7a01 100644 --- a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py +++ b/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import time diff --git a/tests/regression_objectives.py b/tests/regression_objectives.py index 6655fcbb..8265c704 100644 --- a/tests/regression_objectives.py +++ b/tests/regression_objectives.py @@ -1,11 +1,8 @@ -from __future__ import annotations - import warnings from pathlib import Path -from typing import Any, Callable +from typing import Any, Callable, Literal import numpy as np -from typing_extensions import Literal import neps from neps.search_spaces.search_space import SearchSpace, pipeline_space_from_configspace diff --git a/tests/regression_runner.py b/tests/regression_runner.py index 0a5a8898..d102a1df 100644 --- a/tests/regression_runner.py +++ b/tests/regression_runner.py @@ -1,6 +1,4 @@ # mypy: disable-error-code = union-attr -from __future__ import annotations - import json import logging from pathlib import Path diff --git a/tests/test_neps_api/test_api.py b/tests/test_neps_api/test_api.py index 32408007..b4a54940 100644 --- a/tests/test_neps_api/test_api.py +++ b/tests/test_neps_api/test_api.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import os import runpy diff --git a/tests/test_runtime/test_default_report_values.py b/tests/test_runtime/test_default_report_values.py index 652db9de..2ebec1c0 100644 --- a/tests/test_runtime/test_default_report_values.py +++ b/tests/test_runtime/test_default_report_values.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from pathlib import Path from pytest_cases import fixture diff --git a/tests/test_runtime/test_error_handling_strategies.py b/tests/test_runtime/test_error_handling_strategies.py index 890717c5..d3180ae9 100644 --- a/tests/test_runtime/test_error_handling_strategies.py +++ b/tests/test_runtime/test_error_handling_strategies.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from neps.exceptions import WorkerRaiseError import pytest from dataclasses import dataclass diff --git a/tests/test_runtime/test_stopping_criterion.py b/tests/test_runtime/test_stopping_criterion.py index 28426a1f..5b83985d 100644 --- a/tests/test_runtime/test_stopping_criterion.py +++ b/tests/test_runtime/test_stopping_criterion.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import time from pathlib import Path from pytest_cases import fixture diff --git a/tests/test_state/test_filebased_neps_state.py b/tests/test_state/test_filebased_neps_state.py index a3385141..cf4e77b8 100644 --- a/tests/test_state/test_filebased_neps_state.py +++ b/tests/test_state/test_filebased_neps_state.py @@ -2,8 +2,6 @@ This could be generalized if we end up with a server based implementation but for now we're just testing the filebased implementation.""" -from __future__ import annotations - from pathlib import Path from typing import Any from neps.exceptions import NePSError, TrialNotFoundError diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py index 0d0891ce..af993311 100644 --- a/tests/test_state/test_neps_state.py +++ b/tests/test_state/test_neps_state.py @@ -2,8 +2,6 @@ This could be generalized if we end up with a server based implementation but for now we're just testing the filebased implementation.""" -from __future__ import annotations - import time from pathlib import Path from typing import Any diff --git a/tests/test_state/test_rng.py b/tests/test_state/test_rng.py index 1f1318d3..d122c8e8 100644 --- a/tests/test_state/test_rng.py +++ b/tests/test_state/test_rng.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from pathlib import Path import random from typing import Callable diff --git a/tests/test_state/test_synced.py b/tests/test_state/test_synced.py index 3a28b724..fb39b148 100644 --- a/tests/test_state/test_synced.py +++ b/tests/test_state/test_synced.py @@ -1,9 +1,8 @@ -from __future__ import annotations +import copy +import random from pytest_cases import parametrize, parametrize_with_cases, case -import copy import numpy as np -import random from neps.state.err_dump import ErrDump, SerializableTrialError from neps.state.filebased import ( ReaderWriterErrDump, @@ -15,8 +14,6 @@ FileLocker, ) from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo -from neps.state.protocols import Synced -from neps.state.trial import Trial import pytest from typing import Any, Callable from pathlib import Path diff --git a/tests/test_state/test_trial.py b/tests/test_state/test_trial.py index 0ddc9e34..a433a917 100644 --- a/tests/test_state/test_trial.py +++ b/tests/test_state/test_trial.py @@ -1,8 +1,9 @@ -from __future__ import annotations -from neps.state import Trial import os + import numpy as np +from neps.state import Trial + def test_trial_creation() -> None: trial_id = "1" From 2df23898e7e87c96c80947432d2b174f88d78f48 Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 13:59:42 +0200 Subject: [PATCH 05/46] removing mandatory import of `from __future__ import annotations` --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d8d57fca..fc42f1cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -248,7 +248,6 @@ ignore = [ known-first-party = ["neps"] known-third-party = [] no-lines-before = ["future"] -required-imports = ["from __future__ import annotations"] combine-as-imports = true extra-standard-library = ["typing_extensions"] force-wrap-aliases = true From ed88a29b3615190ad901657a74ce1ca2191ee1fb Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 14:13:33 +0200 Subject: [PATCH 06/46] added back from `__future__ import annotations` where necessary; added back `from typing_extensions import ...` for py310 compatability --- .../bayesian_optimization/acquisition_functions/ei.py | 2 ++ .../acquisition_samplers/base_acq_sampler.py | 2 +- .../acquisition_samplers/mutation_sampler.py | 5 ++++- neps/optimizers/bayesian_optimization/cost_cooling.py | 3 ++- neps/optimizers/bayesian_optimization/kernels/utils.py | 2 ++ neps/optimizers/bayesian_optimization/mf_tpe.py | 3 ++- neps/optimizers/bayesian_optimization/optimizer.py | 7 +++++-- neps/optimizers/grid_search/optimizer.py | 3 ++- neps/optimizers/multi_fidelity/dyhpo.py | 3 ++- neps/optimizers/multi_fidelity/hyperband.py | 3 ++- neps/optimizers/multi_fidelity/successive_halving.py | 4 +++- neps/optimizers/multi_fidelity_prior/async_priorband.py | 5 +++-- neps/optimizers/random_search/optimizer.py | 5 +++-- neps/optimizers/regularized_evolution/optimizer.py | 5 +++-- .../architecture/cfg_variants/constrained_cfg.py | 2 +- neps/search_spaces/architecture/graph_grammar.py | 2 ++ pyproject.toml | 1 + 17 files changed, 40 insertions(+), 17 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py index 07a2f20b..90a99f26 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import TYPE_CHECKING, Sequence, Union import numpy as np import torch diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py index 2a6e508f..adf47b82 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py @@ -1,4 +1,4 @@ - +from __future__ import annotations from abc import abstractmethod from typing import TYPE_CHECKING, Sequence, Callable diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py index ee243e43..227becf9 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py @@ -1,4 +1,7 @@ -from typing import TYPE_CHECKING, Callable, Sequence, override +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable, Sequence +from typing_extensions import override import numpy as np import torch diff --git a/neps/optimizers/bayesian_optimization/cost_cooling.py b/neps/optimizers/bayesian_optimization/cost_cooling.py index b602d9d3..d5f9848a 100644 --- a/neps/optimizers/bayesian_optimization/cost_cooling.py +++ b/neps/optimizers/bayesian_optimization/cost_cooling.py @@ -1,4 +1,5 @@ -from typing import Any, override +from typing import Any +from typing_extensions import override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult diff --git a/neps/optimizers/bayesian_optimization/kernels/utils.py b/neps/optimizers/bayesian_optimization/kernels/utils.py index 9736586b..92ee1817 100644 --- a/neps/optimizers/bayesian_optimization/kernels/utils.py +++ b/neps/optimizers/bayesian_optimization/kernels/utils.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import TYPE_CHECKING, Tuple import networkx as nx diff --git a/neps/optimizers/bayesian_optimization/mf_tpe.py b/neps/optimizers/bayesian_optimization/mf_tpe.py index 61092c80..1c2a58df 100644 --- a/neps/optimizers/bayesian_optimization/mf_tpe.py +++ b/neps/optimizers/bayesian_optimization/mf_tpe.py @@ -1,6 +1,7 @@ import random from copy import deepcopy -from typing import Any, Iterable, Literal, override +from typing import Any, Iterable, Literal +from typing_extensions import override import numpy as np import torch diff --git a/neps/optimizers/bayesian_optimization/optimizer.py b/neps/optimizers/bayesian_optimization/optimizer.py index 337d849b..9ff00f28 100644 --- a/neps/optimizers/bayesian_optimization/optimizer.py +++ b/neps/optimizers/bayesian_optimization/optimizer.py @@ -1,7 +1,10 @@ +from __future__ import annotations + import random -from typing import Any, TYPE_CHECKING, Literal, override +from typing import Any, TYPE_CHECKING, Literal +from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.utils.common import instance_from_map from neps.search_spaces import ( diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 8b367d65..01ed394b 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -1,5 +1,6 @@ import random -from typing import Any, override +from typing import Any +from typing_extensions import override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 0baeb596..a058d38c 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -1,4 +1,5 @@ -from typing import Any, override +from typing import Any +from typing_extensions import override import numpy as np diff --git a/neps/optimizers/multi_fidelity/hyperband.py b/neps/optimizers/multi_fidelity/hyperband.py index 6ac3f529..510fb582 100644 --- a/neps/optimizers/multi_fidelity/hyperband.py +++ b/neps/optimizers/multi_fidelity/hyperband.py @@ -1,6 +1,7 @@ import typing from copy import deepcopy -from typing import Any, Literal, override +from typing import Any, Literal +from typing_extensions import override import numpy as np diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index 2813285e..6df62333 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -1,4 +1,5 @@ # type: ignore +from __future__ import annotations import random import typing @@ -6,7 +7,8 @@ import numpy as np import pandas as pd -from typing import Literal, override +from typing import Literal +from typing_extensions import override from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces import ( diff --git a/neps/optimizers/multi_fidelity_prior/async_priorband.py b/neps/optimizers/multi_fidelity_prior/async_priorband.py index 0b933276..ce2352cf 100644 --- a/neps/optimizers/multi_fidelity_prior/async_priorband.py +++ b/neps/optimizers/multi_fidelity_prior/async_priorband.py @@ -1,9 +1,10 @@ import typing import numpy as np -from typing import Literal, override +from typing import Literal +from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( diff --git a/neps/optimizers/random_search/optimizer.py b/neps/optimizers/random_search/optimizer.py index 094ca87d..abe16866 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -1,6 +1,7 @@ -from typing import Any, override +from typing import Any +from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer diff --git a/neps/optimizers/regularized_evolution/optimizer.py b/neps/optimizers/regularized_evolution/optimizer.py index a7ce782b..d112be31 100644 --- a/neps/optimizers/regularized_evolution/optimizer.py +++ b/neps/optimizers/regularized_evolution/optimizer.py @@ -2,12 +2,13 @@ import os import random from pathlib import Path -from typing import Any, Callable, override +from typing import Any, Callable +from typing_extensions import override import numpy as np import yaml -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace diff --git a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py index dda20458..a79ce212 100644 --- a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py +++ b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py @@ -1,4 +1,4 @@ - +from __future__ import annotations import itertools import math diff --git a/neps/search_spaces/architecture/graph_grammar.py b/neps/search_spaces/architecture/graph_grammar.py index 74ba84f9..61ae6922 100644 --- a/neps/search_spaces/architecture/graph_grammar.py +++ b/neps/search_spaces/architecture/graph_grammar.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from abc import abstractmethod from collections import OrderedDict from copy import deepcopy diff --git a/pyproject.toml b/pyproject.toml index fc42f1cc..f42f840a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -247,6 +247,7 @@ ignore = [ [tool.ruff.lint.isort] known-first-party = ["neps"] known-third-party = [] +required-imports = ["from __future__ import annotations"] no-lines-before = ["future"] combine-as-imports = true extra-standard-library = ["typing_extensions"] From e47ac919a788848d9dc2f7a68135742db25ee032 Mon Sep 17 00:00:00 2001 From: "Timur M. Carstensen" Date: Mon, 26 Aug 2024 14:17:19 +0200 Subject: [PATCH 07/46] fixing an import of `Self` --- neps/search_spaces/architecture/graph_grammar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neps/search_spaces/architecture/graph_grammar.py b/neps/search_spaces/architecture/graph_grammar.py index 61ae6922..1c9fa159 100644 --- a/neps/search_spaces/architecture/graph_grammar.py +++ b/neps/search_spaces/architecture/graph_grammar.py @@ -4,8 +4,8 @@ from collections import OrderedDict from copy import deepcopy from functools import partial -from typing import Any, ClassVar, Mapping, Self -from typing_extensions import override +from typing import Any, ClassVar, Mapping +from typing_extensions import override, Self from neps.utils.types import NotSet import networkx as nx From 2713e6a1bb36a8cdb6cea134f95b48d8df6ab7ee Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Tue, 9 Jul 2024 00:45:41 +0200 Subject: [PATCH 08/46] Adding ifbo as dependency (for surrogates) --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f42f840a..8e34b01b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" +ifbo = "^0.3" [tool.poetry.group.dev.dependencies] ruff = "^0.4" From cf6a8ac2f3e8fc0324877f5ec165ef49dea8559e Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 16:37:11 +0100 Subject: [PATCH 09/46] Initial bulk commit --- neps/optimizers/__init__.py | 2 +- .../acquisition_functions/mf_ei.py | 401 ++++++++++++++---- .../freeze_thaw_sampler.py | 87 ++-- neps/optimizers/grid_search/optimizer.py | 1 + neps/optimizers/multi_fidelity/dyhpo.py | 107 +++-- neps/optimizers/multi_fidelity/mf_bo.py | 62 ++- neps/optimizers/multi_fidelity/utils.py | 64 ++- neps/optimizers/utils.py | 39 +- neps/search_spaces/search_space.py | 7 + neps/utils/common.py | 71 ++++ 10 files changed, 645 insertions(+), 196 deletions(-) diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 1fdae197..a2b86c14 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -45,5 +45,5 @@ "hyperband_custom_default": HyperbandCustomDefault, "priorband": PriorBand, "mobster": MOBSTER, - "mf_ei_bo": MFEIBO, + "ifbo_ei": MFEIBO, } diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index 3d19040d..8e74b338 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -6,13 +6,80 @@ import torch from torch.distributions import Normal -from ....optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ....search_spaces.search_space import SearchSpace -from ...multi_fidelity.utils import MFObservedData -from .ei import ComprehensiveExpectedImprovement +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.multi_fidelity.utils import MFObservedData +from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition +from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ComprehensiveExpectedImprovement -class MFEI(ComprehensiveExpectedImprovement): +class MFStepBase(BaseAcquisition): + """A class holding common operations that can be inherited. + + WARNING: Unsafe use of self attributes, can break if not used correctly. + """ + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + **kwargs, + ): + # overload to select incumbent differently through observations + self.pipeline_space = pipeline_space + self.surrogate_model = surrogate_model + self.observations = observations + self.b_step = b_step + return + + def get_budget_level(self, config) -> int: + return int((config.fidelity.value - config.fidelity.lower) / self.b_step) + + + def preprocess_gp(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + x, inc_list = self.preprocess(x) + return x, inc_list + + def preprocess_deep_gp(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + x, inc_list = self.preprocess(x) + x_lcs = [] + for idx in x.index: + if idx in self.observations.df.index.levels[0]: + # TODO: Samir, check if `budget_id=None` is okay? + # budget_level = self.get_budget_level(x[idx]) + # extracting the available/observed learning curve + lc = self.observations.extract_learning_curve(idx, budget_id=None) + else: + # initialize a learning curve with a placeholder + # This is later padded accordingly for the Conv1D layer + lc = [] + x_lcs.append(lc) + self.surrogate_model.set_prediction_learning_curves(x_lcs) + return x, inc_list + + def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. + """ + _x, inc_list = self.preprocess(x.copy()) + _x_tok = self.observations.tokenize(_x, as_tensor=True) + len_partial = len(self.observations.seen_config_ids) + z_min = x[0].fidelity.lower + z_max = x[0].fidelity.upper + # converting fidelity to the discrete budget level + # STRICT ASSUMPTION: fidelity is the second dimension + _x_tok[:len_partial, 1] = ( + _x_tok[:len_partial, 1] + self.b_step - z_min + ) / self.b_step + _x_tok[:, 1] = _x_tok[:, 1] / z_max + return _x, _x_tok, inc_list + + +# NOTE: the order of inheritance is important +class MFEI(MFStepBase, ComprehensiveExpectedImprovement): def __init__( self, pipeline_space: SearchSpace, @@ -20,30 +87,40 @@ def __init__( augmented_ei: bool = False, xi: float = 0.0, in_fill: str = "best", + inc_normalization: bool = False, log_ei: bool = False, ): super().__init__(augmented_ei, xi, in_fill, log_ei) self.pipeline_space = pipeline_space self.surrogate_model_name = surrogate_model_name + self.inc_normalization = inc_normalization self.surrogate_model = None self.observations = None self.b_step = None - def get_budget_level(self, config) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.b_step) + def preprocess_inc_list(self, **kwargs) -> list: + assert "budget_list" in kwargs, "Requires a list of query step for candidate set." + budget_list = kwargs["budget_list"] + performances = self.observations.get_best_performance_for_each_budget() + inc_list = [] + for budget_level in budget_list: + if budget_level in performances.index: + inc = performances[budget_level] + else: + inc = self.observations.get_best_seen_performance() + inc_list.append(inc) + return inc_list - def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]: + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. Takes a set of points and computes the budget and incumbent for each point, as required by the multi-fidelity Expected Improvement acquisition function. """ budget_list = [] - if self.pipeline_space.has_tabular: # preprocess tabular space differently # expected input: IDs pertaining to the tabular data - # expected output: IDs pertaining to current observations and set of HPs x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) indices_to_drop = [] for i, config in x.items(): @@ -55,88 +132,52 @@ def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]: if np.less_equal(target_fidelity, config.fidelity.upper): # only consider the configs with fidelity lower than the max fidelity - config.fidelity.set_value(target_fidelity) + config.fidelity.value = target_fidelity budget_list.append(self.get_budget_level(config)) else: # if the target_fidelity higher than the max drop the configuration indices_to_drop.append(i) else: - config.fidelity.set_value(target_fidelity) + config.fidelity.value = target_fidelity budget_list.append(self.get_budget_level(config)) # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) - performances = self.observations.get_best_performance_for_each_budget() - inc_list = [] - for budget_level in budget_list: - if budget_level in performances.index: - inc = performances[budget_level] - else: - inc = self.observations.get_best_seen_performance() - inc_list.append(inc) + # Collecting incumbent list per configuration + inc_list = self.preprocess_inc_list(budget_list=budget_list) return x, torch.Tensor(inc_list) - def preprocess_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - return x.values.tolist(), inc_list - - def preprocess_deep_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - x_lcs = [] - for idx in x.index: - if idx in self.observations.df.index.levels[0]: - budget_level = self.get_budget_level(x[idx]) - lc = self.observations.extract_learning_curve(idx, budget_level) - else: - # initialize a learning curve with a place holder - # This is later padded accordingly for the Conv1D layer - lc = [0.0] - x_lcs.append(lc) - self.surrogate_model.set_prediction_learning_curves(x_lcs) - return x.values.tolist(), inc_list - - def preprocess_pfn(self, x: Iterable) -> Tuple[Iterable, Iterable, Iterable]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - _x, inc_list = self.preprocess(x.copy()) - _x_tok = self.observations.tokenize(_x, as_tensor=True) - len_partial = len(self.observations.seen_config_ids) - z_min = x[0].fidelity.lower - # converting fidelity to the discrete budget level - # STRICT ASSUMPTION: fidelity is the first dimension - _x_tok[:len_partial, 0] = ( - _x_tok[:len_partial, 0] + self.b_step - z_min - ) / self.b_step - return _x_tok, _x, inc_list - def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: - # _x = x.copy() # preprocessing needs to change the reference x Series so we don't copy here + # deepcopy + _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) if self.surrogate_model_name == "pfn": - _x_tok, _x, inc_list = self.preprocess_pfn( + _x, _x_tok, inc_list = self.preprocess_pfn( x.copy() ) # IMPORTANT change from vanilla-EI ei = self.eval_pfn_ei(_x_tok, inc_list) - elif self.surrogate_model_name == "deep_gp": + elif self.surrogate_model_name in ["deep_gp", "dpl"]: _x, inc_list = self.preprocess_deep_gp( - x.copy() + _x ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) - else: + ei = self.eval_gp_ei(_x.values.tolist(), inc_list) + elif self.surrogate_model_name == "gp": _x, inc_list = self.preprocess_gp( - x.copy() + _x ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) + ei = self.eval_gp_ei(_x.values.tolist(), inc_list) + else: + raise ValueError( + f"Unrecognized surrogate model name: {self.surrogate_model_name}" + ) + + if self.inc_normalization: + ei = ei / inc_list if ei.is_cuda: ei = ei.cpu() - if len(x) > 1 and asscalar: + if len(_x) > 1 and asscalar: return ei.detach().numpy(), _x else: return ei.detach().numpy().item(), _x @@ -145,8 +186,6 @@ def eval_pfn_ei( self, x: Iterable, inc_list: Iterable ) -> Union[np.ndarray, torch.Tensor, float]: """PFN-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from vanilla-EI - # _x = x.copy() ei = self.surrogate_model.get_ei(x.to(self.surrogate_model.device), inc_list) if len(ei.shape) == 2: ei = ei.flatten() @@ -156,7 +195,6 @@ def eval_gp_ei( self, x: Iterable, inc_list: Iterable ) -> Union[np.ndarray, torch.Tensor, float]: """Vanilla-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from vanilla-EI _x = x.copy() try: mu, cov = self.surrogate_model.predict(_x) @@ -182,13 +220,153 @@ def eval_gp_ei( ucdf = gauss.cdf(u) updf = torch.exp(gauss.log_prob(u)) ei = std * updf + (mu_star - mu - self.xi) * ucdf + # Clip ei if std == 0.0 + # ei = torch.where(torch.isclose(std, torch.tensor(0.0)), 0, ei) if self.augmented_ei: sigma_n = self.surrogate_model.likelihood ei *= 1.0 - torch.sqrt(torch.tensor(sigma_n, device=mu.device)) / torch.sqrt( sigma_n + torch.diag(cov) ) + + # Save data for writing + self.mu_star = mu_star.detach().numpy().tolist() + self.mu = mu.detach().numpy().tolist() + self.std = std.detach().numpy().tolist() return ei + +class MFEI_AtMax(MFEI): + + def preprocess_inc_list(self, **kwargs) -> list: + assert "len_x" in kwargs, "Requires the length of the candidate set." + len_x = kwargs["len_x"] + # finds global incumbent + inc_value = min(self.observations.get_best_performance_for_each_budget()) + # uses the best seen value as the incumbent in EI computation for all candidates + inc_list = [inc_value] * len_x + return inc_list + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point. + Unlike the base class MFEI, sets the target fidelity to be max budget and the + incumbent choice to be the max seen across history for all candidates. + """ + budget_list = [] + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + indices_to_drop = [] + for i, config in x.items(): + target_fidelity = config.fidelity.upper # change from MFEI + + if config.fidelity.value == target_fidelity: + # if the target_fidelity already reached, drop the configuration + indices_to_drop.append(i) + else: + config.fidelity.value = target_fidelity + budget_list.append(self.get_budget_level(config)) + + # drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + # create the same incumbent for all candidates + inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) + + return x, torch.Tensor(inc_list) + + +class MFEI_Dyna(MFEI_AtMax): + """ + Computes extrapolation length of curves to maximum fidelity seen. + Uses the global incumbent as the best score in EI computation. + """ + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point. + Unlike the base class MFEI, sets the target fidelity to be max budget and the + incumbent choice to be the max seen across history for all candidates. + """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + # find the maximum observed steps per config to obtain the current pseudo_z_max + max_z_level_per_x = self.observations.get_max_observed_fidelity_level_per_config() + pseudo_z_level_max = max_z_level_per_x.max() # highest seen fidelity step so far + # find the fidelity step at which the best seen performance was recorded + z_inc_level = self.observations.get_budget_level_for_best_performance() + # retrieving actual fidelity values from budget level + ## marker 1: the fidelity value at which the best seen performance was recorded + z_inc = self.b_step * z_inc_level + self.pipeline_space.fidelity.lower + ## marker 2: the maximum fidelity value recorded in observation history + pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower + + # TODO: compare with this first draft logic + # def update_fidelity(config): + # ### DO NOT DELETE THIS FUNCTION YET + # # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max) + # ## that is, choose the next highest marker from 1 and 2 + # z_extrapolate = min( + # max(config.fidelity.value + self.b_step, z_inc), + # pseudo_z_max + # ) + # config.fidelity.value = z_extrapolate + # return config + + def update_fidelity(config): + # for all configs, set to pseudo_z_max + ## that is, choose the highest seen fidelity in observation history + z_extrapolate = pseudo_z_max + config.fidelity.value = z_extrapolate + return config + + # collect IDs for partial configurations + _partial_config_ids = (x.index <= max(self.observations.seen_config_ids)) + # filter for configurations that reached max budget + indices_to_drop = [ + _idx + for _idx, _x in x.loc[_partial_config_ids].items() + if _x.fidelity.value == self.pipeline_space.fidelity.upper + ] + # drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + # set fidelity for all partial configs + x = x.apply(update_fidelity) + + # create the same incumbent for all candidates + inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) + + return x, torch.Tensor(inc_list) + + +class MFEI_Random(MFEI): + + BUDGET = 1000 + + + def __init__( + self, + pipeline_space: SearchSpace, + horizon: str = "random", + threshold: str = "random", + surrogate_model_name: str = None, + augmented_ei: bool = False, + xi: float = 0.0, + in_fill: str = "best", + log_ei: bool = False, + ): + super().__init__(pipeline_space, surrogate_model_name, augmented_ei, xi, in_fill, log_ei) + self.horizon = horizon + self.threshold = threshold + def set_state( self, pipeline_space: SearchSpace, @@ -197,9 +375,80 @@ def set_state( b_step: Union[int, float], **kwargs, ): - # overload to select incumbent differently through observations - self.pipeline_space = pipeline_space - self.surrogate_model = surrogate_model - self.observations = observations - self.b_step = b_step - return + # set RNG + self.rng = np.random.RandomState(seed=42) + for i in range(len(observations.completed_runs)): + self.rng.uniform(-4,-1) + self.rng.randint(1,51) + + return super().set_state(pipeline_space, surrogate_model, observations, b_step) + + def sample_horizon(self, steps_passed): + if self.horizon == 'random': + shortest = self.pipeline_space.fidelity.lower + longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + return self.rng.randint(shortest, longest+1) + elif self.horizon == 'max': + return min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + else: + return int(self.horizon) + + def sample_threshold(self, f_inc): + if self.threshold == 'random': + lu = 10**self.rng.uniform(-4,-1) # % of gap closed + else: + lu = float(self.threshold) + return f_inc * (1 - lu) + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. + """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + + indices_to_drop = [] + inc_list = [] + + steps_passed = len(self.observations.completed_runs) + print(f"Steps acquired: {steps_passed}") + + # Like EI-AtMax, use the global incumbent as a basis for the EI threshold + inc_value = min(self.observations.get_best_performance_for_each_budget()) + # Extension: Add a random min improvement threshold to encourage high risk high gain + inc_value = self.sample_threshold(inc_value) + print(f"Threshold for EI: {inc_value}") + + # Like MFEI: set fidelities to query using horizon as self.b_step + # Extension: Unlike DyHPO, we sample the horizon randomly over the full range + horizon = self.sample_horizon(steps_passed) + print(f"Horizon for EI: {horizon}") + for i, config in x.items(): + if i <= max(self.observations.seen_config_ids): + current_fidelity = config.fidelity.value + if np.equal(config.fidelity.value, config.fidelity.upper): + # this training run has ended, drop it from future selection + indices_to_drop.append(i) + else: + # a candidate partial training run to continue + target_fidelity = config.fidelity.value + horizon + config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) # if horizon exceeds max, query at max + inc_list.append(inc_value) + else: + # a candidate new training run that we would need to start + current_fidelity = 0 + config.fidelity.value = horizon + inc_list.append(inc_value) + #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") + + # Drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + assert len(inc_list) == len(x) + + return x, torch.Tensor(inc_list) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index b9dd5ddc..f826f18a 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -6,9 +6,9 @@ import numpy as np import pandas as pd -from ....search_spaces.search_space import SearchSpace -from ...multi_fidelity.utils import MFObservedData -from .base_acq_sampler import AcquisitionSampler +from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.multi_fidelity.utils import MFObservedData +from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import AcquisitionSampler class FreezeThawSampler(AcquisitionSampler): @@ -104,60 +104,70 @@ def sample( set_new_sample_fidelity: int | float = None, ) -> list(): """Samples a new set and returns the total set of observed + new configs.""" + start = time.time() partial_configs = self.observations.get_partial_configs_at_max_seen() - new_configs = self._sample_new( - index_from=self.observations.next_config_id(), n=n, ignore_fidelity=False - ) - - def __sample_single_new_tabular(index: int): - """ - A function to use in a list comprehension to slightly speed up - the sampling process when self.SAMPLE_TO_DRAW is large - """ - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False - ) - config["id"].set_value(_new_configs[index]) - config.fidelity.set_value(set_new_sample_fidelity) - return config + # print("-" * 50) + # print(f"| freeze-thaw:get_partial_at_max_seen(): {time.time()-start:.2f}s") + # print("-" * 50) + _n = n if n is not None else self.SAMPLES_TO_DRAW if self.is_tabular: - _n = n if n is not None else self.SAMPLES_TO_DRAW + # handles tabular data such that the entire unseen set of configs from the + # table is considered to be the new set of candidates _partial_ids = {conf["id"].value for conf in partial_configs} - _all_ids = set(self.pipeline_space.custom_grid_table.index.values) + _all_ids = set(list(self.pipeline_space.custom_grid_table.keys())) # accounting for unseen configs only, samples remaining table if flag is set max_n = len(_all_ids) + 1 if self.sample_full_table else _n _n = min(max_n, len(_all_ids - _partial_ids)) + start = time.time() _new_configs = np.random.choice( list(_all_ids - _partial_ids), size=_n, replace=False ) - new_configs = [__sample_single_new_tabular(i) for i in range(_n)] + placeholder_config = self.pipeline_space.sample( + patience=self.patience, user_priors=False, ignore_fidelity=False + ) + _configs = [deepcopy(placeholder_config) for _id in _new_configs] + for _i, val in enumerate(_new_configs): + _configs[_i]["id"].value = val + + # print("-" * 50) + # print(f"| freeze-thaw:sample:new_configs_extraction: {time.time()-start:.2f}s") + # print("-" * 50) new_configs = pd.Series( - new_configs, + _configs, index=np.arange( - len(partial_configs), len(partial_configs) + len(new_configs) + len(partial_configs), len(partial_configs) + len(_new_configs) ), ) + else: + # handles sampling new configurations for continuous spaces + new_configs = self._sample_new( + index_from=self.observations.next_config_id(), n=_n, ignore_fidelity=False + ) + # Continuous benchmarks need to deepcopy individual configs here, + # because in contrast to tabular benchmarks + # they are not reset in every sampling step + partial_configs = pd.Series( + [deepcopy(p_config_) for idx, p_config_ in partial_configs.items()], + index=partial_configs.index + ) - elif set_new_sample_fidelity is not None: + # Updating fidelity values + start = time.time() + if set_new_sample_fidelity is not None: for config in new_configs: - config.fidelity.set_value(set_new_sample_fidelity) - - # Deep copy configs for fidelity updates - partial_configs_list = [] - index_list = [] - for idx, config in partial_configs.items(): - _config = config.clone() - partial_configs_list.append(_config) - index_list.append(idx) - - # We build a new series of partial configs to avoid - # incrementing fidelities multiple times due to pass-by-reference - partial_configs = pd.Series(partial_configs_list, index=index_list) + config.fidelity.value = set_new_sample_fidelity + # print("-" * 50) + # print(f"| freeze-thaw:sample:new_configs_set_fidelity: {time.time()-start:.2f}s") + # print("-" * 50) - configs = pd.concat([partial_configs, new_configs]) + start = time.time() + configs = pd.concat([deepcopy(partial_configs), new_configs]) + # print("-" * 50) + # print(f"| freeze-thaw:sample:concat_configs: {time.time()-start:.2f}s") + # print("-" * 50) return configs @@ -178,3 +188,4 @@ def set_state( and self.pipeline_space.custom_grid_table is not None ): self.is_tabular = True + self.set_sample_full_tabular(True) diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 01ed394b..e9f1d9a3 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -18,6 +18,7 @@ def __init__( size_per_numerical_hp=grid_step_size, include_endpoints=True, ) + # TODO: handle this shuffling better and offer more control to the user random.shuffle(self.configs_list) @override diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index a058d38c..9c567f89 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -2,10 +2,11 @@ from typing_extensions import override import numpy as np +import pandas as pd from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig -from neps.utils.common import instance_from_map +from neps.utils.common import instance_from_map, EvaluationData from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer from neps.optimizers.bayesian_optimization.acquisition_functions import AcquisitionMapping @@ -31,9 +32,9 @@ class MFEIBO(BaseOptimizer): def __init__( self, pipeline_space: SearchSpace, - budget: int | None = None, + budget: int = None, step_size: int | float = 1, - optimal_assignment: bool = False, + optimal_assignment: bool = False, # pylint: disable=unused-argument use_priors: bool = False, sample_default_first: bool = False, sample_default_at_target: bool = False, @@ -44,18 +45,18 @@ def __init__( logger=None, # arguments for model surrogate_model: str | Any = "deep_gp", - surrogate_model_args: dict | None = None, - domain_se_kernel: str | None = None, - graph_kernels: list | None = None, - hp_kernels: list | None = None, + surrogate_model_args: dict = None, + domain_se_kernel: str = None, + graph_kernels: list = None, + hp_kernels: list = None, acquisition: str | BaseAcquisition = acquisition, - acquisition_args: dict | None = None, + acquisition_args: dict = None, acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", - acquisition_sampler_args: dict | None = None, + acquisition_sampler_args: dict = None, model_policy: Any = FreezeThawModel, initial_design_fraction: float = 0.75, initial_design_size: int = 10, - initial_design_budget: int | None = None, + initial_design_budget: int = None, ): """Initialise @@ -92,10 +93,7 @@ def __init__( self.max_budget = self.pipeline_space.fidelity.upper self._initial_design_fraction = initial_design_fraction - ( - self._initial_design_size, - self._initial_design_budget, - ) = self._set_initial_design( + self._initial_design_size, self._initial_design_budget = self._set_initial_design( initial_design_size, initial_design_budget, self._initial_design_fraction ) # TODO: Write use cases for these parameters @@ -127,7 +125,7 @@ def __init__( self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space) # TODO: Better solution than branching based on the surrogate name is needed - if surrogate_model in ["deep_gp", "gp"]: + if surrogate_model in ["deep_gp", "gp", "dpl"]: model_policy = FreezeThawModel elif surrogate_model == "pfn": model_policy = PFNSurrogate @@ -167,6 +165,8 @@ def __init__( ) self.count = 0 + self.evaluation_data = EvaluationData() + def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: # setup for GP implemented in NePS @@ -267,7 +267,7 @@ def total_budget_spent(self) -> int | float: return total_budget_spent - def is_init_phase(self, budget_based: bool = True) -> bool: + def is_init_phase(self, budget_based: bool = False) -> bool: if budget_based: # Check if we are still in the initial design phase based on # either the budget spent so far or the number of configurations evaluated @@ -300,7 +300,6 @@ def load_optimization_state( columns=["config", "perf", "learning_curves"], index_names=["config_id", "budget_id"], ) - # previous optimization run exists and needs to be loaded self._load_previous_observations(previous_results) self.total_fevals = len(previous_results) + len(pending_evaluations) @@ -312,7 +311,6 @@ def load_optimization_state( self.observed_configs.df.sort_index( level=self.observed_configs.df.index.names, inplace=True ) - # TODO: can we do better than keeping a copy of the observed configs? # TODO: can we not hide this in load_results and have something that pops out # more, like a set_state or policy_args @@ -404,11 +402,13 @@ def _randomly_promote(self) -> tuple[SearchSpace, int]: budget = self.observed_configs.df.loc[_config_id].index.values[-1] # calculating fidelity value new_fidelity = self.get_budget_value(budget + 1) - # settingt the config fidelity - config.fidelity.set_value(new_fidelity) + # setting the config fidelity + config.fidelity.value = new_fidelity return config, _config_id - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: + def get_config_and_ids( # pylint: disable=no-self-use + self, + ) -> tuple[SearchSpace, str, str | None]: """...and this is the method that decides which point to query. Returns: @@ -422,11 +422,9 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config = self.pipeline_space.sample( patience=self.patience, user_priors=True, ignore_fidelity=False ) - assert config.fidelity is not None - config.fidelity.set_value(self.min_budget) - + config.fidelity.value = self.min_budget _config_id = self.observed_configs.next_config_id() - elif self.is_init_phase(budget_based=True) or self._model_update_failed: + elif self.is_init_phase() or self._model_update_failed: # promote a config randomly if initial design size is satisfied but the # initial design budget has not been exhausted self.logger.info("promoting...") @@ -439,33 +437,76 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: # main acquisition call here after initial design is turned off self.logger.info("acquiring...") # generates candidate samples for acquisition calculation - assert self.pipeline_space.fidelity is not None samples = self.acquisition_sampler.sample( set_new_sample_fidelity=self.pipeline_space.fidelity.lower ) # fidelity values here should be the observations or min. fidelity + # calculating acquisition function values for the candidate samples acq, _samples = self.acquisition.eval( # type: ignore[attr-defined] x=samples, asscalar=True ) + acq = pd.Series(acq, index=_samples.index) + # maximizing acquisition function - _idx = np.argsort(acq)[-1] + best_idx = acq.sort_values().index[-1] # extracting the config ID for the selected maximizer - _config_id = samples.index[_samples.index.values[_idx]] + _config_id = best_idx # samples.index[_samples.index.values[_idx]] # `_samples` should have new configs with fidelities set to as required # NOTE: len(samples) need not be equal to len(_samples) as `samples` contain # all (partials + new) configurations obtained from the sampler, but # in `_samples`, configs are removed that have reached maximum epochs allowed # NOTE: `samples` and `_samples` should share the same index values, hence, - # avoid using `.iloc` and work with `.loc` on pandas DataFrame/Series - - # Is this "config = _samples.loc[_config_id]"? + # avoid using `.iloc` and work with `.loc` on these pandas DataFrame/Series + + if hasattr(self.acquisition, "mu"): + # collect prediction learning_curves + lcs = [] + # and tabular ids + tabular_ids = [] + for idx in _samples.index: + if self.acquisition_sampler.is_tabular: + tabular_ids.append(samples[idx]["id"].value) + if idx in self.observed_configs.df.index.levels[0]: + # extracting the available/observed learning curve + lc = self.observed_configs.extract_learning_curve( + idx, budget_id=None + ) + else: + # initialize a learning curve with a placeholder + # This is later padded accordingly for the Conv1D layer + lc = [] + lcs.append(lc) + + data = { + "Acq Value": acq.values, + "preds": self.acquisition.mu, + "incumbents": self.acquisition.mu_star, + "std": self.acquisition.std, + "pred_learning_curves": lcs, + } + if self.acquisition_sampler.is_tabular: + data["tabular_ids"] = tabular_ids + + # assigning config hyperparameters config = samples.loc[_config_id] - config.fidelity.set_value(_samples.loc[_config_id].fidelity.value) + # IMPORTANT: setting the fidelity value appropriately + + config.fidelity.value = ( + config.fidelity.lower + if best_idx > max(self.observed_configs.seen_config_ids) + else ( + self.get_budget_value( + self.observed_configs.get_max_observed_fidelity_level_per_config().loc[ + best_idx + ] + ) + + self.step_size # ONE-STEP FIDELITY QUERY + ) + ) # generating correct IDs if _config_id in self.observed_configs.seen_config_ids: config_id = f"{_config_id}_{self.get_budget_level(config)}" previous_config_id = f"{_config_id}_{self.get_budget_level(config) - 1}" else: config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, previous_config_id diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 904d79e8..964e483d 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -8,10 +8,13 @@ import torch from neps.utils.common import instance_from_map -from ..bayesian_optimization.models import SurrogateModelMapping -from ..multi_fidelity.utils import normalize_vectorize_config -from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity -from ..utils import map_real_hyperparameters_from_tabular_ids +# from ..bayesian_optimization.models import SurrogateModelMapping +from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping +# from ..multi_fidelity.utils import normalize_vectorize_config +from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config +# from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity +from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids class MFBOBase: @@ -142,7 +145,7 @@ def is_init_phase(self) -> bool: def sample_new_config( self, rung: int = None, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): """Samples configuration from policies or random.""" if self.model_based and not self.is_init_phase(): @@ -187,7 +190,7 @@ class FreezeThawModel: def __init__( self, pipeline_space, - surrogate_model: str = "deep_gp", + surrogate_model: str = "pfn", surrogate_model_args: dict = None, ): self.observed_configs = None @@ -198,6 +201,11 @@ def __init__( ) if self.surrogate_model_name in ["deep_gp", "pfn"]: self.surrogate_model_args.update({"pipeline_space": pipeline_space}) + elif self.surrogate_model_name == "dpl": + self.surrogate_model_args.update( + {"pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs} + ) # instantiate the surrogate model self.surrogate_model = instance_from_map( @@ -233,7 +241,7 @@ def _fantasize_pending(self, train_x, train_y, pending_x): def _fit(self, train_x, train_y, train_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: + elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]: self.surrogate_model.fit(train_x, train_y, train_lcs) else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options @@ -244,7 +252,7 @@ def _fit(self, train_x, train_y, train_lcs): def _predict(self, test_x, test_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: return self.surrogate_model.predict(test_x) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: + elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]: return self.surrogate_model.predict(test_x, test_lcs) else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options @@ -256,18 +264,35 @@ def set_state( self, pipeline_space, surrogate_model_args, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): self.pipeline_space = pipeline_space self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) + if self.surrogate_model_name == "dpl": + self.surrogate_model_args.update( + {"pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs} + ) + self.surrogate_model = instance_from_map( + SurrogateModelMapping, + self.surrogate_model_name, + name="surrogate model", + kwargs=self.surrogate_model_args, + ) + # only to handle tabular spaces if self.pipeline_space.has_tabular: if self.surrogate_model_name in ["deep_gp", "pfn"]: self.surrogate_model_args.update( {"pipeline_space": self.pipeline_space.raw_tabular_space} ) + elif self.surrogate_model_name == "dpl": + self.surrogate_model_args.update( + {"pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs} + ) # instantiate the surrogate model, again, with the new pipeline space self.surrogate_model = instance_from_map( SurrogateModelMapping, @@ -275,6 +300,17 @@ def set_state( name="surrogate model", kwargs=self.surrogate_model_args, ) + elif self.surrogate_model_name == "dpl": + self.surrogate_model_args.update( + {"pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs} + ) + self.surrogate_model = instance_from_map( + SurrogateModelMapping, + self.surrogate_model_name, + name="surrogate model", + kwargs=self.surrogate_model_args, + ) def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): if train_x is None: @@ -286,9 +322,7 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None) if decay_t is None: decay_t = len(train_x) - train_x, train_y, train_lcs = self._fantasize_pending( - train_x, train_y, pending_x - ) + train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) self._fit(train_x, train_y, train_lcs) return self.surrogate_model, decay_t @@ -302,7 +336,7 @@ def __init__(self, *args, **kwargs): self.train_x = None self.train_y = None - def _fit(self, *args): + def _fit(self, *args): # pylint: disable=unused-argument assert self.surrogate_model_name == "pfn" self.preprocess_training_set() self.surrogate_model.fit(self.train_x, self.train_y) @@ -324,6 +358,8 @@ def preprocess_training_set(self): configs, idxs, performances = self.observed_configs.get_tokenized_data( self.observed_configs.df.copy().assign(config=_configs) ) + idxs = idxs.astype(float) + idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper # TODO: account for fantasization self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) self.train_y = torch.Tensor(performances).to(device) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 85f1bb69..cc4e2e66 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -5,8 +5,8 @@ import pandas as pd import torch -from ...optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ...search_spaces.search_space import SearchSpace +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.search_spaces.search_space import SearchSpace def continuous_to_tabular( @@ -53,6 +53,7 @@ class MFObservedData: default_config_col = "config" default_perf_col = "perf" default_lc_col = "learning_curves" + # TODO: deepcopy all the mutable outputs from the dataframe def __init__( self, @@ -77,6 +78,7 @@ def __init__( self.config_idx = index_names[0] self.budget_idx = index_names[1] + self.index_names = index_names index = pd.MultiIndex.from_tuples([], names=index_names) @@ -127,8 +129,9 @@ def add_data( data_list = data if not self.df.index.isin(index_list).any(): - _df = pd.DataFrame(data_list, columns=self.df.columns, index=index_list) - self.df = pd.concat((self.df, _df)) + index = pd.MultiIndex.from_tuples(index_list, names=self.index_names) + _df = pd.DataFrame(data_list, columns=self.df.columns, index=index) + self.df = _df.copy() if self.df.empty else pd.concat((self.df, _df)) elif error: raise ValueError( f"Data with at least one of the given indices already " @@ -176,7 +179,7 @@ def get_incumbents_for_budgets(self, maximize: bool = False): Returns a series object with the best partial configuration for each budget id Note: this will always map the best lowest ID if two configurations - has the same performance at the same fidelity + have the same performance at the same fidelity """ learning_curves = self.get_learning_curves() if maximize: @@ -203,6 +206,16 @@ def get_best_performance_for_each_budget(self, maximize: bool = False): return performance + def get_budget_level_for_best_performance(self, maximize: bool = False) -> int: + """Returns the lowest budget level at which the highest performance was recorded. + """ + perf_per_z = self.get_best_performance_for_each_budget(maximize=maximize) + y_star = self.get_best_seen_performance(maximize=maximize) + # uses the minimum of the budget that see the maximum obseved score + op = max if maximize else min + z_inc = int(op([_z for _z, _y in perf_per_z.items() if _y == y_star])) + return z_inc + def get_best_learning_curve_id(self, maximize: bool = False): """ Returns a single configuration id of the best observed performance @@ -238,7 +251,17 @@ def reduce_to_max_seen_budgets(self): def get_partial_configs_at_max_seen(self): return self.reduce_to_max_seen_budgets()[self.config_col] - def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]: + def extract_learning_curve( + self, config_id: int, budget_id: int | None = None + ) -> list[float]: + if budget_id is None: + # budget_id only None when predicting + # extract full observed learning curve for prediction pipeline + budget_id = max(self.df.loc[config_id].index.get_level_values("budget_id").values) + 1 + + # For the first epoch we have no learning curve available + if budget_id == 0: + return [] # reduce budget_id to discount the current validation loss # both during training and prediction phase budget_id = max(0, budget_id - 1) @@ -247,11 +270,12 @@ def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]: else: lcs = self.get_learning_curves() lc = lcs.loc[config_id, :budget_id].values.flatten().tolist() - return lc + return deepcopy(lc) def get_training_data_4DyHPO( self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None ): + start = time.time() configs = [] learning_curves = [] performance = [] @@ -266,8 +290,34 @@ def get_training_data_4DyHPO( configs.append(row[self.config_col]) performance.append(row[self.perf_col]) learning_curves.append(self.extract_learning_curve(config_id, budget_id)) + # print("-" * 50) + # print(f"| Time for `get_training_data_4DyHPO()`: {time.time()-start:.2f}s") + # print("-" * 50) return configs, learning_curves, performance + def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series: + """Returns the best score recorded per config across fidelities seen. + """ + op = np.max if maximize else np.min + perf = ( + self.df + .sort_values("budget_id", ascending=False) # sorts with largest budget first + .groupby("config_id") # retains only config_id + .first() # retrieves the largest budget seen for each config_id + .learning_curves # extracts all values seen till largest budget for a config + .apply(op) # finds the minimum over per-config learning curve + ) + return perf + + def get_max_observed_fidelity_level_per_config(self) -> pd.Series: + """Returns the highest fidelity level recorded per config seen. + """ + max_z_observed = { + _id: self.df.loc[_id,:].index.sort_values()[-1] + for _id in self.df.index.get_level_values("config_id").sort_values() + } + return pd.Series(max_z_observed) + def get_tokenized_data(self, df: pd.DataFrame): idxs = df.index.values idxs = np.array([list(idx) for idx in idxs]) diff --git a/neps/optimizers/utils.py b/neps/optimizers/utils.py index c203f4db..e9d29222 100644 --- a/neps/optimizers/utils.py +++ b/neps/optimizers/utils.py @@ -1,13 +1,7 @@ import pandas as pd -from ..search_spaces.search_space import SearchSpace - - -# def map_real_hyperparameters_from_tabular_ids( -# ids: pd.Series, pipeline_space: SearchSpace -# ) -> pd.Series: -# return x - +from neps.search_spaces.search_space import SearchSpace + def map_real_hyperparameters_from_tabular_ids( x: pd.Series, pipeline_space: SearchSpace @@ -25,23 +19,12 @@ def map_real_hyperparameters_from_tabular_ids( """ if len(x) == 0: return x - # extract fid name - _x = x.iloc[0].hp_values() - _x.pop("id") - fid_name = list(_x.keys())[0] - for i in x.index.values: - # extracting actual HPs from the tabular space - _config = pipeline_space.custom_grid_table.loc[x.loc[i]["id"].value].to_dict() - # updating fidelities as per the candidate set passed - _config.update({fid_name: x.loc[i][fid_name].value}) - # placeholder config from the raw tabular space - config = pipeline_space.raw_tabular_space.sample( - patience=100, - user_priors=True, - ignore_fidelity=True # True allows fidelity to appear in the sample - ) - # copying values from table to placeholder config of type SearchSpace - config.load_from(_config) - # replacing the ID in the candidate set with the actual HPs of the config - x.loc[i] = config - return x + # copying hyperparameter configs based on IDs + _x = pd.Series( + [pipeline_space.custom_grid_table[x.loc[idx]["id"].value] for idx in x.index.values], + index=x.index + ) + # setting the passed fidelities for the corresponding IDs + for idx in _x.index.values: + _x.loc[idx].fidelity.value = x.loc[idx].fidelity.value + return _x diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 85cef066..bfb707bf 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -615,6 +615,13 @@ def get_search_space_grid( Does not support graph parameters currently. + !!! note "TODO" + + Include default hyperparameters in the grid. + If all HPs have a `default` then add a single configuration. + If only partial HPs have defaults then add all combinations of defaults, but only to + the end of the list of configs. + Args: size_per_numerical_hp: The size of the grid for each numerical hyperparameter. include_endpoints: Whether to include the endpoints of the grid. diff --git a/neps/utils/common.py b/neps/utils/common.py index fec76b57..8e90680a 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -3,11 +3,13 @@ from __future__ import annotations import inspect +import random from collections.abc import Iterable, Mapping, Sequence from functools import partial from pathlib import Path from typing import Any +import numpy as np import torch import yaml @@ -356,3 +358,72 @@ def instance_from_map( # noqa: C901, PLR0912 raise TypeError(f"{e} when calling {instance} with {args_dict}") from e return instance + + +def get_rnd_state() -> dict: + np_state = list(np.random.get_state()) + np_state[1] = np_state[1].tolist() + state = { + "random_state": random.getstate(), + "np_seed_state": np_state, + "torch_seed_state": torch.random.get_rng_state().tolist(), + } + if torch.cuda.is_available(): + state["torch_cuda_seed_state"] = [ + dev.tolist() for dev in torch.cuda.get_rng_state_all() + ] + return state + + +def set_rnd_state(state: dict): + # rnd_s1, rnd_s2, rnd_s3 = state["random_state"] + random.setstate( + tuple( + tuple(rnd_s) if isinstance(rnd_s, list) else rnd_s + for rnd_s in state["random_state"] + ) + ) + np.random.set_state(tuple(state["np_seed_state"])) + torch.random.set_rng_state(torch.ByteTensor(state["torch_seed_state"])) + if torch.cuda.is_available() and "torch_cuda_seed_state" in state: + torch.cuda.set_rng_state_all( + [torch.ByteTensor(dev) for dev in state["torch_cuda_seed_state"]] + ) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__dict__ = self + + +class DataWriter: + """A class to specify how to save/write a data to the folder by + implementing your own write_data function. + Use the set_attributes function to set all your necessary attributes and the data + and then write_data will be called with only the directory path as argument + during the write process. + """ + + def __init__(self, name: str): + self.name = name + + def set_attributes(self, attribute_dict: dict[str, Any]): + for attribute_name, attribute in attribute_dict.items(): + setattr(self, attribute_name, attribute) + + def write_data(self, to_directory: Path): + raise NotImplementedError + + +class EvaluationData: + """A class to store some data for a single evaluation (configuration) + and write that data to its corresponding config folder. + """ + + def __init__(self): + self.data_dict: dict[str, DataWriter] = {} + + def write_all(self, directory: Path): + for _, data_writer in self.data_dict.items(): + data_writer.write_data(directory) From 46e39d52dc7d1919ed6164e407bded6e764a313f Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 16:54:45 +0100 Subject: [PATCH 10/46] Testing for ifbo as dep with mismatch of Python versions --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8e34b01b..f1882daf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,9 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" -ifbo = "^0.3" + +[tool.poetry.extras] +ifbo = ["ifbo>=0.3,<0.4 ; { python_version >= '3.10' , python_version < '3.12' }"] [tool.poetry.group.dev.dependencies] ruff = "^0.4" From d97d3dab19b739aade66d70c1242d96d969e9483 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 17:03:41 +0100 Subject: [PATCH 11/46] Trying again --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f1882daf..5bf34230 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,8 +64,13 @@ tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" +[tool.poetry.dependencies.ifbo] +version = ">=0.3,<0.4" +python = ">=3.10,<3.12" +optional = true + [tool.poetry.extras] -ifbo = ["ifbo>=0.3,<0.4 ; { python_version >= '3.10' , python_version < '3.12' }"] +ifbo = ["ifbo"] [tool.poetry.group.dev.dependencies] ruff = "^0.4" From 00530c56a7235a68a26a3a8a4a7e0d2b2fa55f9a Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 17:11:44 +0100 Subject: [PATCH 12/46] Resolving scipy dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5bf34230..ca2d2d8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ numpy = "^1" pandas = "^2" networkx = "^2.6.3" nltk = "^3.6.4" -scipy = "^1" +scipy = "scipy>=1.13.1" torch = ">1.7.0,!=2.0.1, !=2.1.0" matplotlib = "^3" more-itertools = "*" From 41e254aa82f1834c7f9ffc6576133d1ce8501258 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 17:13:36 +0100 Subject: [PATCH 13/46] Simple format fix --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ca2d2d8f..77292875 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ numpy = "^1" pandas = "^2" networkx = "^2.6.3" nltk = "^3.6.4" -scipy = "scipy>=1.13.1" +scipy = ">=1.13.1" torch = ">1.7.0,!=2.0.1, !=2.1.0" matplotlib = "^3" more-itertools = "*" From bbb9038e90075395d557f00647e10ea4d8ec205b Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 15 Jul 2024 19:17:44 +0100 Subject: [PATCH 14/46] Adding MFPI-random + misc. changes --- neps/optimizers/__init__.py | 1 + .../acquisition_functions/__init__.py | 6 + .../acquisition_functions/mf_pi.py | 447 +++++++++++++ .../bayesian_optimization/models/deepGP.py | 632 ------------------ neps/optimizers/default_searchers/ifbo.yaml | 2 + neps/optimizers/multi_fidelity/dyhpo.py | 11 +- 6 files changed, 464 insertions(+), 635 deletions(-) create mode 100644 neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py delete mode 100644 neps/optimizers/bayesian_optimization/models/deepGP.py create mode 100644 neps/optimizers/default_searchers/ifbo.yaml diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index a2b86c14..7a3619ce 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -46,4 +46,5 @@ "priorband": PriorBand, "mobster": MOBSTER, "ifbo_ei": MFEIBO, + "ifbo": partial(MFEIBO, acquisition="MFPI-random"), } diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index 3b36cd89..eed5f7f7 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -7,6 +7,7 @@ ComprehensiveExpectedImprovement, ) from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFEI +from neps.optimizers.bayesian_optimization.acquisition_functions.mf_pi import MFPI_Random from neps.optimizers.bayesian_optimization.acquisition_functions.ucb import ( UpperConfidenceBound, MF_UCB, @@ -40,6 +41,11 @@ in_fill="best", augmented_ei=False, ), + "MFPI-random": partial( + MFPI_Random, + in_fill="best", + augmented_ei=False, + ), "UCB": partial( UpperConfidenceBound, maximize=False, diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py new file mode 100644 index 00000000..cc3804f5 --- /dev/null +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -0,0 +1,447 @@ +# type: ignore +from pathlib import Path +from typing import Any, Iterable, Tuple, Union + +import numpy as np +import pandas as pd +import torch +from torch.distributions import Normal + +from copy import deepcopy + +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.multi_fidelity.utils import MFObservedData +from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ComprehensiveExpectedImprovement +from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFStepBase + + +# NOTE: the order of inheritance is important +class MFPI(MFStepBase, ComprehensiveExpectedImprovement): + def __init__( + self, + pipeline_space: SearchSpace, + surrogate_model_name: str = None, + augmented_ei: bool = False, + xi: float = 0.0, + in_fill: str = "best", + log_ei: bool = False, + ): + super().__init__(augmented_ei, xi, in_fill, log_ei) + self.pipeline_space = pipeline_space + self.surrogate_model_name = surrogate_model_name + self.surrogate_model = None + self.observations = None + self.b_step = None + + def preprocess_inc_list(self, **kwargs) -> list: + assert "budget_list" in kwargs, "Requires a list of query step for candidate set." + budget_list = kwargs["budget_list"] + performances = self.observations.get_best_performance_for_each_budget() + inc_list = [] + for budget_level in budget_list: + if budget_level in performances.index: + inc = performances[budget_level] + else: + inc = self.observations.get_best_seen_performance() + inc_list.append(inc) + return inc_list + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. + """ + budget_list = [] + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + indices_to_drop = [] + for i, config in x.items(): + target_fidelity = config.fidelity.lower + if i <= max(self.observations.seen_config_ids): + # IMPORTANT to set the fidelity at which EI will be calculated only for + # the partial configs that have been observed already + target_fidelity = config.fidelity.value + self.b_step + + if np.less_equal(target_fidelity, config.fidelity.upper): + # only consider the configs with fidelity lower than the max fidelity + config.fidelity.value = target_fidelity + budget_list.append(self.get_budget_level(config)) + else: + # if the target_fidelity higher than the max drop the configuration + indices_to_drop.append(i) + else: + config.fidelity.value = target_fidelity + budget_list.append(self.get_budget_level(config)) + + # Drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + # Collecting incumbent list per configuration + inc_list = self.preprocess_inc_list(budget_list=budget_list) + + return x, torch.Tensor(inc_list) + + def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: + # deepcopy + _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) + if self.surrogate_model_name == "pfn": + _x, _x_tok, inc_list = self.preprocess_pfn( + x.copy() + ) # IMPORTANT change from vanilla-EI + pi = self.eval_pfn_pi(_x_tok, inc_list) + elif self.surrogate_model_name in ["deep_gp", "dpl"]: + _x, inc_list = self.preprocess_deep_gp( + _x + ) # IMPORTANT change from vanilla-EI + pi = self.eval_gp_pi(_x.values.tolist(), inc_list) + elif self.surrogate_model_name == "gp": + _x, inc_list = self.preprocess_gp( + _x + ) # IMPORTANT change from vanilla-EI + pi = self.eval_gp_pi(_x.values.tolist(), inc_list) + else: + raise ValueError( + f"Unrecognized surrogate model name: {self.surrogate_model_name}" + ) + + if pi.is_cuda: + pi = pi.cpu() + if len(_x) > 1 and asscalar: + return pi.detach().numpy(), _x + else: + return pi.detach().numpy().item(), _x + + def eval_pfn_pi( + self, x: Iterable, inc_list: Iterable + ) -> Union[np.ndarray, torch.Tensor, float]: + """PFN-PI modified to preprocess samples and accept list of incumbents.""" + pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list) + if len(pi.shape) == 2: + pi = pi.flatten() + print(f"Maximum PI: {pi.max()}") + return pi + + def eval_gp_pi( + self, x: Iterable, inc_list: Iterable + ) -> Union[np.ndarray, torch.Tensor, float]: + _x = x.copy() + try: + mu, cov = self.surrogate_model.predict(_x) + except ValueError as e: + raise e + std = torch.sqrt(torch.diag(cov)) + mu_star = inc_list.to(mu.device) + + gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) + pi = gauss.cdf((mu_star - mu) / (std + 1E-9)) + return pi + + +class MFPI_AtMax(MFPI): + + def preprocess_inc_list(self, **kwargs) -> list: + assert "len_x" in kwargs, "Requires the length of the candidate set." + len_x = kwargs["len_x"] + # finds global incumbent + inc_value = min(self.observations.get_best_performance_for_each_budget()) + # uses the best seen value as the incumbent in EI computation for all candidates + inc_list = [inc_value] * len_x + return inc_list + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point. + Unlike the base class MFPI, sets the target fidelity to be max budget and the + incumbent choice to be the max seen across history for all candidates. + """ + budget_list = [] + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + indices_to_drop = [] + for i, config in x.items(): + target_fidelity = config.fidelity.upper # change from MFEI + + if config.fidelity.value == target_fidelity: + # if the target_fidelity already reached, drop the configuration + indices_to_drop.append(i) + else: + config.fidelity.value = target_fidelity + budget_list.append(self.get_budget_level(config)) + + # drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + # create the same incumbent for all candidates + inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) + + return x, torch.Tensor(inc_list) + + +class MFPI_Dyna(MFPI_AtMax): + """ + Computes extrapolation length of curves to maximum fidelity seen. + Uses the global incumbent as the best score in EI computation. + """ + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point. + Unlike the base class MFEI, sets the target fidelity to be max budget and the + incumbent choice to be the max seen across history for all candidates. + """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + # find the maximum observed steps per config to obtain the current pseudo_z_max + max_z_level_per_x = self.observations.get_max_observed_fidelity_level_per_config() + pseudo_z_level_max = max_z_level_per_x.max() # highest seen fidelity step so far + # find the fidelity step at which the best seen performance was recorded + z_inc_level = self.observations.get_budget_level_for_best_performance() + # retrieving actual fidelity values from budget level + ## marker 1: the fidelity value at which the best seen performance was recorded + z_inc = self.b_step * z_inc_level + self.pipeline_space.fidelity.lower + ## marker 2: the maximum fidelity value recorded in observation history + pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower + + # TODO: compare with this first draft logic + # def update_fidelity(config): + # ### DO NOT DELETE THIS FUNCTION YET + # # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max) + # ## that is, choose the next highest marker from 1 and 2 + # z_extrapolate = min( + # max(config.fidelity.value + self.b_step, z_inc), + # pseudo_z_max + # ) + # config.fidelity.value = z_extrapolate + # return config + + def update_fidelity(config): + # for all configs, set to pseudo_z_max + ## that is, choose the highest seen fidelity in observation history + z_extrapolate = pseudo_z_max + config.fidelity.value = z_extrapolate + return config + + # collect IDs for partial configurations + _partial_config_ids = (x.index <= max(self.observations.seen_config_ids)) + # filter for configurations that reached max budget + indices_to_drop = [ + _idx + for _idx, _x in x.loc[_partial_config_ids].items() + if _x.fidelity.value == self.pipeline_space.fidelity.upper + ] + # drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + # set fidelity for all partial configs + x = x.apply(update_fidelity) + + # create the same incumbent for all candidates + inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) + + return x, torch.Tensor(inc_list) + + +class MFPI_Random(MFPI): + + BUDGET = 1000 + + def __init__( + self, + pipeline_space: SearchSpace, + horizon: str = "random", + threshold: str = "random", + surrogate_model_name: str = None, + augmented_ei: bool = False, + xi: float = 0.0, + in_fill: str = "best", + log_ei: bool = False, + ): + super().__init__(pipeline_space, surrogate_model_name, augmented_ei, xi, in_fill, log_ei) + self.horizon = horizon + self.threshold = threshold + + + + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + **kwargs, + ): + # set RNG + self.rng = np.random.RandomState(seed=42) + for i in range(len(observations.completed_runs)): + self.rng.uniform(-4,-1) + self.rng.randint(1,51) + + return super().set_state(pipeline_space, surrogate_model, observations, b_step) + + def sample_horizon(self, steps_passed): + if self.horizon == 'random': + shortest = self.pipeline_space.fidelity.lower + longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + return self.rng.randint(shortest, longest+1) + elif self.horizon == 'max': + return min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + else: + return int(self.horizon) + + def sample_threshold(self, f_inc): + if self.threshold == 'random': + lu = 10**self.rng.uniform(-4,-1) # % of gap closed + else: + lu = float(self.threshold) + return f_inc * (1 - lu) + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. + """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + + indices_to_drop = [] + inc_list = [] + + steps_passed = len(self.observations.completed_runs) + print(f"Steps acquired: {steps_passed}") + + # Like EI-AtMax, use the global incumbent as a basis for the EI threshold + inc_value = min(self.observations.get_best_performance_for_each_budget()) + # Extension: Add a random min improvement threshold to encourage high risk high gain + t_value = self.sample_threshold(inc_value) + print(f"Threshold for PI: {inc_value - t_value}") + inc_value = t_value + + # Like MFEI: set fidelities to query using horizon as self.b_step + # Extension: Unlike DyHPO, we sample the horizon randomly over the full range + horizon = self.sample_horizon(steps_passed) + print(f"Horizon for PI: {horizon}") + for i, config in x.items(): + if i <= max(self.observations.seen_config_ids): + current_fidelity = config.fidelity.value + if np.equal(config.fidelity.value, config.fidelity.upper): + # this training run has ended, drop it from future selection + indices_to_drop.append(i) + else: + # a candidate partial training run to continue + target_fidelity = config.fidelity.value + horizon + config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) # if horizon exceeds max, query at max + inc_list.append(inc_value) + else: + # a candidate new training run that we would need to start + current_fidelity = 0 + config.fidelity.value = horizon + inc_list.append(inc_value) + #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") + + # Drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + assert len(inc_list) == len(x) + + return x, torch.Tensor(inc_list) + + +class MFPI_Random_HiT(MFPI): + + BUDGET = 1000 # total budget in freeze-thaw steps available + + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + **kwargs, + ): + # set RNG + self.rng = np.random.RandomState(seed=42) + for i in range(len(observations.completed_runs)): + self.rng.uniform(-4,0) + self.rng.randint(1,51) + + return super().set_state(pipeline_space, surrogate_model, observations, b_step) + + def sample_horizon(self, steps_passed): + shortest = self.pipeline_space.fidelity.lower + longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + return self.rng.randint(shortest, longest+1) + + def sample_threshold(self, f_inc): + lu = 10**self.rng.uniform(-4,0) # % of gap closed + return f_inc * (1 - lu) + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. + """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + + indices_to_drop = [] + inc_list = [] + + steps_passed = len(self.observations.completed_runs) + print(f"Steps acquired: {steps_passed}") + + # Like EI-AtMax, use the global incumbent as a basis for the EI threshold + inc_value = min(self.observations.get_best_performance_for_each_budget()) + # Extension: Add a random min improvement threshold to encourage high risk high gain + t_value = self.sample_threshold(inc_value) + print(f"Threshold for EI: {inc_value - t_value}") + inc_value = t_value + + # Like MFEI: set fidelities to query using horizon as self.b_step + # Extension: Unlike DyHPO, we sample the horizon randomly over the full range + horizon = self.sample_horizon(steps_passed) + print(f"Horizon for EI: {horizon}") + for i, config in x.items(): + if i <= max(self.observations.seen_config_ids): + current_fidelity = config.fidelity.value + if np.equal(config.fidelity.value, config.fidelity.upper): + # this training run has ended, drop it from future selection + indices_to_drop.append(i) + else: + # a candidate partial training run to continue + target_fidelity = config.fidelity.value + horizon + # if horizon exceeds max, query at max + config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) + inc_list.append(inc_value) + else: + # a candidate new training run that we would need to start + current_fidelity = 0 + config.fidelity.value = horizon + inc_list.append(inc_value) + #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") + + # Drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + assert len(inc_list) == len(x) + + return x, torch.Tensor(inc_list) diff --git a/neps/optimizers/bayesian_optimization/models/deepGP.py b/neps/optimizers/bayesian_optimization/models/deepGP.py deleted file mode 100644 index c3522e73..00000000 --- a/neps/optimizers/bayesian_optimization/models/deepGP.py +++ /dev/null @@ -1,632 +0,0 @@ -import logging -import os -from copy import deepcopy -from pathlib import Path - -import gpytorch -import numpy as np -import torch -import torch.nn as nn - -from ....search_spaces.search_space import ( - CategoricalParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) - - -def count_non_improvement_steps(root_directory: Path | str) -> int: - root_directory = Path(root_directory) - - all_losses_file = root_directory / "all_losses_and_configs.txt" - best_loss_fiel = root_directory / "best_loss_trajectory.txt" - - # Read all losses from the file in the order they are explored - losses = [ - float(line[6:]) - for line in all_losses_file.read_text(encoding="utf-8").splitlines() - if "Loss: " in line - ] - # Get the best seen loss value - best_loss = float(best_loss_fiel.read_text(encoding="utf-8").splitlines()[-1].strip()) - - # Count the non-improvement - count = 0 - for loss in reversed(losses): - if np.greater(loss, best_loss): - count += 1 - else: - break - - return count - - -class NeuralFeatureExtractor(nn.Module): - """ - Neural network to be used in the DeepGP - """ - - def __init__(self, input_size: int, **kwargs): - super().__init__() - - # Set number of hyperparameters - self.input_size = input_size - - self.n_layers = kwargs.get("n_layers", 2) - self.activation = nn.LeakyReLU() - - layer1_units = kwargs.get("layer1_units", 128) - self.fc1 = nn.Linear(input_size, layer1_units) - self.bn1 = nn.BatchNorm1d(layer1_units) - - previous_layer_units = layer1_units - for i in range(2, self.n_layers): - next_layer_units = kwargs.get(f"layer{i}_units", 256) - setattr( - self, - f"fc{i}", - nn.Linear(previous_layer_units, next_layer_units), - ) - setattr( - self, - f"bn{i}", - nn.BatchNorm1d(next_layer_units), - ) - previous_layer_units = next_layer_units - - setattr( - self, - f"fc{self.n_layers}", - nn.Linear( - previous_layer_units + kwargs.get("cnn_nr_channels", 4), - # accounting for the learning curve features - kwargs.get(f"layer{self.n_layers}_units", 256), - ), - ) - self.cnn = nn.Sequential( - nn.Conv1d( - in_channels=1, - kernel_size=(kwargs.get("cnn_kernel_size", 3),), - out_channels=4, - ), - nn.AdaptiveMaxPool1d(1), - ) - - def forward(self, x, budgets, learning_curves): - # add an extra dimensionality for the budget - # making it nr_rows x 1. - budgets = torch.unsqueeze(budgets, dim=1) - # concatenate budgets with examples - x = torch.cat((x, budgets), dim=1) - x = self.fc1(x) - x = self.activation(self.bn1(x)) - - for i in range(2, self.n_layers): - x = self.activation(getattr(self, f"bn{i}")(getattr(self, f"fc{i}")(x))) - - # add an extra dimensionality for the learning curve - # making it nr_rows x 1 x lc_values. - learning_curves = torch.unsqueeze(learning_curves, 1) - lc_features = self.cnn(learning_curves) - # revert the output from the cnn into nr_rows x nr_kernels. - lc_features = torch.squeeze(lc_features, 2) - - # put learning curve features into the last layer along with the higher level features. - x = torch.cat((x, lc_features), dim=1) - x = self.activation(getattr(self, f"fc{self.n_layers}")(x)) - - return x - - -class GPRegressionModel(gpytorch.models.ExactGP): - """ - A simple GP model. - """ - - def __init__( - self, - train_x: torch.Tensor, - train_y: torch.Tensor, - likelihood: gpytorch.likelihoods.GaussianLikelihood, - ): - """ - Constructor of the GPRegressionModel. - - Args: - train_x: The initial train examples for the GP. - train_y: The initial train labels for the GP. - likelihood: The likelihood to be used. - """ - super().__init__(train_x, train_y, likelihood) - - self.mean_module = gpytorch.means.ConstantMean() - self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - - -class DeepGP: - """ - Gaussian process with a deep kernel - """ - - def __init__( - self, - pipeline_space: SearchSpace, - neural_network_args: dict | None = None, - logger=None, - surrogate_model_fit_args: dict | None = None, - # IMPORTANT: Checkpointing does not use file locking, - # IMPORTANT: hence, it is not suitable for multiprocessing settings - checkpointing: bool = False, - root_directory: Path | str | None = None, - checkpoint_file: Path | str = "surrogate_checkpoint.pth", - refine_epochs: int = 50, - **kwargs, - ): - self.surrogate_model_fit_args = ( - surrogate_model_fit_args if surrogate_model_fit_args is not None else {} - ) - - self.checkpointing = checkpointing - self.refine_epochs = refine_epochs - if checkpointing: - assert ( - root_directory is not None - ), "neps root_directory must be provided for the checkpointing" - self.root_dir = Path(os.getcwd(), root_directory) - self.checkpoint_path = Path(os.getcwd(), root_directory, checkpoint_file) - - super().__init__() - self.__preprocess_search_space(pipeline_space) - # set the categories array for the encoder - self.categories_array = np.array(self.categories) - - if neural_network_args is None: - neural_network_args = {} - self.nn_args = neural_network_args - - self.device = ( - torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - ) - # self.device = torch.device("cpu") - - # Save the NN args, necessary for preprocessing - self.cnn_kernel_size = neural_network_args.get("cnn_kernel_size", 3) - self.model, self.likelihood, self.mll = self.__initialize_gp_model( - neural_network_args.get("n_layers", 2) - ) - - # build the neural network - self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args) - - self.logger = logger or logging.getLogger("neps") - - def __initialize_gp_model( - self, - train_size: int, - ) -> tuple[ - GPRegressionModel, - gpytorch.likelihoods.GaussianLikelihood, - gpytorch.mlls.ExactMarginalLogLikelihood, - ]: - """ - Called when the surrogate is first initialized or restarted. - - Args: - train_size: The size of the current training set. - - Returns: - model, likelihood, mll - The GP model, the likelihood and - the marginal likelihood. - """ - train_x = torch.ones(train_size, train_size).to(self.device) - train_y = torch.ones(train_size).to(self.device) - - likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device) - model = GPRegressionModel( - train_x=train_x, train_y=train_y, likelihood=likelihood - ).to(self.device) - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device) - return model, likelihood, mll - - def __preprocess_search_space(self, pipeline_space: SearchSpace): - self.categories = [] - self.categorical_hps = [] - - parameter_count = 0 - for hp_name, hp in pipeline_space.items(): - # Collect all categories in a list for the encoder - if isinstance(hp, CategoricalParameter): - self.categorical_hps.append(hp_name) - self.categories.extend(hp.choices) - parameter_count += len(hp.choices) - else: - parameter_count += 1 - - # add 1 for budget - self.input_size = parameter_count - self.continuous_params_size = self.input_size - len(self.categories) - self.min_fidelity = pipeline_space.fidelity.lower - self.max_fidelity = pipeline_space.fidelity.upper - - def __encode_config(self, config: SearchSpace): - categorical_encoding = np.zeros_like(self.categories_array) - continuous_values = [] - - for hp_name, hp in config.items(): - if hp.is_fidelity: - continue # Ignore fidelity - if hp_name in self.categorical_hps: - label = hp.value - categorical_encoding[np.argwhere(self.categories_array == label)] = 1 - else: - continuous_values.append(hp.value_to_normalized(hp.value)) - - continuous_encoding = np.array(continuous_values) - - encoding = np.concatenate([categorical_encoding, continuous_encoding]) - return encoding - - def __extract_budgets( - self, x_train: list[SearchSpace], normalized: bool = True - ) -> np.ndarray: - budgets = np.array([config.fidelity.value for config in x_train], dtype=np.single) - if normalized: - normalized_budgets = (budgets - self.min_fidelity) / ( - self.max_fidelity - self.min_fidelity - ) - budgets = normalized_budgets - return budgets - - def __preprocess_learning_curves( - self, learning_curves: list[list[float]], padding_value: float = 0.0 - ) -> np.ndarray: - # Add padding to the learning curves to make them the same size - - # Get max learning_curve length - max_length = 0 - for lc in learning_curves: - length = len(lc) - if length > max_length: - max_length = length - - for lc in learning_curves: - # add padding to the learning curve to fit the cnn kernel or - # the max_length depending on which is the largest - padding_length = max([max_length - len(lc), self.cnn_kernel_size - len(lc)]) - lc.extend([padding_value] * padding_length) - - # TODO: check if the lc values are within bounds [0, 1] (karibbov) - # TODO: add normalize_lcs option in the future - - return np.array(learning_curves, dtype=np.single) - - def __reset_xy( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - ): - self.normalize_budget = normalize_budget - self.normalize_y = normalize_y - - x_train, train_budgets, learning_curves = self._preprocess_input( - x_train, learning_curves, normalize_budget - ) - - y_train = self._preprocess_y(y_train, normalize_y) - - self.x_train = x_train - self.train_budgets = train_budgets - self.learning_curves = learning_curves - self.y_train = y_train - - def _preprocess_input( - self, - x: list[SearchSpace], - learning_curves: list[list[float]], - normalize_budget: bool = True, - ): - budgets = self.__extract_budgets(x, normalize_budget) - learning_curves = self.__preprocess_learning_curves(learning_curves) - - x = np.array([self.__encode_config(config) for config in x], dtype=np.single) - - x = torch.tensor(x).to(device=self.device) - budgets = torch.tensor(budgets).to(device=self.device) - learning_curves = torch.tensor(learning_curves).to(device=self.device) - - return x, budgets, learning_curves - - def _preprocess_y(self, y_train: list[float], normalize_y: bool = False): - y_train_array = np.array(y_train, dtype=np.single) - self.min_y = y_train_array.min() - self.max_y = y_train_array.max() - if normalize_y: - y_train_array = (y_train_array - self.min_y) / (self.max_y - self.min_y) - y_train_array = torch.tensor(y_train_array).to(device=self.device) - return y_train_array - - def fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - ): - self._fit(x_train, y_train, learning_curves, **self.surrogate_model_fit_args) - - def _fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - perf_patience: int = 10, - ): - self.__reset_xy( - x_train, - y_train, - learning_curves, - normalize_y=normalize_y, - normalize_budget=normalize_budget, - ) - self.model, self.likelihood, self.mll = self.__initialize_gp_model(len(y_train)) - self.nn = NeuralFeatureExtractor(self.input_size, **self.nn_args) - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - if self.checkpointing and self.checkpoint_path.exists(): - non_improvement_steps = count_non_improvement_steps(self.root_dir) - # If checkpointing and patience is not exhausted load a partial model - if non_improvement_steps < perf_patience: - n_epochs = self.refine_epochs - self.load_checkpoint() - self.logger.debug(f"No improvement for: {non_improvement_steps} evaulations") - self.logger.debug(f"N Epochs for the full training: {n_epochs}") - - initial_state = self.get_state() - try: - self.__train_model( - self.x_train, - self.train_budgets, - self.learning_curves, - self.y_train, - n_epochs=n_epochs, - batch_size=batch_size, - optimizer_args=optimizer_args, - early_stopping=early_stopping, - patience=patience, - ) - if self.checkpointing: - self.save_checkpoint() - except gpytorch.utils.errors.NotPSDError: - self.logger.info("Model training failed loading the untrained model") - self.load_checkpoint(initial_state) - # Delete checkpoint to restart training - self.delete_checkpoint() - - def __train_model( - self, - x_train: torch.Tensor, - train_budgets: torch.Tensor, - learning_curves: torch.Tensor, - y_train: torch.Tensor, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - ): - if optimizer_args is None: - optimizer_args = {"lr": 0.001} - - self.model.train() - self.likelihood.train() - self.nn.train() - self.optimizer = torch.optim.Adam( - [ - dict({"params": self.model.parameters()}, **optimizer_args), - dict({"params": self.nn.parameters()}, **optimizer_args), - ] - ) - - count_down = patience - min_avg_loss_val = np.inf - average_loss: float = 0.0 - - for epoch_nr in range(0, n_epochs): - if early_stopping and count_down == 0: - self.logger.info( - f"Epoch: {epoch_nr - 1} surrogate training stops due to early " - f"stopping with the patience: {patience} and " - f"the minimum average loss of {min_avg_loss_val} and " - f"the final average loss of {average_loss}" - ) - break - - n_examples_batch = x_train.size(dim=0) - - # get a random permutation for mini-batches - permutation = torch.randperm(n_examples_batch) - - # optimize over mini-batches - total_scaled_loss = 0.0 - for batch_idx, start_index in enumerate( - range(0, n_examples_batch, batch_size) - ): - end_index = start_index + batch_size - if end_index > n_examples_batch: - end_index = n_examples_batch - indices = permutation[start_index:end_index] - batch_x, batch_budget, batch_lc, batch_y = ( - x_train[indices], - train_budgets[indices], - learning_curves[indices], - y_train[indices], - ) - - minibatch_size = end_index - start_index - # if only one example in the batch, skip the batch. - # Otherwise, the code will fail because of batchnorm - if minibatch_size <= 1: - continue - - # Zero backprop gradients - self.optimizer.zero_grad() - - projected_x = self.nn(batch_x, batch_budget, batch_lc) - self.model.set_train_data(projected_x, batch_y, strict=False) - output = self.model(projected_x) - - # try: - # Calc loss and backprop derivatives - loss = -self.mll(output, self.model.train_targets) - episodic_loss_value: float = loss.detach().to("cpu").item() - # weighted sum over losses in the batch - total_scaled_loss = ( - total_scaled_loss + episodic_loss_value * minibatch_size - ) - - mse = gpytorch.metrics.mean_squared_error( - output, self.model.train_targets - ) - self.logger.debug( - f"Epoch {epoch_nr} Batch {batch_idx} - MSE {mse:.5f}, " - f"Loss: {episodic_loss_value:.3f}, " - f"lengthscale: {self.model.covar_module.base_kernel.lengthscale.item():.3f}, " - f"noise: {self.model.likelihood.noise.item():.3f}, " - ) - - loss.backward() - self.optimizer.step() - - # Get average weighted loss over every batch - average_loss = total_scaled_loss / n_examples_batch - if average_loss < min_avg_loss_val: - min_avg_loss_val = average_loss - count_down = patience - elif early_stopping: - self.logger.debug( - f"No improvement over the minimum loss value of {min_avg_loss_val} " - f"for the past {patience - count_down} epochs " - f"the training will stop in {count_down} epochs" - ) - count_down -= 1 - # except Exception as training_error: - # self.logger.error( - # f'The following error happened while training: {training_error}') - # # An error has happened, trigger the restart of the optimization and restart - # # the model with default hyperparameters. - # self.restart = True - # training_errored = True - # break - - def set_prediction_learning_curves(self, learning_curves: list[list[float]]): - self.prediction_learning_curves = learning_curves - - def predict( - self, x: list[SearchSpace], learning_curves: list[list[float]] | None = None - ): - # Preprocess input - if learning_curves is None: - learning_curves = self.prediction_learning_curves - x_test, test_budgets, learning_curves = self._preprocess_input( - x, learning_curves, self.normalize_budget - ) - - self.model.eval() - self.nn.eval() - self.likelihood.eval() - - with torch.no_grad(): - projected_train_x = self.nn( - self.x_train, self.train_budgets, self.learning_curves - ) - self.model.set_train_data( - inputs=projected_train_x, targets=self.y_train, strict=False - ) - - projected_test_x = self.nn(x_test, test_budgets, learning_curves) - - preds = self.likelihood(self.model(projected_test_x)) - - means = preds.mean.detach().cpu() - - if self.normalize_y: - means = (means + self.min_y) * (self.max_y - self.min_y) - - cov = torch.diag(torch.pow(preds.stddev.detach(), 2)).cpu() - - return means, cov - - def load_checkpoint(self, state: dict | None = None): - """ - Load the state from a previous checkpoint. - """ - if state is None: - checkpoint = torch.load(self.checkpoint_path) - else: - checkpoint = state - self.model.load_state_dict(checkpoint["gp_state_dict"]) - self.nn.load_state_dict(checkpoint["nn_state_dict"]) - self.likelihood.load_state_dict(checkpoint["likelihood_state_dict"]) - - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - def save_checkpoint(self, state: dict | None = None): - """ - Save the given state or the current state in a - checkpoint file. - - Args: - checkpoint_path: path to the checkpoint file - state: The state to save, if none, it will - save the current state. - """ - - if state is None: - torch.save( - self.get_state(), - self.checkpoint_path, - ) - else: - torch.save( - state, - self.checkpoint_path, - ) - - def get_state(self) -> dict[str, dict]: - """ - Get the current state of the surrogate. - - Returns: - current_state: A dictionary that represents - the current state of the surrogate model. - """ - current_state = { - "gp_state_dict": deepcopy(self.model.state_dict()), - "nn_state_dict": deepcopy(self.nn.state_dict()), - "likelihood_state_dict": deepcopy(self.likelihood.state_dict()), - } - - return current_state - - def delete_checkpoint(self): - self.checkpoint_path.unlink(missing_ok=True) diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml new file mode 100644 index 00000000..1eecea6a --- /dev/null +++ b/neps/optimizers/default_searchers/ifbo.yaml @@ -0,0 +1,2 @@ +strategy: ifbo +acquisition: MFPI-random \ No newline at end of file diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 9c567f89..40730d9d 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -44,7 +44,7 @@ def __init__( ignore_errors: bool = False, logger=None, # arguments for model - surrogate_model: str | Any = "deep_gp", + surrogate_model: str | Any = "gp", surrogate_model_args: dict = None, domain_se_kernel: str = None, graph_kernels: list = None, @@ -125,7 +125,9 @@ def __init__( self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space) # TODO: Better solution than branching based on the surrogate name is needed - if surrogate_model in ["deep_gp", "gp", "dpl"]: + if surrogate_model in ["deep_gp", "dpl"]: + raise NotImplementedError + elif surrogate_model == "gp": model_policy = FreezeThawModel elif surrogate_model == "pfn": model_policy = PFNSurrogate @@ -422,7 +424,10 @@ def get_config_and_ids( # pylint: disable=no-self-use config = self.pipeline_space.sample( patience=self.patience, user_priors=True, ignore_fidelity=False ) - config.fidelity.value = self.min_budget + _config_dict = config.hp_values() + _config_dict.update({config.fidelity_name: self.min_budget}) + config.set_hyperparameters_from_dict(_config_dict) + # config.fidelity.value = self.min_budget _config_id = self.observed_configs.next_config_id() elif self.is_init_phase() or self._model_update_failed: # promote a config randomly if initial design size is satisfied but the From a3c1a43a8834fb2576ceaf3e8a6247c057ba4711 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 5 Aug 2024 22:54:09 +0200 Subject: [PATCH 15/46] Major changes to get code synced; disabling Python 3.8 and 3.9 support --- .github/workflows/pre-commit.yaml | 1 - neps/api.py | 2 +- .../acquisition_functions/mf_ei.py | 27 +++++---------- .../acquisition_functions/mf_pi.py | 34 ++++++++----------- .../freeze_thaw_sampler.py | 18 +--------- neps/optimizers/multi_fidelity/dyhpo.py | 6 ++-- neps/optimizers/multi_fidelity/utils.py | 4 --- neps/search_spaces/search_space.py | 14 ++++++++ neps/utils/run_args.py | 10 +++--- pyproject.toml | 9 +---- 10 files changed, 48 insertions(+), 77 deletions(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index ace7ab76..a7bd4690 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -31,4 +31,3 @@ jobs: - run: pip install pre-commit - run: pre-commit install - run: pre-commit run --all-files - diff --git a/neps/api.py b/neps/api.py index 74754d61..4f81b0cf 100644 --- a/neps/api.py +++ b/neps/api.py @@ -80,7 +80,7 @@ def run( root_directory: The directory to save progress to. This is also used to synchronize multiple calls to run(.) for parallelization. run_args: An option for providing the optimization settings e.g. - max_evaluation_total in a YAML file. + max_evaluations_total in a YAML file. overwrite_working_directory: If true, delete the working directory at the start of the run. This is, e.g., useful when debugging a run_pipeline function. post_run_summary: If True, creates a csv file after each worker is done, diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index 8e74b338..5139d4b4 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -132,13 +132,13 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: if np.less_equal(target_fidelity, config.fidelity.upper): # only consider the configs with fidelity lower than the max fidelity - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) else: # if the target_fidelity higher than the max drop the configuration indices_to_drop.append(i) else: - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) # Drop unused configs @@ -267,7 +267,7 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # if the target_fidelity already reached, drop the configuration indices_to_drop.append(i) else: - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) # drop unused configs @@ -308,23 +308,11 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: ## marker 2: the maximum fidelity value recorded in observation history pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower - # TODO: compare with this first draft logic - # def update_fidelity(config): - # ### DO NOT DELETE THIS FUNCTION YET - # # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max) - # ## that is, choose the next highest marker from 1 and 2 - # z_extrapolate = min( - # max(config.fidelity.value + self.b_step, z_inc), - # pseudo_z_max - # ) - # config.fidelity.value = z_extrapolate - # return config - def update_fidelity(config): # for all configs, set to pseudo_z_max ## that is, choose the highest seen fidelity in observation history z_extrapolate = pseudo_z_max - config.fidelity.value = z_extrapolate + config.update_hp_values({config.fidelity_name: z_extrapolate}) return config # collect IDs for partial configurations @@ -437,12 +425,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: else: # a candidate partial training run to continue target_fidelity = config.fidelity.value + horizon - config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) # if horizon exceeds max, query at max + # if horizon exceeds max, query at max + config.uppdate_hp_values({ + config.fidelity_name: min(target_fidelity, config.fidelity.upper) + }) inc_list.append(inc_value) else: # a candidate new training run that we would need to start current_fidelity = 0 - config.fidelity.value = horizon + config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index cc3804f5..e64ea2e3 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -68,13 +68,13 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: if np.less_equal(target_fidelity, config.fidelity.upper): # only consider the configs with fidelity lower than the max fidelity - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) else: # if the target_fidelity higher than the max drop the configuration indices_to_drop.append(i) else: - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) # Drop unused configs @@ -173,7 +173,7 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # if the target_fidelity already reached, drop the configuration indices_to_drop.append(i) else: - config.fidelity.value = target_fidelity + config.update_hp_values({config.fidelity_name: target_fidelity}) budget_list.append(self.get_budget_level(config)) # drop unused configs @@ -214,23 +214,11 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: ## marker 2: the maximum fidelity value recorded in observation history pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower - # TODO: compare with this first draft logic - # def update_fidelity(config): - # ### DO NOT DELETE THIS FUNCTION YET - # # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max) - # ## that is, choose the next highest marker from 1 and 2 - # z_extrapolate = min( - # max(config.fidelity.value + self.b_step, z_inc), - # pseudo_z_max - # ) - # config.fidelity.value = z_extrapolate - # return config - def update_fidelity(config): # for all configs, set to pseudo_z_max ## that is, choose the highest seen fidelity in observation history z_extrapolate = pseudo_z_max - config.fidelity.value = z_extrapolate + config.update_hp_values({config.fidelity_name: z_extrapolate}) return config # collect IDs for partial configurations @@ -345,12 +333,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: else: # a candidate partial training run to continue target_fidelity = config.fidelity.value + horizon - config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) # if horizon exceeds max, query at max + config.update_hp_values({ + config.fidelity_name: min( + config.fidelity.value + horizon, config.fidelity.upper + ) # if horizon exceeds max, query at max + }) inc_list.append(inc_value) else: # a candidate new training run that we would need to start current_fidelity = 0 - config.fidelity.value = horizon + config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") @@ -430,12 +422,14 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # a candidate partial training run to continue target_fidelity = config.fidelity.value + horizon # if horizon exceeds max, query at max - config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper) + config.update_hp_values({config.fidelity_name: min( + config.fidelity.value + horizon, config.fidelity.upper + )}) inc_list.append(inc_value) else: # a candidate new training run that we would need to start current_fidelity = 0 - config.fidelity.value = horizon + config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index f826f18a..75d1f581 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -104,11 +104,7 @@ def sample( set_new_sample_fidelity: int | float = None, ) -> list(): """Samples a new set and returns the total set of observed + new configs.""" - start = time.time() partial_configs = self.observations.get_partial_configs_at_max_seen() - # print("-" * 50) - # print(f"| freeze-thaw:get_partial_at_max_seen(): {time.time()-start:.2f}s") - # print("-" * 50) _n = n if n is not None else self.SAMPLES_TO_DRAW if self.is_tabular: @@ -121,7 +117,6 @@ def sample( max_n = len(_all_ids) + 1 if self.sample_full_table else _n _n = min(max_n, len(_all_ids - _partial_ids)) - start = time.time() _new_configs = np.random.choice( list(_all_ids - _partial_ids), size=_n, replace=False ) @@ -132,9 +127,6 @@ def sample( for _i, val in enumerate(_new_configs): _configs[_i]["id"].value = val - # print("-" * 50) - # print(f"| freeze-thaw:sample:new_configs_extraction: {time.time()-start:.2f}s") - # print("-" * 50) new_configs = pd.Series( _configs, index=np.arange( @@ -155,19 +147,11 @@ def sample( ) # Updating fidelity values - start = time.time() if set_new_sample_fidelity is not None: for config in new_configs: - config.fidelity.value = set_new_sample_fidelity - # print("-" * 50) - # print(f"| freeze-thaw:sample:new_configs_set_fidelity: {time.time()-start:.2f}s") - # print("-" * 50) + config.update_hp_values({config.fidelity_name: set_new_sample_fidelity}) - start = time.time() configs = pd.concat([deepcopy(partial_configs), new_configs]) - # print("-" * 50) - # print(f"| freeze-thaw:sample:concat_configs: {time.time()-start:.2f}s") - # print("-" * 50) return configs diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 40730d9d..3ad062cb 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -405,7 +405,7 @@ def _randomly_promote(self) -> tuple[SearchSpace, int]: # calculating fidelity value new_fidelity = self.get_budget_value(budget + 1) # setting the config fidelity - config.fidelity.value = new_fidelity + config.update_hp_values({config.fidelity_name: new_fidelity}) return config, _config_id def get_config_and_ids( # pylint: disable=no-self-use @@ -427,7 +427,6 @@ def get_config_and_ids( # pylint: disable=no-self-use _config_dict = config.hp_values() _config_dict.update({config.fidelity_name: self.min_budget}) config.set_hyperparameters_from_dict(_config_dict) - # config.fidelity.value = self.min_budget _config_id = self.observed_configs.next_config_id() elif self.is_init_phase() or self._model_update_failed: # promote a config randomly if initial design size is satisfied but the @@ -496,7 +495,7 @@ def get_config_and_ids( # pylint: disable=no-self-use config = samples.loc[_config_id] # IMPORTANT: setting the fidelity value appropriately - config.fidelity.value = ( + _fid_value = ( config.fidelity.lower if best_idx > max(self.observed_configs.seen_config_ids) else ( @@ -508,6 +507,7 @@ def get_config_and_ids( # pylint: disable=no-self-use + self.step_size # ONE-STEP FIDELITY QUERY ) ) + config.update_hp_values({config.fidelity_name: _fid_value}) # generating correct IDs if _config_id in self.observed_configs.seen_config_ids: config_id = f"{_config_id}_{self.get_budget_level(config)}" diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index cc4e2e66..686ccccd 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -275,7 +275,6 @@ def extract_learning_curve( def get_training_data_4DyHPO( self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None ): - start = time.time() configs = [] learning_curves = [] performance = [] @@ -290,9 +289,6 @@ def get_training_data_4DyHPO( configs.append(row[self.config_col]) performance.append(row[self.perf_col]) learning_curves.append(self.extract_learning_curve(config_id, budget_id)) - # print("-" * 50) - # print(f"| Time for `get_training_data_4DyHPO()`: {time.time()-start:.2f}s") - # print("-" * 50) return configs, learning_curves, performance def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series: diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index bfb707bf..04476de8 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -890,3 +890,17 @@ def is_equal_value( return False return True + + def update_hp_values(self, new_values: dict[str, Any]) -> None: + """Update the hyperparameter values with new values. + + Args: + new_values: The new values to set for the hyperparameters. + """ + _hp_dict = self.hp_values() + _intersect = set(_hp_dict.keys()) & set(new_values.keys()) + assert len(_intersect) == len(new_values), \ + "All hyperparameters must be present! "\ + f"{set(_hp_dict.keys()) - set(new_values.keys())} are missing" + _hp_dict.update(new_values) + self.set_hyperparameters_from_dict(_hp_dict) diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 7279f625..bd2664e1 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -451,13 +451,13 @@ def check_essential_arguments( root_directory: str | None, pipeline_space: dict | None, max_cost_total: int | None, - max_evaluation_total: int | None, + max_evaluations_total: int | None, searcher: BaseOptimizer | dict | str | None, ) -> None: """Validates essential NePS configuration arguments. Ensures 'run_pipeline', 'root_directory', 'pipeline_space', and either - 'max_cost_total' or 'max_evaluation_total' are provided for NePS execution. + 'max_cost_total' or 'max_evaluations_total' are provided for NePS execution. Raises ValueError with missing argument details. Additionally, checks 'searcher' is a BaseOptimizer if 'pipeline_space' is absent. @@ -466,7 +466,7 @@ def check_essential_arguments( root_directory (str): Directory path for data storage. pipeline_space: search space for this run. max_cost_total: Max allowed total cost for experiments. - max_evaluation_total: Max allowed evaluations. + max_evaluations_total: Max allowed evaluations. searcher: Optimizer for the configuration space. Raises: @@ -481,9 +481,9 @@ def check_essential_arguments( # provide the search_space because it's the argument of the searcher. raise ValueError("'pipeline_space' is required but was not provided.") - if not max_evaluation_total and not max_cost_total: + if not max_evaluations_total and not max_cost_total: raise ValueError( - "'max_evaluation_total' or 'max_cost_total' is required but " + "'max_evaluations_total' or 'max_cost_total' is required but " "both were not provided." ) diff --git a/pyproject.toml b/pyproject.toml index 77292875..3ebfa42f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,14 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" - -[tool.poetry.dependencies.ifbo] -version = ">=0.3,<0.4" -python = ">=3.10,<3.12" -optional = true - -[tool.poetry.extras] -ifbo = ["ifbo"] +ifbo = ">=0.3.5" [tool.poetry.group.dev.dependencies] ruff = "^0.4" From 38d2588604af47f17e95b857688328b4122a7644 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 29 Aug 2024 19:58:48 +0200 Subject: [PATCH 16/46] First ifBO successful run push --- .../acquisition_functions/mf_ei.py | 3 - .../acquisition_functions/mf_pi.py | 15 +--- .../bayesian_optimization/models/__init__.py | 8 +- .../bayesian_optimization/models/pfn.py | 77 +++++++++++++++++++ neps/optimizers/default_searchers/ifbo.yaml | 6 +- neps/optimizers/multi_fidelity/dyhpo.py | 2 +- neps/optimizers/multi_fidelity/mf_bo.py | 72 +++++++++++------ neps/optimizers/multi_fidelity/utils.py | 24 +----- pyproject.toml | 2 +- 9 files changed, 141 insertions(+), 68 deletions(-) create mode 100644 neps/optimizers/bayesian_optimization/models/pfn.py diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index 5139d4b4..c025578e 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -404,18 +404,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain inc_value = self.sample_threshold(inc_value) - print(f"Threshold for EI: {inc_value}") # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for EI: {horizon}") for i, config in x.items(): if i <= max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index e64ea2e3..e41e0528 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -87,10 +87,10 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: # deepcopy - _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) - if self.surrogate_model_name == "pfn": + _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) + if self.surrogate_model_name == "ftpfn": _x, _x_tok, inc_list = self.preprocess_pfn( - x.copy() + deepcopy(x.copy()) ) # IMPORTANT change from vanilla-EI pi = self.eval_pfn_pi(_x_tok, inc_list) elif self.surrogate_model_name in ["deep_gp", "dpl"]: @@ -122,7 +122,6 @@ def eval_pfn_pi( pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list) if len(pi.shape) == 2: pi = pi.flatten() - print(f"Maximum PI: {pi.max()}") return pi def eval_gp_pi( @@ -311,19 +310,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain t_value = self.sample_threshold(inc_value) - print(f"Threshold for PI: {inc_value - t_value}") inc_value = t_value # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for PI: {horizon}") for i, config in x.items(): if i <= max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value @@ -344,7 +340,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) - #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) @@ -399,19 +394,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain t_value = self.sample_threshold(inc_value) - print(f"Threshold for EI: {inc_value - t_value}") inc_value = t_value # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for EI: {horizon}") for i, config in x.items(): if i <= max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value @@ -431,7 +423,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) - #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index c76bedfd..3bebbffb 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -8,14 +8,12 @@ except ImportError as e: DeepGP = MissingDependencyError("gpytorch", e) -try: - from .pfn import PFN_SURROGATE # only if available locally -except Exception as e: - PFN_SURROGATE = MissingDependencyError("pfn", e) +from .pfn import IFBOSurrogate + SurrogateModelMapping = { "deep_gp": DeepGP, "gp": ComprehensiveGP, "gp_hierarchy": ComprehensiveGPHierarchy, - "pfn": PFN_SURROGATE, + "ftpfn": IFBOSurrogate, } diff --git a/neps/optimizers/bayesian_optimization/models/pfn.py b/neps/optimizers/bayesian_optimization/models/pfn.py new file mode 100644 index 00000000..fcfd542b --- /dev/null +++ b/neps/optimizers/bayesian_optimization/models/pfn.py @@ -0,0 +1,77 @@ +from typing import Any +import numpy as np +import pandas as pd +from pathlib import Path +import torch + +from ifbo import FTPFN + + +class IFBOSurrogate: + """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" + + def __init__(self, target_path: Path = None, version: str = "0.0.1", *args, **kwargs): + super().__init__(*args, **kwargs) + self.ftpfn = FTPFN(target_path=target_path, version=version) + self.target_path = self.ftpfn.target_path + self.version = self.ftpfn.version + self.train_x = None + self.train_y = None + + @property + def device(self): + return self.ftpfn.device + + def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor: + return self.ftpfn.model( + self._cast_tensor_shapes(self.train_x), + self._cast_tensor_shapes(self.train_y), + self._cast_tensor_shapes(test_x) + ) + + def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: + if len(x.shape) == 3 and x.shape[1] == 1: + return x + if len(x.shape) == 2: + return x.reshape(x.shape[0], 1, x.shape[1]) + if len(x.shape) == 1: + return x.reshape(x.shape[0], 1) + raise ValueError(f"Shape not recognized: {x.shape}") + + @torch.no_grad() + def get_pi(self, test_x, y_best): + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.pi( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def get_ei(self, test_x, y_best): + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.ei( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def get_lcb(self, test_x, beta: float=(1-.682)/2): + logits = self._get_logits(test_x) + # y values are always transformed for maximizing + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=False # IMPORTANT to be False, should calculate the LCB using the lower-bound ICDF as per beta + ) + return lcb + + @torch.no_grad() + def get_ucb(self, test_x, beta: float=(1-.682)/2): + logits = self._get_logits(test_x) + # y values are always transformed for maximizing + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=True # IMPORTANT to be True, should calculate the UCB using the upper-bound ICDF as per beta + ) + return lcb diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml index 1eecea6a..38442175 100644 --- a/neps/optimizers/default_searchers/ifbo.yaml +++ b/neps/optimizers/default_searchers/ifbo.yaml @@ -1,2 +1,6 @@ strategy: ifbo -acquisition: MFPI-random \ No newline at end of file +surrogate_model: ftpfn +surrogate_model_args: + version: "0.0.1" +acquisition: MFPI-random +model_policy: PFNSurrogate \ No newline at end of file diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 3ad062cb..482fdbe3 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -129,7 +129,7 @@ def __init__( raise NotImplementedError elif surrogate_model == "gp": model_policy = FreezeThawModel - elif surrogate_model == "pfn": + elif surrogate_model == "ftpfn": model_policy = PFNSurrogate else: raise ValueError("Invalid model option selected!") diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 964e483d..d380c441 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -2,19 +2,16 @@ from copy import deepcopy - import numpy as np import pandas as pd import torch from neps.utils.common import instance_from_map -# from ..bayesian_optimization.models import SurrogateModelMapping from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping -# from ..multi_fidelity.utils import normalize_vectorize_config from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config -# from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity -from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity + class MFBOBase: @@ -199,15 +196,13 @@ def __init__( self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - if self.surrogate_model_name in ["deep_gp", "pfn"]: + if self.surrogate_model_name in ["deep_gp"]: self.surrogate_model_args.update({"pipeline_space": pipeline_space}) elif self.surrogate_model_name == "dpl": - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs} - ) - - # instantiate the surrogate model + self.surrogate_model_args.update({ + "pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs + }) self.surrogate_model = instance_from_map( SurrogateModelMapping, self.surrogate_model_name, @@ -241,8 +236,11 @@ def _fantasize_pending(self, train_x, train_y, pending_x): def _fit(self, train_x, train_y, train_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]: + elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl",]: self.surrogate_model.fit(train_x, train_y, train_lcs) + elif self.surrogate_model_name == "ftpfn": + # do nothing - no training required + pass else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options raise ValueError( @@ -284,7 +282,7 @@ def set_state( # only to handle tabular spaces if self.pipeline_space.has_tabular: - if self.surrogate_model_name in ["deep_gp", "pfn"]: + if self.surrogate_model_name in ["deep_gp"]: self.surrogate_model_args.update( {"pipeline_space": self.pipeline_space.raw_tabular_space} ) @@ -323,10 +321,10 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None) if decay_t is None: decay_t = len(train_x) train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) - self._fit(train_x, train_y, train_lcs) + self.surrogate_model._fit(train_x, train_y, train_lcs) return self.surrogate_model, decay_t - + class PFNSurrogate(FreezeThawModel): """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" @@ -336,10 +334,32 @@ def __init__(self, *args, **kwargs): self.train_x = None self.train_y = None + def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): + if train_x is None: + train_x = [] + if train_y is None: + train_y = [] + if pending_x is None: + pending_x = [] + + if decay_t is None: + decay_t = len(train_x) + train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) + self._fit(train_x, train_y, train_lcs) + + return self.surrogate_model, decay_t + def _fit(self, *args): # pylint: disable=unused-argument - assert self.surrogate_model_name == "pfn" + # no training required,, only preprocessing the training data as context during inference self.preprocess_training_set() - self.surrogate_model.fit(self.train_x, self.train_y) + + def _predict(self, test_x, test_lcs): + assert "pfn" in self.surrogate_model_name + test_x = self.preprocess_test_set(test_x) + return self.surrogate_model(self.train_x, self.train_y, test_x) + + def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: + return x def preprocess_training_set(self): _configs = self.observed_configs.df.config.values.copy() @@ -361,8 +381,12 @@ def preprocess_training_set(self): idxs = idxs.astype(float) idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper # TODO: account for fantasization - self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) - self.train_y = torch.Tensor(performances).to(device) + self.surrogate_model.train_x = self._cast_tensor_shapes( + torch.Tensor(np.hstack([idxs, configs])).to(device) + ) + self.surrogate_model.train_y = self._cast_tensor_shapes( + torch.Tensor(performances).to(device) + ) def preprocess_test_set(self, test_x): _len = len(self.observed_configs.all_configs_list()) @@ -379,10 +403,12 @@ def preprocess_test_set(self, test_x): token_ids = np.vstack((existing_token_ids, new_token_ids)) configs = np.array([normalize_vectorize_config(c) for c in test_x]) - test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device) - return test_x + self.surrogate_model.test_x = self._cast_tensor_shapes( + torch.Tensor(np.hstack([token_ids, configs])).to(device) + ) + return self.surrogate_model.test_x def _predict(self, test_x, test_lcs): assert self.surrogate_model_name == "pfn" test_x = self.preprocess_test_set(test_x) - return self.surrogate_model.predict(self.train_x, self.train_y, test_x) + return self.surrogate_model(self.train_x, self.train_y, test_x) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 686ccccd..657e1775 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -1,6 +1,7 @@ # type: ignore from typing import Any, Sequence +from copy import deepcopy import numpy as np import pandas as pd import torch @@ -32,7 +33,7 @@ def normalize_vectorize_config( config: SearchSpace, ignore_fidelity: bool = True ) -> np.ndarray: _new_vector = [] - for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity).items(): + for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity=ignore_fidelity).items(): _new_vector.extend(hp_list) return np.array(_new_vector) @@ -359,33 +360,12 @@ def token_ids(self) -> np.ndarray: index=[(0, 2), (1, 2), (0, 1)], ) - print(data.df) - print(data.get_learning_curves()) - print( - "Mapping of budget IDs into best performing configurations at each fidelity:\n", - data.get_incumbents_for_budgets(), - ) - print( - "Best Performance at each budget level:\n", - data.get_best_performance_for_each_budget(), - ) - print( - "Configuration ID of the best observed performance so far: ", - data.get_best_learning_curve_id(), - ) - print(data.extract_learning_curve(0, 2)) - # data.df.sort_index(inplace=True) - print(data.get_partial_configs_at_max_seen()) - # When updating multiple indices at a time both the values in the data dictionary and the indices should be lists data.update_data({"perf": [1.8, 1.5]}, index=[(1, 1), (0, 0)]) - print(data.df) data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"]) # when adding a single row second level list is not necessary data.add_data(["conf1", 0.5], index=(0, 0)) - print(data.df) data.update_data({"perf": [1.8], "budget_col": [5]}, index=(0, 0)) - print(data.df) diff --git a/pyproject.toml b/pyproject.toml index 3ebfa42f..9c0ff623 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" -ifbo = ">=0.3.5" +ifbo = ">=0.3.8" [tool.poetry.group.dev.dependencies] ruff = "^0.4" From 2bd81db7f2cac77ec1c73b4f32cd46c4ee4c2174 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 29 Aug 2024 20:42:35 +0200 Subject: [PATCH 17/46] Aggressive removal of code --- neps/optimizers/__init__.py | 10 +- .../acquisition_functions/__init__.py | 15 +- .../acquisition_functions/mf_ei.py | 28 +- .../acquisition_functions/mf_pi.py | 9 +- .../acquisition_functions/ucb.py | 13 - .../freeze_thaw_sampler.py | 71 +-- .../bayesian_optimization/models/__init__.py | 6 - .../bayesian_optimization/models/pfn.py | 2 - neps/optimizers/default_searchers/ifbo.yaml | 3 + neps/optimizers/multi_fidelity/_dyhpo.py | 406 ------------------ .../multi_fidelity/{dyhpo.py => ifbo.py} | 56 +-- neps/optimizers/multi_fidelity/mf_bo.py | 64 +-- neps/optimizers/multi_fidelity/utils.py | 4 +- 13 files changed, 87 insertions(+), 600 deletions(-) delete mode 100644 neps/optimizers/multi_fidelity/_dyhpo.py rename neps/optimizers/multi_fidelity/{dyhpo.py => ifbo.py} (91%) diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 7a3619ce..1cff287a 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -5,10 +5,9 @@ from .base_optimizer import BaseOptimizer from .bayesian_optimization.cost_cooling import CostCooling -from .bayesian_optimization.mf_tpe import MultiFidelityPriorWeightedTreeParzenEstimator from .bayesian_optimization.optimizer import BayesianOptimization from .grid_search.optimizer import GridSearch -from .multi_fidelity.dyhpo import MFEIBO +from .multi_fidelity.ifbo import IFBO from .multi_fidelity.hyperband import ( MOBSTER, AsynchronousHyperband, @@ -41,10 +40,11 @@ "asha": AsynchronousSuccessiveHalving, "hyperband": Hyperband, "asha_prior": AsynchronousSuccessiveHalvingWithPriors, - "multifidelity_tpe": MultiFidelityPriorWeightedTreeParzenEstimator, "hyperband_custom_default": HyperbandCustomDefault, "priorband": PriorBand, + "priorband_bo": partial(PriorBand, model_based=True), + "priorband_asha": PriorBandAsha, + "priorband_asha_hyperband": PriorBandAshaHB, "mobster": MOBSTER, - "ifbo_ei": MFEIBO, - "ifbo": partial(MFEIBO, acquisition="MFPI-random"), + "ifbo": IFBO, } diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index eed5f7f7..8fc5c4bb 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -6,11 +6,9 @@ from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ( ComprehensiveExpectedImprovement, ) -from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFEI from neps.optimizers.bayesian_optimization.acquisition_functions.mf_pi import MFPI_Random from neps.optimizers.bayesian_optimization.acquisition_functions.ucb import ( UpperConfidenceBound, - MF_UCB, ) from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import ( DecayingPriorWeightedAcquisition, @@ -36,11 +34,6 @@ in_fill="posterior", augmented_ei=True, ), - "MFEI": partial( - MFEI, - in_fill="best", - augmented_ei=False, - ), "MFPI-random": partial( MFPI_Random, in_fill="best", @@ -50,17 +43,13 @@ UpperConfidenceBound, maximize=False, ), - "MF-UCB": partial( - MF_UCB, - maximize=False, - ), } __all__ = [ "AcquisitionMapping", "ComprehensiveExpectedImprovement", - "MFEI", "UpperConfidenceBound", - "MF_UCB", "DecayingPriorWeightedAcquisition", + "MFPI_Random", + "UCB", ] diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index c025578e..a628414c 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -41,23 +41,6 @@ def preprocess_gp(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: x, inc_list = self.preprocess(x) return x, inc_list - def preprocess_deep_gp(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - x, inc_list = self.preprocess(x) - x_lcs = [] - for idx in x.index: - if idx in self.observations.df.index.levels[0]: - # TODO: Samir, check if `budget_id=None` is okay? - # budget_level = self.get_budget_level(x[idx]) - # extracting the available/observed learning curve - lc = self.observations.extract_learning_curve(idx, budget_id=None) - else: - # initialize a learning curve with a placeholder - # This is later padded accordingly for the Conv1D layer - lc = [] - x_lcs.append(lc) - self.surrogate_model.set_prediction_learning_curves(x_lcs) - return x, inc_list - def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. @@ -78,7 +61,7 @@ def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.T return _x, _x_tok, inc_list -# NOTE: the order of inheritance is important +# NOTE: the order of inheritance is important by MRO class MFEI(MFStepBase, ComprehensiveExpectedImprovement): def __init__( self, @@ -152,17 +135,12 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: # deepcopy _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) - if self.surrogate_model_name == "pfn": + if self.surrogate_model_name == "ftpfn": _x, _x_tok, inc_list = self.preprocess_pfn( x.copy() ) # IMPORTANT change from vanilla-EI ei = self.eval_pfn_ei(_x_tok, inc_list) - elif self.surrogate_model_name in ["deep_gp", "dpl"]: - _x, inc_list = self.preprocess_deep_gp( - _x - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x.values.tolist(), inc_list) - elif self.surrogate_model_name == "gp": + elif self.surrogate_model_name in ["gp", "gp_hierarchy"]: _x, inc_list = self.preprocess_gp( _x ) # IMPORTANT change from vanilla-EI diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index e41e0528..981ab3e4 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -90,15 +90,10 @@ def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Ser _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) if self.surrogate_model_name == "ftpfn": _x, _x_tok, inc_list = self.preprocess_pfn( - deepcopy(x.copy()) - ) # IMPORTANT change from vanilla-EI - pi = self.eval_pfn_pi(_x_tok, inc_list) - elif self.surrogate_model_name in ["deep_gp", "dpl"]: - _x, inc_list = self.preprocess_deep_gp( _x ) # IMPORTANT change from vanilla-EI - pi = self.eval_gp_pi(_x.values.tolist(), inc_list) - elif self.surrogate_model_name == "gp": + pi = self.eval_pfn_pi(_x_tok, inc_list) + elif self.surrogate_model_name in ["gp", "gp_hierarchy"]: _x, inc_list = self.preprocess_gp( _x ) # IMPORTANT change from vanilla-EI diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py index adf57266..11b592eb 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py @@ -45,16 +45,3 @@ def eval( ucb_scores = ucb_scores.detach().numpy() * sign return ucb_scores - - -class MF_UCB(UpperConfidenceBound): - - def preprocess(self, x: Iterable) -> Iterable: - performances = self.observations.get_best_performance_for_each_budget() - pass - - def eval( - self, x: Iterable, asscalar: bool = False - ) -> Union[np.ndarray, torch.Tensor, float]: - x = self.preprocess(x) - return self.eval(x, asscalar=asscalar) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index 75d1f581..b1e5172a 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -1,28 +1,37 @@ -# type: ignore - +from __future__ import annotations +from typing import Callable import warnings import numpy as np import pandas as pd +from copy import deepcopy from neps.search_spaces.search_space import SearchSpace from neps.optimizers.multi_fidelity.utils import MFObservedData -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import AcquisitionSampler +from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( + AcquisitionSampler, +) -class FreezeThawSampler(AcquisitionSampler): - SAMPLES_TO_DRAW = 100 # number of random samples to draw at lowest fidelity +SAMPLES_TO_DRAW = ( + 100 # number of random samples to draw for optimizing acquisition function +) + - def __init__(self, **kwargs): +class FreezeThawSampler(AcquisitionSampler): + def __init__(self, samples_to_draw: int | None = None, **kwargs): super().__init__(**kwargs) self.observations = None self.b_step = None self.n = None self.pipeline_space = None # args to manage tabular spaces/grid - self.is_tabular = False + self.is_tabular = False # flag is set by `set_state()` self.sample_full_table = None + self.samples_to_draw = ( + samples_to_draw if samples_to_draw is not None else SAMPLES_TO_DRAW + ) self.set_sample_full_tabular(True) # sets flag that samples full table def set_sample_full_tabular(self, flag: bool = False): @@ -30,9 +39,13 @@ def set_sample_full_tabular(self, flag: bool = False): self.sample_full_table = flag def _sample_new( - self, index_from: int, n: int = None, ignore_fidelity: bool = False + self, + index_from: int, + n: int | None = None, + ignore_fidelity: bool = False, ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW + n = n if n is not None else self.samples_to_draw + assert self.pipeline_space is not None new_configs = [ self.pipeline_space.sample( patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity @@ -47,14 +60,17 @@ def _sample_new( def _sample_new_unique( self, index_from: int, - n: int = None, + n: int | None = None, patience: int = 10, ignore_fidelity: bool = False, ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW + n = n if n is not None else self.samples_to_draw assert ( patience > 0 and n > 0 - ), "Patience and SAMPLES_TO_DRAW must be larger than 0" + ), "Patience and `samples_to_draw` must be larger than 0" + + assert self.observations is not None + assert self.pipeline_space is not None existing_configs = self.observations.all_configs_list() new_configs = [] @@ -99,14 +115,18 @@ def _sample_new_unique( def sample( self, - acquisition_function=None, - n: int = None, - set_new_sample_fidelity: int | float = None, - ) -> list(): + acquisition_function: Callable | None = None, + n: int | None = None, + set_new_sample_fidelity: int | float | None = None, + ) -> pd.DataFrame: """Samples a new set and returns the total set of observed + new configs.""" + assert self.observations is not None + assert self.pipeline_space is not None + assert self.pipeline_space.custom_grid_table is not None + partial_configs = self.observations.get_partial_configs_at_max_seen() - _n = n if n is not None else self.SAMPLES_TO_DRAW + _n = n if n is not None else self.samples_to_draw if self.is_tabular: # handles tabular data such that the entire unseen set of configs from the # table is considered to be the new set of candidates @@ -123,9 +143,9 @@ def sample( placeholder_config = self.pipeline_space.sample( patience=self.patience, user_priors=False, ignore_fidelity=False ) - _configs = [deepcopy(placeholder_config) for _id in _new_configs] + _configs = [placeholder_config.clone() for _id in _new_configs] for _i, val in enumerate(_new_configs): - _configs[_i]["id"].value = val + _configs[_i]["id"].set_value(val) new_configs = pd.Series( _configs, @@ -141,9 +161,12 @@ def sample( # Continuous benchmarks need to deepcopy individual configs here, # because in contrast to tabular benchmarks # they are not reset in every sampling step + + # TODO: I do not know what the f p_config_ is meant to be so I don't know + # if we have a specific clone method or not... partial_configs = pd.Series( [deepcopy(p_config_) for idx, p_config_ in partial_configs.items()], - index=partial_configs.index + index=partial_configs.index, ) # Updating fidelity values @@ -153,20 +176,20 @@ def sample( configs = pd.concat([deepcopy(partial_configs), new_configs]) - return configs + return configs # type: ignore def set_state( self, pipeline_space: SearchSpace, observations: MFObservedData, b_step: int, - n: int = None, - ): + n: int | None = None, + ) -> None: # overload to select incumbent differently through observations self.pipeline_space = pipeline_space self.observations = observations self.b_step = b_step - self.n = n if n is not None else self.SAMPLES_TO_DRAW + self.n = n if n is not None else self.samples_to_draw if ( hasattr(self.pipeline_space, "custom_grid_table") and self.pipeline_space.custom_grid_table is not None diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index 3bebbffb..43a38ef6 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -3,16 +3,10 @@ from .gp import ComprehensiveGP from .gp_hierarchy import ComprehensiveGPHierarchy -try: - from .deepGP import DeepGP -except ImportError as e: - DeepGP = MissingDependencyError("gpytorch", e) - from .pfn import IFBOSurrogate SurrogateModelMapping = { - "deep_gp": DeepGP, "gp": ComprehensiveGP, "gp_hierarchy": ComprehensiveGPHierarchy, "ftpfn": IFBOSurrogate, diff --git a/neps/optimizers/bayesian_optimization/models/pfn.py b/neps/optimizers/bayesian_optimization/models/pfn.py index fcfd542b..0db81182 100644 --- a/neps/optimizers/bayesian_optimization/models/pfn.py +++ b/neps/optimizers/bayesian_optimization/models/pfn.py @@ -55,7 +55,6 @@ def get_ei(self, test_x, y_best): @torch.no_grad() def get_lcb(self, test_x, beta: float=(1-.682)/2): logits = self._get_logits(test_x) - # y values are always transformed for maximizing lcb = self.ftpfn.model.criterion.ucb( logits=logits, best_f=None, @@ -67,7 +66,6 @@ def get_lcb(self, test_x, beta: float=(1-.682)/2): @torch.no_grad() def get_ucb(self, test_x, beta: float=(1-.682)/2): logits = self._get_logits(test_x) - # y values are always transformed for maximizing lcb = self.ftpfn.model.criterion.ucb( logits=logits, best_f=None, diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml index 38442175..76522922 100644 --- a/neps/optimizers/default_searchers/ifbo.yaml +++ b/neps/optimizers/default_searchers/ifbo.yaml @@ -3,4 +3,7 @@ surrogate_model: ftpfn surrogate_model_args: version: "0.0.1" acquisition: MFPI-random +acquisition_sampler: freeze-thaw +acquisition_sampler_args: + samples_to_draw: 250 model_policy: PFNSurrogate \ No newline at end of file diff --git a/neps/optimizers/multi_fidelity/_dyhpo.py b/neps/optimizers/multi_fidelity/_dyhpo.py deleted file mode 100644 index 1a0ce0e5..00000000 --- a/neps/optimizers/multi_fidelity/_dyhpo.py +++ /dev/null @@ -1,406 +0,0 @@ -from typing import Any, List, Union, override - -import numpy as np - -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) -from neps.optimizers.multi_fidelity.promotion_policy import PromotionPolicy -from neps.optimizers.multi_fidelity.sampling_policy import ( - BaseDynamicModelPolicy, - ModelPolicy, - RandomPromotionDynamicPolicy, - SamplingPolicy, -) -from neps.optimizers.multi_fidelity.utils import MFObservedData - - -class MFEIBO(BaseOptimizer): - """Base class for MF-BO algorithms that use DyHPO like acquisition and budgeting.""" - - acquisition: str = "EI" - - def __init__( - self, - pipeline_space: SearchSpace, - budget: int, - step_size: Union[int, float] = 1, - optimal_assignment: bool = False, - use_priors: bool = False, - sample_default_first: bool = False, - sample_default_at_target: bool = False, - sampling_policy: Any = None, - promotion_policy: Any = None, - sample_policy_args: Union[dict, None] = None, - promotion_policy_args: Union[dict, None] = None, - promotion_type: str = "model", - sample_type: str = "model", - sampling_args: Union[dict, None] = None, - loss_value_on_error: Union[None, float] = None, - cost_value_on_error: Union[None, float] = None, - patience: int = 100, - ignore_errors: bool = False, - logger=None, - # arguments for model - surrogate_model: Union[str, Any] = "gp", - surrogate_model_args: dict = None, - domain_se_kernel: str = None, - graph_kernels: list = None, - hp_kernels: list = None, - acquisition: Union[str, BaseAcquisition] = acquisition, - acquisition_sampler: Union[str, AcquisitionSampler] = "mutation", - model_policy: Any = RandomPromotionDynamicPolicy, - log_prior_weighted: bool = False, - initial_design_size: int = 10, - model_policy_args: Union[dict, None] = None, - ): - """Initialise - - Args: - pipeline_space: Space in which to search - budget: Maximum budget - use_priors: Allows random samples to be generated from a default - Samples generated from a Gaussian centered around the default value - sampling_policy: The type of sampling procedure to use - promotion_policy: The type of promotion procedure to use - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. default: None - logger: logger object, or None to use the neps logger - sample_default_first: Whether to sample the default configuration first - """ - super().__init__( - pipeline_space=pipeline_space, - budget=budget, - patience=patience, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - ) - self._budget_list: List[Union[int, float]] = [] - self.step_size: Union[int, float] = step_size - self._initial_design_size = initial_design_size - self._model_update_failed = False - self.sample_default_first = sample_default_first - self.sample_default_at_target = sample_default_at_target - - self.promotion_type = promotion_type - self.sample_type = sample_type - self.sampling_args = {} if sampling_args is None else sampling_args - self.use_priors = use_priors - self.total_fevals: int = 0 - - # TODO: Use initialized objects where possible instead of ..._args parameters. - # This will also make it easier to write new policies for users. - if model_policy_args is None: - model_policy_args = dict() - if sample_policy_args is None: - sample_policy_args = dict() - if promotion_policy_args is None: - promotion_policy_args = dict() - - self.observed_configs = MFObservedData( - columns=["config", "perf"], - index_names=["config_id", "budget_id"], - ) - - if model_policy is not None: - model_params = dict( - pipeline_space=pipeline_space, - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - graph_kernels=graph_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - use_priors=use_priors, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - logger=logger, - ) - model_params.update(model_policy_args) - if issubclass(model_policy, BaseDynamicModelPolicy): - self.model_policy = model_policy( - observed_configs=self.observed_configs, **model_params - ) - elif issubclass(model_policy, ModelPolicy): - self.model_policy = model_policy(**model_params) - elif issubclass(model_policy, SamplingPolicy): - self.model_policy = model_policy( - pipeline_space=pipeline_space, - patience=patience, - logger=logger, - **model_policy_args, - ) - else: - raise ValueError( - f"Model policy can't be {model_policy}. " - f"It must subclass one of the predefined base classes" - ) - - if sampling_policy is not None: - sampling_params = dict( - pipeline_space=pipeline_space, patience=patience, logger=logger - ) - if issubclass(sampling_policy, SamplingPolicy): - sampling_params.update(sample_policy_args) - self.sampling_policy = sampling_policy(**sampling_params) - else: - raise ValueError( - f"Sampling policy {sampling_policy} must inherit from " - f"SamplingPolicy base class" - ) - - if promotion_policy is not None: - if issubclass(promotion_policy, PromotionPolicy): - promotion_params = dict(eta=3) - promotion_params.update(promotion_policy_args) - self.promotion_policy = promotion_policy(**promotion_params) - else: - raise ValueError( - f"Promotion policy {promotion_policy} must inherit from " - f"PromotionPolicy base class" - ) - - def get_budget_level(self, config: SearchSpace) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.step_size) - - def get_budget_value(self, budget_level: Union[int, float]) -> Union[int, float]: - if isinstance(self.pipeline_space.fidelity, IntegerParameter): - budget_val = int( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - elif isinstance(self.pipeline_space.fidelity, FloatParameter): - budget_val = ( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - else: - raise NotImplementedError( - f"Fidelity parameter: {self.pipeline_space.fidelity}" - f"must be one of the types: " - f"[IntegerParameter, FloatParameter], but is type:" - f"{type(self.pipeline_space.fidelity)}" - ) - self._budget_list.append(budget_val) - return budget_val - - @property - def is_init_phase(self) -> bool: - if self.num_train_configs < self._initial_design_size: - return True - return False - - @property - def num_train_configs(self): - return len(self.observed_configs.completed_runs) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - """This is basically the fit method. - - Args: - previous_results (dict[str, ConfigResult]): [description] - pending_evaluations (dict[str, ConfigResult]): [description] - """ - - # previous optimization run exists and needs to be loaded - self._load_previous_observations(previous_results) - self.total_fevals = len(previous_results) + len(pending_evaluations) - - # account for pending evaluations - self._handle_pending_evaluations(pending_evaluations) - - self.observed_configs.df.sort_index( - level=self.observed_configs.df.index.names, inplace=True - ) - self.model_policy.observed_configs = self.observed_configs - # fit any model/surrogates - - if not self.is_init_phase: - self._fit_models() - - def _load_previous_observations(self, previous_results): - for config_id, config_val in previous_results.items(): - _config, _budget_level = config_id.split("_") - perf = self.get_loss(config_val.result) - index = (int(_config), int(_budget_level)) - self.observed_configs.add_data([config_val.config, perf], index=index) - - if not np.isclose( - self.observed_configs.df.loc[index, self.observed_configs.perf_col], - perf, - ): - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: perf, - }, - index=index, - ) - - def _handle_pending_evaluations(self, pending_evaluations): - for config_id, config_val in pending_evaluations.items(): - _config, _budget_level = config_id.split("_") - index = (int(_config), int(_budget_level)) - - if index not in self.observed_configs.df.index: - self.observed_configs.add_data([config_val.config, np.nan], index=index) - else: - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: np.nan, - }, - index=index, - ) - - def _fit_models(self): - # TODO: Once done with development catch the model update exceptions - # and skip model based suggestions if failed (karibbov) - self.model_policy.update_model() - - def is_promotable(self, promotion_type: str = "model") -> Union[int, None]: - """ - Check if there are any configurations to promote, if yes then return the integer - ID of the promoted configuration, else return None. - """ - if promotion_type == "model": - config_id = self.model_policy.sample(is_promotion=True, **self.sampling_args) - elif promotion_type == "policy": - config_id = self.promotion_policy.retrieve_promotions() - elif promotion_type is None: - config_id = None - else: - raise ValueError( - f"'{promotion_type}' based promotion is not possible, please" - f"use either 'model', 'policy' or None as promotion_type" - ) - - return config_id - - def sample_new_config( - self, - sample_type: str = "model", - **kwargs, - ) -> SearchSpace: - """ - Sample completely new configuration that - hasn't been observed in any fidelity before. - Your model_policy and/or sampling_policy must satisfy this constraint - """ - if sample_type == "model": - config = self.model_policy.sample(**self.sampling_args) - elif sample_type == "policy": - config = self.sampling_policy.sample(**self.sampling_args) - elif sample_type is None: - config = self.pipeline_space.sample( - patience=self.patience, - user_priors=self.use_priors, - ignore_fidelity=True, - ) - else: - raise ValueError( - f"'{sample_type}' based sampling is not possible, please" - f"use either 'model', 'policy' or None as sampling_type" - ) - - return config - - def get_config_and_ids(self) -> tuple[RawConfig, str, Union[str, None]]: - """...and this is the method that decides which point to query. - - Returns: - [type]: [description] - """ - _config_id = None - fidelity_value_set = False - if ( - self.num_train_configs == 0 - and self.sample_default_first - and self.pipeline_space.has_prior - ): - config = self.pipeline_space.sample_default_configuration( - patience=self.patience, ignore_fidelity=False - ) - elif ( - (self.num_train_configs == 0 and self._initial_design_size >= 1) - or self.is_init_phase - or self._model_update_failed - ): - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - else: - for _ in range(self.patience): - promoted_config_id = self.is_promotable( - promotion_type=self.promotion_type - ) - if ( - promoted_config_id is not None - and promoted_config_id in self.observed_configs.df.index.levels[0] - ): - current_budget = self.observed_configs.df.loc[ - (promoted_config_id,) - ].index[-1] - next_budget = current_budget + 1 - config = self.observed_configs.df.loc[ - (promoted_config_id, current_budget), - self.observed_configs.config_col, - ] - if np.less_equal( - self.get_budget_value(next_budget), config.fidelity.upper - ): - config.fidelity.set_value(self.get_budget_value(next_budget)) - _config_id = promoted_config_id - fidelity_value_set = True - break - elif promoted_config_id is not None: - self.logger.warn( - f"Configuration ID: '{promoted_config_id}' is " - f"not promotable because it doesn't exist in " - f"the observed configuration IDs: " - f"{self.observed_configs.df.index.levels[0]}.\n\n" - f"Trying to sample again..." - ) - else: - # sample_new_config must return a completely new configuration that - # hasn't been observed in any fidelity before - config = self.sample_new_config(sample_type=self.sample_type) - break - - # if the returned config already observed, - # set the fidelity to the next budget level if not max already - # else set the fidelity to the minimum budget level - else: - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - - if not fidelity_value_set: - config.fidelity.set_value(self.get_budget_value(0)) - - if _config_id is None: - _config_id = ( - self.observed_configs.df.index.get_level_values(0).max() + 1 - if len(self.observed_configs.df.index.get_level_values(0)) > 0 - else 0 - ) - config_id = f"{_config_id}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, None diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/ifbo.py similarity index 91% rename from neps/optimizers/multi_fidelity/dyhpo.py rename to neps/optimizers/multi_fidelity/ifbo.py index 482fdbe3..4d5985ea 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -5,7 +5,7 @@ import pandas as pd from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult, RawConfig +from neps.utils.types import ConfigResult from neps.utils.common import instance_from_map, EvaluationData from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -24,10 +24,10 @@ from neps.optimizers.multi_fidelity.utils import MFObservedData -class MFEIBO(BaseOptimizer): +class IFBO(BaseOptimizer): """Base class for MF-BO algorithms that use DyHPO-like acquisition and budgeting.""" - acquisition: str = "MFEI" + acquisition: str = "MFPI-random" def __init__( self, @@ -44,7 +44,7 @@ def __init__( ignore_errors: bool = False, logger=None, # arguments for model - surrogate_model: str | Any = "gp", + surrogate_model: str | Any = "ftpfn", surrogate_model_args: dict = None, domain_se_kernel: str = None, graph_kernels: list = None, @@ -53,7 +53,7 @@ def __init__( acquisition_args: dict = None, acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", acquisition_sampler_args: dict = None, - model_policy: Any = FreezeThawModel, + model_policy: Any = PFNSurrogate, initial_design_fraction: float = 0.75, initial_design_size: int = 10, initial_design_budget: int = None, @@ -125,9 +125,7 @@ def __init__( self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space) # TODO: Better solution than branching based on the surrogate name is needed - if surrogate_model in ["deep_gp", "dpl"]: - raise NotImplementedError - elif surrogate_model == "gp": + if surrogate_model in ["gp", "gp_hierarchy"]: model_policy = FreezeThawModel elif surrogate_model == "ftpfn": model_policy = PFNSurrogate @@ -228,6 +226,7 @@ def _set_initial_design( return _initial_design_size, _initial_design_budget def get_budget_level(self, config: SearchSpace) -> int: + """Calculates the discretized (int) budget level for a given configuration.""" return int( np.ceil((config.fidelity.value - config.fidelity.lower) / self.step_size) ) @@ -252,7 +251,7 @@ def get_budget_value(self, budget_level: int | float) -> int | float: return budget_val def total_budget_spent(self) -> int | float: - """Calculates the toal budget spent so far. + """Calculates the toal budget spent so far, in the unit of fidelity specified. This is calculated as a function of the fidelity range provided, that takes into account the minimum budget and the step size. @@ -331,7 +330,7 @@ def _get_config_id_split(cls, config_id: str) -> tuple[str, str]: def _load_previous_observations(self, previous_results): def index_data_split(config_id: str, config_val): - _config_id, _budget_id = MFEIBO._get_config_id_split(config_id) + _config_id, _budget_id = IFBO._get_config_id_split(config_id) index = int(_config_id), int(_budget_id) _data = [ config_val.config, @@ -418,7 +417,7 @@ def get_config_and_ids( # pylint: disable=no-self-use """ config_id = None previous_config_id = None - if self.is_init_phase(budget_based=False): + if self.is_init_phase(): # sample a new config till initial design size is satisfied self.logger.info("sampling...") config = self.pipeline_space.sample( @@ -462,39 +461,9 @@ def get_config_and_ids( # pylint: disable=no-self-use # NOTE: `samples` and `_samples` should share the same index values, hence, # avoid using `.iloc` and work with `.loc` on these pandas DataFrame/Series - if hasattr(self.acquisition, "mu"): - # collect prediction learning_curves - lcs = [] - # and tabular ids - tabular_ids = [] - for idx in _samples.index: - if self.acquisition_sampler.is_tabular: - tabular_ids.append(samples[idx]["id"].value) - if idx in self.observed_configs.df.index.levels[0]: - # extracting the available/observed learning curve - lc = self.observed_configs.extract_learning_curve( - idx, budget_id=None - ) - else: - # initialize a learning curve with a placeholder - # This is later padded accordingly for the Conv1D layer - lc = [] - lcs.append(lc) - - data = { - "Acq Value": acq.values, - "preds": self.acquisition.mu, - "incumbents": self.acquisition.mu_star, - "std": self.acquisition.std, - "pred_learning_curves": lcs, - } - if self.acquisition_sampler.is_tabular: - data["tabular_ids"] = tabular_ids - # assigning config hyperparameters config = samples.loc[_config_id] # IMPORTANT: setting the fidelity value appropriately - _fid_value = ( config.fidelity.lower if best_idx > max(self.observed_configs.seen_config_ids) @@ -504,7 +473,7 @@ def get_config_and_ids( # pylint: disable=no-self-use best_idx ] ) - + self.step_size # ONE-STEP FIDELITY QUERY + + self.step_size # ONE-STEP FIDELITY QUERY for freeze-thaw ) ) config.update_hp_values({config.fidelity_name: _fid_value}) @@ -514,4 +483,5 @@ def get_config_and_ids( # pylint: disable=no-self-use previous_config_id = f"{_config_id}_{self.get_budget_level(config) - 1}" else: config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, previous_config_id + + return config.hp_values(), config_id, previous_config_id # type: ignore diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index d380c441..71953d21 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -187,7 +187,7 @@ class FreezeThawModel: def __init__( self, pipeline_space, - surrogate_model: str = "pfn", + surrogate_model: str = "ftpfn", surrogate_model_args: dict = None, ): self.observed_configs = None @@ -196,13 +196,6 @@ def __init__( self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - if self.surrogate_model_name in ["deep_gp"]: - self.surrogate_model_args.update({"pipeline_space": pipeline_space}) - elif self.surrogate_model_name == "dpl": - self.surrogate_model_args.update({ - "pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs - }) self.surrogate_model = instance_from_map( SurrogateModelMapping, self.surrogate_model_name, @@ -214,13 +207,13 @@ def _fantasize_pending(self, train_x, train_y, pending_x): # Select configs that are neither pending nor resulted in error completed_configs = self.observed_configs.completed_runs.copy(deep=True) # IMPORTANT: preprocess observations to get appropriate training data - train_x, train_lcs, train_y = self.observed_configs.get_training_data_4DyHPO( + train_x, train_lcs, train_y = self.observed_configs.get_training_data_4ifbo( completed_configs, self.pipeline_space ) pending_condition = self.observed_configs.pending_condition if pending_condition.any(): pending_configs = self.observed_configs.df.loc[pending_condition] - pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4DyHPO( + pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4ifbo( pending_configs ) self._fit(train_x, train_y, train_lcs) @@ -236,8 +229,6 @@ def _fantasize_pending(self, train_x, train_y, pending_x): def _fit(self, train_x, train_y, train_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl",]: - self.surrogate_model.fit(train_x, train_y, train_lcs) elif self.surrogate_model_name == "ftpfn": # do nothing - no training required pass @@ -250,7 +241,7 @@ def _fit(self, train_x, train_y, train_lcs): def _predict(self, test_x, test_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: return self.surrogate_model.predict(test_x) - elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]: + elif self.surrogate_model_name == "ftpfn": return self.surrogate_model.predict(test_x, test_lcs) else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options @@ -268,47 +259,12 @@ def set_state( self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - if self.surrogate_model_name == "dpl": - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs} - ) - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) - - # only to handle tabular spaces - if self.pipeline_space.has_tabular: - if self.surrogate_model_name in ["deep_gp"]: - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space.raw_tabular_space} - ) - elif self.surrogate_model_name == "dpl": - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs} - ) - # instantiate the surrogate model, again, with the new pipeline space - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) - elif self.surrogate_model_name == "dpl": - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs} - ) - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) + self.surrogate_model = instance_from_map( + SurrogateModelMapping, + self.surrogate_model_name, + name="surrogate model", + kwargs=self.surrogate_model_args, + ) def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): if train_x is None: diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 657e1775..e5c2a900 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -6,8 +6,8 @@ import pandas as pd import torch -from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids def continuous_to_tabular( @@ -273,7 +273,7 @@ def extract_learning_curve( lc = lcs.loc[config_id, :budget_id].values.flatten().tolist() return deepcopy(lc) - def get_training_data_4DyHPO( + def get_training_data_4ifbo( self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None ): configs = [] From 2916c6c76773cf1b997de72d15d7540e5353fd40 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 29 Aug 2024 21:09:39 +0200 Subject: [PATCH 18/46] Removing GP support from freeze-thaw --- .../acquisition_functions/__init__.py | 1 - .../acquisition_functions/mf_ei.py | 53 ------------------- .../acquisition_functions/mf_pi.py | 20 ------- neps/optimizers/multi_fidelity/mf_bo.py | 8 +-- 4 files changed, 2 insertions(+), 80 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index 8fc5c4bb..eba0f694 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -51,5 +51,4 @@ "UpperConfidenceBound", "DecayingPriorWeightedAcquisition", "MFPI_Random", - "UCB", ] diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index a628414c..91c0cd98 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -36,11 +36,6 @@ def set_state( def get_budget_level(self, config) -> int: return int((config.fidelity.value - config.fidelity.lower) / self.b_step) - - def preprocess_gp(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - x, inc_list = self.preprocess(x) - return x, inc_list - def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. @@ -140,11 +135,6 @@ def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Ser x.copy() ) # IMPORTANT change from vanilla-EI ei = self.eval_pfn_ei(_x_tok, inc_list) - elif self.surrogate_model_name in ["gp", "gp_hierarchy"]: - _x, inc_list = self.preprocess_gp( - _x - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x.values.tolist(), inc_list) else: raise ValueError( f"Unrecognized surrogate model name: {self.surrogate_model_name}" @@ -169,49 +159,6 @@ def eval_pfn_ei( ei = ei.flatten() return ei - def eval_gp_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """Vanilla-EI modified to preprocess samples and accept list of incumbents.""" - _x = x.copy() - try: - mu, cov = self.surrogate_model.predict(_x) - except ValueError as e: - raise e - # return -1.0 # in case of error. return ei of -1 - std = torch.sqrt(torch.diag(cov)) - - mu_star = inc_list.to(mu.device) # IMPORTANT change from vanilla-EI - - gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) - # u = (mu - mu_star - self.xi) / std - # ei = std * updf + (mu - mu_star - self.xi) * ucdf - if self.log_ei: - # we expect that f_min is in log-space - f_min = mu_star - self.xi - v = (f_min - mu) / std - ei = torch.exp(f_min) * gauss.cdf(v) - torch.exp( - 0.5 * torch.diag(cov) + mu - ) * gauss.cdf(v - std) - else: - u = (mu_star - mu - self.xi) / std - ucdf = gauss.cdf(u) - updf = torch.exp(gauss.log_prob(u)) - ei = std * updf + (mu_star - mu - self.xi) * ucdf - # Clip ei if std == 0.0 - # ei = torch.where(torch.isclose(std, torch.tensor(0.0)), 0, ei) - if self.augmented_ei: - sigma_n = self.surrogate_model.likelihood - ei *= 1.0 - torch.sqrt(torch.tensor(sigma_n, device=mu.device)) / torch.sqrt( - sigma_n + torch.diag(cov) - ) - - # Save data for writing - self.mu_star = mu_star.detach().numpy().tolist() - self.mu = mu.detach().numpy().tolist() - self.std = std.detach().numpy().tolist() - return ei - class MFEI_AtMax(MFEI): diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index 981ab3e4..07296d84 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -93,11 +93,6 @@ def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Ser _x ) # IMPORTANT change from vanilla-EI pi = self.eval_pfn_pi(_x_tok, inc_list) - elif self.surrogate_model_name in ["gp", "gp_hierarchy"]: - _x, inc_list = self.preprocess_gp( - _x - ) # IMPORTANT change from vanilla-EI - pi = self.eval_gp_pi(_x.values.tolist(), inc_list) else: raise ValueError( f"Unrecognized surrogate model name: {self.surrogate_model_name}" @@ -119,21 +114,6 @@ def eval_pfn_pi( pi = pi.flatten() return pi - def eval_gp_pi( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - _x = x.copy() - try: - mu, cov = self.surrogate_model.predict(_x) - except ValueError as e: - raise e - std = torch.sqrt(torch.diag(cov)) - mu_star = inc_list.to(mu.device) - - gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) - pi = gauss.cdf((mu_star - mu) / (std + 1E-9)) - return pi - class MFPI_AtMax(MFPI): diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 71953d21..bee42b86 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -227,9 +227,7 @@ def _fantasize_pending(self, train_x, train_y, pending_x): return train_x, train_y, train_lcs def _fit(self, train_x, train_y, train_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name == "ftpfn": + if self.surrogate_model_name == "ftpfn": # do nothing - no training required pass else: @@ -239,9 +237,7 @@ def _fit(self, train_x, train_y, train_lcs): ) def _predict(self, test_x, test_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - return self.surrogate_model.predict(test_x) - elif self.surrogate_model_name == "ftpfn": + if self.surrogate_model_name == "ftpfn": return self.surrogate_model.predict(test_x, test_lcs) else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options From 99a74be414484f695c2e930ec7ad4e7ee2a165d3 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 29 Aug 2024 23:55:36 +0200 Subject: [PATCH 19/46] Making ifbo related code much leaner --- .../acquisition_functions/mf_ei.py | 45 ++++++++-- .../acquisition_functions/mf_pi.py | 6 +- .../bayesian_optimization/models/__init__.py | 4 +- .../models/{pfn.py => ftpfn.py} | 2 +- neps/optimizers/multi_fidelity/mf_bo.py | 87 ++++++++----------- neps/optimizers/multi_fidelity/utils.py | 32 ------- 6 files changed, 82 insertions(+), 94 deletions(-) rename neps/optimizers/bayesian_optimization/models/{pfn.py => ftpfn.py} (99%) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index 91c0cd98..b4c0d58a 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -8,13 +8,13 @@ from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.multi_fidelity.utils import MFObservedData +from neps.optimizers.multi_fidelity.utils import MFObservedData, normalize_vectorize_config from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ComprehensiveExpectedImprovement class MFStepBase(BaseAcquisition): - """A class holding common operations that can be inherited. + """A class holding common operations that can be inherited for freeze-thaw based acquisitions. WARNING: Unsafe use of self attributes, can break if not used correctly. """ @@ -36,6 +36,39 @@ def set_state( def get_budget_level(self, config) -> int: return int((config.fidelity.value - config.fidelity.lower) / self.b_step) + def tokenize_for_freeze_thaw(self, df: pd.DataFrame, as_tensor: bool = False): + """Function to format data for PFN. + + The PFN training data expects the following format: + x: [ + # config ID, normalized fidelity, hyperparameters in unit-hypercube + [0, 0.1, hp1, hp2, ..., hpN], + [1, 0.1, hp1, hp2, ..., hpN], + [2, 0.1, hp1, hp2, ..., hpN], + [1, 0.2, hp1, hp2, ..., hpN], + [3, 0.1, hp1, hp2, ..., hpN], + ... + ] + y: [ + # normalized scalar loss + loss_of_0_at_0.1, + loss_of_1_at_0.1, + loss_of_2_at_0.1, + loss_of_1_at_0.2, + loss_of_3_at_0.1, + ... + ] + """ + configs = np.array([normalize_vectorize_config(c) for c in df]) + fidelity = np.array([c.fidelity.value for c in df]).reshape(-1, 1) + idx = df.index.values.reshape(-1, 1) + data = np.hstack([idx, fidelity, configs]) + + if as_tensor: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + data = torch.Tensor(data).to(device) + return data + def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. @@ -43,15 +76,13 @@ def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.T required by the multi-fidelity Expected Improvement acquisition function. """ _x, inc_list = self.preprocess(x.copy()) - _x_tok = self.observations.tokenize(_x, as_tensor=True) + _x_tok = self.tokenize_for_freeze_thaw(_x, as_tensor=True) len_partial = len(self.observations.seen_config_ids) z_min = x[0].fidelity.lower z_max = x[0].fidelity.upper # converting fidelity to the discrete budget level # STRICT ASSUMPTION: fidelity is the second dimension - _x_tok[:len_partial, 1] = ( - _x_tok[:len_partial, 1] + self.b_step - z_min - ) / self.b_step + _x_tok[:len_partial, 1] = (_x_tok[:len_partial, 1] + self.b_step - z_min) / self.b_step _x_tok[:, 1] = _x_tok[:, 1] / z_max return _x, _x_tok, inc_list @@ -324,7 +355,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # expected input: IDs pertaining to the tabular data x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] inc_list = [] @@ -357,7 +387,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) - #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index 07296d84..e843e3ee 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -12,7 +12,9 @@ from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.search_spaces.search_space import SearchSpace from neps.optimizers.multi_fidelity.utils import MFObservedData -from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ComprehensiveExpectedImprovement +from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ( + ComprehensiveExpectedImprovement +) from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFStepBase @@ -273,7 +275,7 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. + required by the multi-fidelity acquisition function. """ if self.pipeline_space.has_tabular: # preprocess tabular space differently diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index 43a38ef6..fdc84df4 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -3,11 +3,11 @@ from .gp import ComprehensiveGP from .gp_hierarchy import ComprehensiveGPHierarchy -from .pfn import IFBOSurrogate +from .ftpfn import FTPFNSurrogate SurrogateModelMapping = { "gp": ComprehensiveGP, "gp_hierarchy": ComprehensiveGPHierarchy, - "ftpfn": IFBOSurrogate, + "ftpfn": FTPFNSurrogate, } diff --git a/neps/optimizers/bayesian_optimization/models/pfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py similarity index 99% rename from neps/optimizers/bayesian_optimization/models/pfn.py rename to neps/optimizers/bayesian_optimization/models/ftpfn.py index 0db81182..55e10cfb 100644 --- a/neps/optimizers/bayesian_optimization/models/pfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -7,7 +7,7 @@ from ifbo import FTPFN -class IFBOSurrogate: +class FTPFNSurrogate: """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" def __init__(self, target_path: Path = None, version: str = "0.0.1", *args, **kwargs): diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index bee42b86..323a59b9 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -11,7 +11,7 @@ from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity - +from neps.search_spaces.search_space import SearchSpace class MFBOBase: @@ -207,15 +207,21 @@ def _fantasize_pending(self, train_x, train_y, pending_x): # Select configs that are neither pending nor resulted in error completed_configs = self.observed_configs.completed_runs.copy(deep=True) # IMPORTANT: preprocess observations to get appropriate training data - train_x, train_lcs, train_y = self.observed_configs.get_training_data_4ifbo( + train_x, train_lcs, train_y = self.get_training_data_for_freeze_thaw( completed_configs, self.pipeline_space ) + # self.observed_configs.get_training_data_4ifbo( + # completed_configs, self.pipeline_space + # ) pending_condition = self.observed_configs.pending_condition if pending_condition.any(): pending_configs = self.observed_configs.df.loc[pending_condition] - pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4ifbo( + pending_x, pending_lcs, _ = self.get_training_data_for_freeze_thaw( pending_configs ) + # get_training_data_4ifbo( + # pending_configs + # ) self._fit(train_x, train_y, train_lcs) _y, _ = self._predict(pending_x, pending_lcs) _y = _y.tolist() @@ -237,13 +243,33 @@ def _fit(self, train_x, train_y, train_lcs): ) def _predict(self, test_x, test_lcs): - if self.surrogate_model_name == "ftpfn": - return self.surrogate_model.predict(test_x, test_lcs) - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) + raise NotImplementedError + # if self.surrogate_model_name == "ftpfn": + # return self.surrogate_model.predict(test_x, test_lcs) + # else: + # # check neps/optimizers/bayesian_optimization/models/__init__.py for options + # raise ValueError( + # f"Surrogate model {self.surrogate_model_name} not supported!" + # ) + + def get_training_data_for_freeze_thaw( + self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None + ): + configs = [] + learning_curves = [] + performance = [] + for idx, row in df.iterrows(): + config_id = idx[0] + budget_id = idx[1] + if pipeline_space.has_tabular: + _row = pd.Series([row[self.observed_configs.config_col]], index=[config_id]) + _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) + configs.append(_row.values[0]) + else: + configs.append(row[self.observed_configs.config_col]) + performance.append(row[self.observed_configs.perf_col]) + learning_curves.append(self.observed_configs.extract_learning_curve(config_id, budget_id)) + return configs, learning_curves, performance def set_state( self, @@ -305,14 +331,6 @@ def _fit(self, *args): # pylint: disable=unused-argument # no training required,, only preprocessing the training data as context during inference self.preprocess_training_set() - def _predict(self, test_x, test_lcs): - assert "pfn" in self.surrogate_model_name - test_x = self.preprocess_test_set(test_x) - return self.surrogate_model(self.train_x, self.train_y, test_x) - - def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: - return x - def preprocess_training_set(self): _configs = self.observed_configs.df.config.values.copy() @@ -333,34 +351,5 @@ def preprocess_training_set(self): idxs = idxs.astype(float) idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper # TODO: account for fantasization - self.surrogate_model.train_x = self._cast_tensor_shapes( - torch.Tensor(np.hstack([idxs, configs])).to(device) - ) - self.surrogate_model.train_y = self._cast_tensor_shapes( - torch.Tensor(performances).to(device) - ) - - def preprocess_test_set(self, test_x): - _len = len(self.observed_configs.all_configs_list()) - device = self.surrogate_model.device - - new_idxs = np.arange(_len, len(test_x)) - base_fidelity = np.array([1] * len(new_idxs)) - new_token_ids = np.hstack( - (new_idxs.T.reshape(-1, 1), base_fidelity.T.reshape(-1, 1)) - ) - # the following operation takes each element in the array and stacks it vertically - # in this case, should convert a (n,) array to (n, 2) by flattening the elements - existing_token_ids = np.vstack(self.observed_configs.token_ids).astype(int) - token_ids = np.vstack((existing_token_ids, new_token_ids)) - - configs = np.array([normalize_vectorize_config(c) for c in test_x]) - self.surrogate_model.test_x = self._cast_tensor_shapes( - torch.Tensor(np.hstack([token_ids, configs])).to(device) - ) - return self.surrogate_model.test_x - - def _predict(self, test_x, test_lcs): - assert self.surrogate_model_name == "pfn" - test_x = self.preprocess_test_set(test_x) - return self.surrogate_model(self.train_x, self.train_y, test_x) + self.surrogate_model.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) + self.surrogate_model.train_y = torch.Tensor(performances).to(device) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index e5c2a900..2a01a844 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -273,25 +273,6 @@ def extract_learning_curve( lc = lcs.loc[config_id, :budget_id].values.flatten().tolist() return deepcopy(lc) - def get_training_data_4ifbo( - self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None - ): - configs = [] - learning_curves = [] - performance = [] - for idx, row in df.iterrows(): - config_id = idx[0] - budget_id = idx[1] - if pipeline_space.has_tabular: - _row = pd.Series([row[self.config_col]], index=[config_id]) - _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) - configs.append(_row.values[0]) - else: - configs.append(row[self.config_col]) - performance.append(row[self.perf_col]) - learning_curves.append(self.extract_learning_curve(config_id, budget_id)) - return configs, learning_curves, performance - def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series: """Returns the best score recorded per config across fidelities seen. """ @@ -325,19 +306,6 @@ def get_tokenized_data(self, df: pd.DataFrame): return configs, idxs, performances - def tokenize(self, df: pd.DataFrame, as_tensor: bool = False): - """Function to format data for PFN.""" - configs = np.array([normalize_vectorize_config(c) for c in df]) - fidelity = np.array([c.fidelity.value for c in df]).reshape(-1, 1) - idx = df.index.values.reshape(-1, 1) - - data = np.hstack([idx, fidelity, configs]) - - if as_tensor: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - data = torch.Tensor(data).to(device) - return data - @property def token_ids(self) -> np.ndarray: return self.df.index.values From f0515a0a21a251cb3e1ff881ed60472a92153154 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 30 Aug 2024 02:16:31 +0200 Subject: [PATCH 20/46] Adding mean prediction for FT-PFN for fantasizing --- .../bayesian_optimization/models/ftpfn.py | 5 ++ neps/optimizers/multi_fidelity/mf_bo.py | 46 ++++++------------- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index 55e10cfb..600409b5 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -38,6 +38,11 @@ def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: return x.reshape(x.shape[0], 1) raise ValueError(f"Shape not recognized: {x.shape}") + @torch.no_grad() + def get_mean_performance(self, test_x: torch.Tensor) -> torch.Tensor: + logits = self._get_logits(test_x).squeeze() + return self.ftpfn.model.criterion.mean(logits) + @torch.no_grad() def get_pi(self, test_x, y_best): logits = self._get_logits(test_x) diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 323a59b9..8032af99 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -8,7 +8,6 @@ from neps.utils.common import instance_from_map from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping -from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity from neps.search_spaces.search_space import SearchSpace @@ -210,18 +209,13 @@ def _fantasize_pending(self, train_x, train_y, pending_x): train_x, train_lcs, train_y = self.get_training_data_for_freeze_thaw( completed_configs, self.pipeline_space ) - # self.observed_configs.get_training_data_4ifbo( - # completed_configs, self.pipeline_space - # ) pending_condition = self.observed_configs.pending_condition if pending_condition.any(): + print(f"\n\nFound pending: {pending_condition.sum()}\n\n") pending_configs = self.observed_configs.df.loc[pending_condition] pending_x, pending_lcs, _ = self.get_training_data_for_freeze_thaw( - pending_configs + pending_configs, self.pipeline_space ) - # get_training_data_4ifbo( - # pending_configs - # ) self._fit(train_x, train_y, train_lcs) _y, _ = self._predict(pending_x, pending_lcs) _y = _y.tolist() @@ -242,18 +236,19 @@ def _fit(self, train_x, train_y, train_lcs): f"Surrogate model {self.surrogate_model_name} not supported!" ) - def _predict(self, test_x, test_lcs): - raise NotImplementedError - # if self.surrogate_model_name == "ftpfn": - # return self.surrogate_model.predict(test_x, test_lcs) - # else: - # # check neps/optimizers/bayesian_optimization/models/__init__.py for options - # raise ValueError( - # f"Surrogate model {self.surrogate_model_name} not supported!" - # ) + def _predict(self, test_x): + if self.surrogate_model_name == "ftpfn": + mean = self.surrogate_model.get_mean_performance(test_x) + if mean.is_cuda: + mean = mean.cpu() + else: + # check neps/optimizers/bayesian_optimization/models/__init__.py for options + raise ValueError( + f"Surrogate model {self.surrogate_model_name} not supported!" + ) def get_training_data_for_freeze_thaw( - self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None + self, df: pd.DataFrame, pipeline_space: SearchSpace ): configs = [] learning_curves = [] @@ -287,21 +282,6 @@ def set_state( name="surrogate model", kwargs=self.surrogate_model_args, ) - - def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): - if train_x is None: - train_x = [] - if train_y is None: - train_y = [] - if pending_x is None: - pending_x = [] - - if decay_t is None: - decay_t = len(train_x) - train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) - self.surrogate_model._fit(train_x, train_y, train_lcs) - - return self.surrogate_model, decay_t class PFNSurrogate(FreezeThawModel): From 0b2a6ceaec9239eb33186b15e7980f3cec361e50 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 30 Aug 2024 04:00:34 +0200 Subject: [PATCH 21/46] PFN perf maximizing + ifbo example + 3D plot --- neps/optimizers/multi_fidelity/mf_bo.py | 2 + neps/plot/plot3D.py | 233 ++++++++++++++++++++++++ neps_examples/efficiency/freeze_thaw.py | 156 ++++++++++++++++ neps_examples/template/ifbo_template.py | 37 ++++ 4 files changed, 428 insertions(+) create mode 100644 neps/plot/plot3D.py create mode 100644 neps_examples/efficiency/freeze_thaw.py create mode 100644 neps_examples/template/ifbo_template.py diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 8032af99..38c87d88 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -328,6 +328,8 @@ def preprocess_training_set(self): configs, idxs, performances = self.observed_configs.get_tokenized_data( self.observed_configs.df.copy().assign(config=_configs) ) + # NOTE: FT-PFN is pretrained on accuracy and hence, converting to a `maximizing` metric + performances = 1 - performances idxs = idxs.astype(float) idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper # TODO: account for fantasization diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py new file mode 100644 index 00000000..143388ff --- /dev/null +++ b/neps/plot/plot3D.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from pathlib import Path +import multiprocessing as mp +from functools import partial + +from argparse import ArgumentParser + +from matplotlib import pyplot as plt +from matplotlib import cm +from matplotlib.collections import LineCollection +from mpl_toolkits.mplot3d.art3d import Line3DCollection +from matplotlib.colors import Normalize +import matplotlib +matplotlib.use('TkAgg') + +import itertools + +from neps.status.status import get_run_summary_csv +import re +import pandas as pd +import numpy as np + +from typing import Callable + +# Copied from plot.py +HERE = Path(__file__).parent.absolute() +DEFAULT_RESULTS_PATH = HERE.parent / "results" + + +@dataclass +class Plotter3D: + loss_key: str = "Loss" + fidelity_key: str = "epochs" + config_column: str | None = None + run_path: str | Path | None = None + base_results_path: str | Path = DEFAULT_RESULTS_PATH + strict: bool = False + get_x: Callable[[pd.DataFrame], np.array] | None = None + get_y: Callable[[pd.DataFrame], np.array] | None = None + get_z: Callable[[pd.DataFrame], np.array] | None = None + get_color: Callable[[pd.DataFrame], np.array] | None = None + scatter: bool = True + footnote: bool = True + alpha: float = 0.9 + scatter_size: float | int = 3 + bck_color_2d: tuple[float] = (0.8, 0.82, 0.8) + view_angle: tuple[float | int] = (15, -70) + + def __post_init__(self): + if self.run_path is not None: + assert Path(self.run_path).absolute().is_dir(), \ + f"Path {self.run_path} is not a directory" + self.data_path = Path(self.run_path).absolute() / "summary_csv" / "config_data.csv" + assert self.data_path.exists(), f"File {self.data_path} does not exist" + self.df = pd.read_csv(self.data_path, index_col=0, float_precision="round_trip") + + self.loss_range = (self.df["result.loss"].min(), self.df["result.loss"].max()) + _fid_key = f"config.{self.fidelity_key}" + self.epochs_range = (self.df[_fid_key].min(), self.df[_fid_key].max()) + # breakpoint() + + @staticmethod + def get_x(df: pd.DataFrame) -> np.array: + return df["epochID"].to_numpy() + + @staticmethod + def get_y(df: pd.DataFrame) -> np.array: + y_ = df["configID"].to_numpy() + return np.ones_like(y_) * y_[0] + + @staticmethod + def get_z(df: pd.DataFrame) -> np.array: + return df["result.loss"].to_numpy() + + @staticmethod + def get_color(df: pd.DataFrame) -> np.array: + return df.index.to_numpy() + + def prep_df(self, df: pd.DataFrame = None) -> pd.DataFrame: + df = self.df if df is None else df + time_cols = ["metadata.time_started", "metadata.time_end"] + df = df.sort_values(by=time_cols).reset_index(drop=True) + split_values = np.array([[*index.split('_')] for index in self.df.index]) + df[['configID', 'epochID']] = split_values + df.configID = df.configID.astype(int) + df.epochID = df.epochID.astype(int) + if df.epochID.min() == 0: + df.epochID += 1 + return df + + def plot3D( + self, + data: pd.DataFrame = None, + save_path: str | Path | None = None, + filename: str = "freeze_thaw" + ): + data = self.prep_df(data) + + # Create the figure and the axes for the plot + fig, (ax3D, ax, cax) = plt.subplots(1, 3, figsize=(12, 5), width_ratios=(20, 20, 1)) + + # remove a 2D axis and replace with a 3D projection one + ax3D.remove() + ax3D = fig.add_subplot(131, projection='3d') + + # Create the normalizer to normalize the color values + norm = Normalize(self.get_color(data).min(), self.get_color(data).max()) + + # Counters to keep track of the configurations run for only a single fidelity + n_lines = 0 + n_mins = 0 + + data_groups = data.groupby("configID", sort=False) + + for idx, (configID, data_) in enumerate(data_groups): + + x = self.get_x(data_) + y = self.get_y(data_) + z = self.get_z(data_) + + y = np.ones_like(y) * idx + color = self.get_color(data_) + + if len(x) < 2: + n_mins += 1 + if self.scatter: + ax3D.scatter( + y, + z, + s=self.scatter_size, + zs=0, + zdir="x", + c=color, + cmap='RdYlBu_r', + norm=norm, + alpha=self.alpha * 0.8 + ) + ax.scatter( + x, + z, + s=self.scatter_size, + c=color, + cmap='RdYlBu_r', + norm=norm, + alpha=self.alpha * 0.8 + ) + else: + n_lines += 1 + + # Plot 3D + # Get segments for all lines + points3D = np.array([x, y, z]).T.reshape(-1, 1, 3) + segments3D = np.concatenate([points3D[:-1], points3D[1:]], axis=1) + + # Construct lines from segments + lc3D = Line3DCollection(segments3D, cmap='RdYlBu_r', norm=norm, alpha=self.alpha) + lc3D.set_array(color) + + # Draw lines + ax3D.add_collection3d(lc3D) + + # Plot 2D + # Get segments for all lines + points = np.array([x, z]).T.reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + + # Construct lines from segments + lc = LineCollection(segments, cmap="RdYlBu_r", norm=norm, alpha=self.alpha) + lc.set_array(color) + + # Draw lines + ax.add_collection(lc) + + ax3D.axes.set_xlim3d(left=self.epochs_range[0], right=self.epochs_range[1]) + ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) + ax3D.axes.set_zlim3d(bottom=self.loss_range[0], top=self.loss_range[1]) + + ax3D.set_xlabel('Epochs') + ax3D.set_ylabel('Iteration sampled') + ax3D.set_zlabel(f'{self.loss_key}') + + # set view angle + ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) + + ax.autoscale_view() + ax.set_xlabel(self.fidelity_key) + ax.set_ylabel(f'{self.loss_key}') + ax.set_facecolor(self.bck_color_2d) + fig.suptitle("ifBO run") + + if self.footnote: + fig.text( + 0.01, 0.02, + f"Total {n_lines + n_mins} configs evaluated; for multiple budgets: " + f"{n_lines}, for single budget: {n_mins}", + ha='left', + va="bottom", + fontsize=10 + ) + + plt.colorbar( + cm.ScalarMappable(norm=norm, cmap="RdYlBu_r"), + cax=cax, + label='Iteration', + use_gridspec=True, + alpha=self.alpha + ) + fig.tight_layout() + + self.save(save_path, filename) + plt.close(fig) + + def save(self, save_path: str | Path | None = None, filename: str = "freeze_thaw"): + run_path = Path(save_path if save_path is not None else self.run_path) + run_path.mkdir(parents=True, exist_ok=True) + assert run_path.is_dir() + plot_path = run_path / f"Plot3D_{filename}.png" + + plt.savefig( + plot_path, + bbox_inches='tight' + ) + + +if __name__ == "__main__": + plotter = Plotter3D( + run_path="./results", + fidelity_key="epochs" + ) + plotter.plot3D() diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py new file mode 100644 index 00000000..98ddc95b --- /dev/null +++ b/neps_examples/efficiency/freeze_thaw.py @@ -0,0 +1,156 @@ +import logging +from pathlib import Path +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader +from torchvision import datasets, transforms + +import neps +from neps.plot.plot3D import Plotter3D + + +class SimpleNN(nn.Module): + def __init__(self, input_size, num_layers, num_neurons): + super().__init__() + layers = [nn.Flatten()] + + for _ in range(num_layers): + layers.append(nn.Linear(input_size, num_neurons)) + layers.append(nn.ReLU()) + input_size = num_neurons # Set input size for the next layer + + layers.append(nn.Linear(num_neurons, 10)) # Output layer for 10 classes + self.model = nn.Sequential(*layers) + + def forward(self, x): + return self.model(x) + + +def training_pipeline( + pipeline_directory, + previous_pipeline_directory, + num_layers, + num_neurons, + epochs, + learning_rate, + weight_decay +): + """ + Trains and validates a simple neural network on the MNIST dataset. + + Args: + num_layers (int): Number of hidden layers in the network. + num_neurons (int): Number of neurons in each hidden layer. + epochs (int): Number of training epochs. + learning_rate (float): Learning rate for the optimizer. + optimizer (str): Name of the optimizer to use ('adam' or 'sgd'). + + Returns: + float: The average loss over the validation set after training. + + Raises: + KeyError: If the specified optimizer is not supported. + """ + # Transformations applied on each image + transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize( + (0.1307,), (0.3081,) + ), # Mean and Std Deviation for MNIST + ] + ) + + # Loading MNIST dataset + dataset = datasets.MNIST( + root="./.data", train=True, download=True, transform=transform + ) + train_set, val_set = torch.utils.data.random_split(dataset, [50000, 10000]) + train_loader = DataLoader(train_set, batch_size=64, shuffle=True) + val_loader = DataLoader(val_set, batch_size=1000, shuffle=False) + + model = SimpleNN(28 * 28, num_layers, num_neurons) + criterion = nn.CrossEntropyLoss() + + # Select optimizer + optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay) + + # Loading potential checkpoint + start_epoch = 1 + if previous_pipeline_directory is not None: + if (Path(previous_pipeline_directory) / "checkpoint.pt").exists(): + states = torch.load(Path(previous_pipeline_directory) / "checkpoint.pt") + model = states["model"] + optimizer = states["optimizer"] + start_epoch = states["epochs"] + + # Training loop + for epoch in range(start_epoch, epochs + 1): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + optimizer.zero_grad() + output = model(data) + loss = criterion(output, target) + loss.backward() + optimizer.step() + + # Validation loop + model.eval() + val_loss = 0 + val_correct = 0 + val_total = 0 + with torch.no_grad(): + for data, target in val_loader: + output = model(data) + val_loss += criterion(output, target).item() + + # Get the predicted class + _, predicted = torch.max(output.data, 1) + + # Count correct predictions + val_total += target.size(0) + val_correct += (predicted == target).sum().item() + + val_loss /= len(val_loader.dataset) + val_err = 1 - val_correct / val_total + + # Saving checkpoint + states = { + "model": model, + "optimizer": optimizer, + "epochs": epochs, + } + torch.save(states, Path(pipeline_directory) / "checkpoint.pt") + + return val_err + + +if __name__ == "__main__": + + pipeline_space = { + "learning_rate": neps.Float(1e-5, 1e-1, log=True), + "num_layers": neps.Integer(1, 5), + "num_neurons": neps.Integer(64, 128), + "weight_decay": neps.Float(1e-5, 0.1, log=True), + "epochs": neps.Integer(1, 10, is_fidelity=True), + } + + neps.run( + pipeline_space=pipeline_space, + run_pipeline=training_pipeline, + searcher="ifbo", + max_evaluations_total=25, + root_directory="debug/ifbo-mnist/", + overwrite_working_directory=True + + ) + + + # NOTE: this is experimental and may not work as expected + ## plotting a 3D plot for learning curves explored by ifbo + plotter = Plotter3D( + run_path="debug/ifbo-mnist/", # same as `root_directory` above + fidelity_key="epochs", # same as `pipeline_space` + ) + plotter.plot3D(filename="ifbo") diff --git a/neps_examples/template/ifbo_template.py b/neps_examples/template/ifbo_template.py new file mode 100644 index 00000000..9e99c820 --- /dev/null +++ b/neps_examples/template/ifbo_template.py @@ -0,0 +1,37 @@ +import numpy as np + +from neps.plot.plot3D import Plotter3D + +from .priorband_template import pipeline_space, run_pipeline + + +ASSUMED_MAX_LOSS = 10 + + +def ifbo_run_pipeline( + pipeline_directory, # The directory where the config is saved + previous_pipeline_directory, # The directory of the config's immediate lower fidelity + **config, # The hyperparameters to be used in the pipeline +) -> dict | float: + result_dict = run_pipeline( + pipeline_directory=pipeline_directory, # NOTE: can only support <=10 HPs and no categoricals + previous_pipeline_directory=previous_pipeline_directory, + **config, + ) + # NOTE: Normalize the loss to be between 0 and 1 + ## crucial for ifBO's FT-PFN surrogate to work as expected + result_dict["loss"] = np.clip(result_dict["loss"], 0, ASSUMED_MAX_LOSS) / ASSUMED_MAX_LOSS + return result_dict + + +if __name__ == "__main__": + import neps + + neps.run( + run_pipeline=run_pipeline, + pipeline_space=pipeline_space(), + root_directory="results", + max_evaluations_total=50, + searcher="ifbo", + ) +# end of ifbo_run_pipeline \ No newline at end of file From d84f46190add1bf7d78c01689592e0a785964137 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 30 Aug 2024 04:08:59 +0200 Subject: [PATCH 22/46] Updating example for debugging --- neps/__init__.py | 2 ++ neps_examples/efficiency/freeze_thaw.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/neps/__init__.py b/neps/__init__.py index caca68e2..ab1f3d50 100644 --- a/neps/__init__.py +++ b/neps/__init__.py @@ -12,6 +12,7 @@ IntegerParameter, ) from neps.status.status import get_summary_dict, status +from neps.plot.tensorboard_eval import tblogger Integer = IntegerParameter Float = FloatParameter @@ -38,4 +39,5 @@ "GraphGrammar", "GraphGrammarCell", "GraphGrammarRepetitive", + "tblogger" ] diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index 98ddc95b..f88c7810 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -7,6 +7,7 @@ from torchvision import datasets, transforms import neps +from neps import tblogger from neps.plot.plot3D import Plotter3D @@ -123,6 +124,20 @@ def training_pipeline( } torch.save(states, Path(pipeline_directory) / "checkpoint.pt") + # Logging + tblogger.log( + loss=val_loss, + current_epoch=epochs, + write_summary_incumbent=True, # Set to `True` for a live incumbent trajectory. + writer_config_scalar=True, # Set to `True` for a live loss trajectory for each config. + writer_config_hparam=True, # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. + # Appending extra data + extra_data={ + "train_loss": tblogger.scalar_logging(loss.item()), + "val_err": tblogger.scalar_logging(val_err), + }, + ) + return val_err @@ -146,7 +161,6 @@ def training_pipeline( ) - # NOTE: this is experimental and may not work as expected ## plotting a 3D plot for learning curves explored by ifbo plotter = Plotter3D( From 929ca3aba72a16ff524834893915cbd5b7529b3f Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 30 Aug 2024 04:55:33 +0200 Subject: [PATCH 23/46] Adding ifbo fantasization initial --- neps/optimizers/multi_fidelity/mf_bo.py | 23 +++++++++++++++-------- neps/optimizers/multi_fidelity/utils.py | 1 - 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 38c87d88..eeb52366 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -9,9 +9,11 @@ from neps.utils.common import instance_from_map from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids -from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity +from neps.optimizers.multi_fidelity_prior.utils import ( + calc_total_resources_spent, normalize_vectorize_config, update_fidelity +) from neps.search_spaces.search_space import SearchSpace - +from neps.optimizers.multi_fidelity.utils import class MFBOBase: """Designed to work with model-based search on SH-based multi-fidelity algorithms. @@ -213,13 +215,17 @@ def _fantasize_pending(self, train_x, train_y, pending_x): if pending_condition.any(): print(f"\n\nFound pending: {pending_condition.sum()}\n\n") pending_configs = self.observed_configs.df.loc[pending_condition] - pending_x, pending_lcs, _ = self.get_training_data_for_freeze_thaw( - pending_configs, self.pipeline_space - ) self._fit(train_x, train_y, train_lcs) - _y, _ = self._predict(pending_x, pending_lcs) + pending_x = [] # torch.Tensor([]) + for _id in pending_configs.index.get_level_values(0): + _config = pending_configs.loc[_id].config.values[0] + # TODO: fix this + _fid = (_config.fidelity.value - _config.fidelity.lower) / \ + (_config.fidelity.upper - _config.fidelity.lower) + pending_x.append([_id, _fid, *normalize_vectorize_config(_config)]) + pending_x = torch.Tensor(pending_x) + _y = self._predict(pending_x) _y = _y.tolist() - train_x.extend(pending_x) train_y.extend(_y) train_lcs.extend(pending_lcs) @@ -236,11 +242,12 @@ def _fit(self, train_x, train_y, train_lcs): f"Surrogate model {self.surrogate_model_name} not supported!" ) - def _predict(self, test_x): + def _predict(self, test_x) -> torch.Tensor: if self.surrogate_model_name == "ftpfn": mean = self.surrogate_model.get_mean_performance(test_x) if mean.is_cuda: mean = mean.cpu() + return mean else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options raise ValueError( diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 2a01a844..11bba5bc 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -7,7 +7,6 @@ import torch from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids def continuous_to_tabular( From 6e02535f51f7953b7b7e39ba1e4e7495a938c295 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 30 Aug 2024 05:05:04 +0200 Subject: [PATCH 24/46] Import error fix --- neps/optimizers/multi_fidelity/mf_bo.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index eeb52366..cfb38cc5 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -9,11 +9,9 @@ from neps.utils.common import instance_from_map from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids -from neps.optimizers.multi_fidelity_prior.utils import ( - calc_total_resources_spent, normalize_vectorize_config, update_fidelity -) +from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.multi_fidelity.utils import +from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config class MFBOBase: """Designed to work with model-based search on SH-based multi-fidelity algorithms. @@ -213,6 +211,10 @@ def _fantasize_pending(self, train_x, train_y, pending_x): ) pending_condition = self.observed_configs.pending_condition if pending_condition.any(): + raise NotImplementedError( + "Fantasization not implemented yet!" + "This optimizer cannot be run with multiple workers yet." + ) print(f"\n\nFound pending: {pending_condition.sum()}\n\n") pending_configs = self.observed_configs.df.loc[pending_condition] self._fit(train_x, train_y, train_lcs) From 78fd1e94b01c896bd6d1cf341755b9b30627e344 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Sun, 1 Sep 2024 07:59:14 +0200 Subject: [PATCH 25/46] Aggresive cleanup and leaning --- .../acquisition_functions/__init__.py | 8 +- .../acquisition_functions/mf_ei.py | 396 ------------------ .../acquisition_functions/mf_pi.py | 304 +++----------- .../freeze_thaw_sampler.py | 10 +- neps/optimizers/multi_fidelity/ifbo.py | 1 + neps/optimizers/multi_fidelity/mf_bo.py | 187 ++++----- neps/optimizers/multi_fidelity/utils.py | 75 +++- neps/plot/plot3D.py | 1 - neps_examples/efficiency/freeze_thaw.py | 2 +- 9 files changed, 198 insertions(+), 786 deletions(-) delete mode 100644 neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index eba0f694..add581b5 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -27,8 +27,8 @@ augmented_ei=False, log_ei=True, ), - # # Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with - # # the highest *posterior mean*, which are preferred when the optimisation is noisy. + ## Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with + ## the highest *posterior mean*, which are preferred when the optimisation is noisy. "AEI": partial( ComprehensiveExpectedImprovement, in_fill="posterior", @@ -36,8 +36,8 @@ ), "MFPI-random": partial( MFPI_Random, - in_fill="best", - augmented_ei=False, + threshold="random", + horizon="random", ), "UCB": partial( UpperConfidenceBound, diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py deleted file mode 100644 index b4c0d58a..00000000 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ /dev/null @@ -1,396 +0,0 @@ -# type: ignore -from typing import Any, Iterable, Tuple, Union - -import numpy as np -import pandas as pd -import torch -from torch.distributions import Normal - -from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.multi_fidelity.utils import MFObservedData, normalize_vectorize_config -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition -from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ComprehensiveExpectedImprovement - - -class MFStepBase(BaseAcquisition): - """A class holding common operations that can be inherited for freeze-thaw based acquisitions. - - WARNING: Unsafe use of self attributes, can break if not used correctly. - """ - def set_state( - self, - pipeline_space: SearchSpace, - surrogate_model: Any, - observations: MFObservedData, - b_step: Union[int, float], - **kwargs, - ): - # overload to select incumbent differently through observations - self.pipeline_space = pipeline_space - self.surrogate_model = surrogate_model - self.observations = observations - self.b_step = b_step - return - - def get_budget_level(self, config) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.b_step) - - def tokenize_for_freeze_thaw(self, df: pd.DataFrame, as_tensor: bool = False): - """Function to format data for PFN. - - The PFN training data expects the following format: - x: [ - # config ID, normalized fidelity, hyperparameters in unit-hypercube - [0, 0.1, hp1, hp2, ..., hpN], - [1, 0.1, hp1, hp2, ..., hpN], - [2, 0.1, hp1, hp2, ..., hpN], - [1, 0.2, hp1, hp2, ..., hpN], - [3, 0.1, hp1, hp2, ..., hpN], - ... - ] - y: [ - # normalized scalar loss - loss_of_0_at_0.1, - loss_of_1_at_0.1, - loss_of_2_at_0.1, - loss_of_1_at_0.2, - loss_of_3_at_0.1, - ... - ] - """ - configs = np.array([normalize_vectorize_config(c) for c in df]) - fidelity = np.array([c.fidelity.value for c in df]).reshape(-1, 1) - idx = df.index.values.reshape(-1, 1) - data = np.hstack([idx, fidelity, configs]) - - if as_tensor: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - data = torch.Tensor(data).to(device) - return data - - def preprocess_pfn(self, x: pd.Series) -> Tuple[torch.Tensor, pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - _x, inc_list = self.preprocess(x.copy()) - _x_tok = self.tokenize_for_freeze_thaw(_x, as_tensor=True) - len_partial = len(self.observations.seen_config_ids) - z_min = x[0].fidelity.lower - z_max = x[0].fidelity.upper - # converting fidelity to the discrete budget level - # STRICT ASSUMPTION: fidelity is the second dimension - _x_tok[:len_partial, 1] = (_x_tok[:len_partial, 1] + self.b_step - z_min) / self.b_step - _x_tok[:, 1] = _x_tok[:, 1] / z_max - return _x, _x_tok, inc_list - - -# NOTE: the order of inheritance is important by MRO -class MFEI(MFStepBase, ComprehensiveExpectedImprovement): - def __init__( - self, - pipeline_space: SearchSpace, - surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - inc_normalization: bool = False, - log_ei: bool = False, - ): - super().__init__(augmented_ei, xi, in_fill, log_ei) - self.pipeline_space = pipeline_space - self.surrogate_model_name = surrogate_model_name - self.inc_normalization = inc_normalization - self.surrogate_model = None - self.observations = None - self.b_step = None - - def preprocess_inc_list(self, **kwargs) -> list: - assert "budget_list" in kwargs, "Requires a list of query step for candidate set." - budget_list = kwargs["budget_list"] - performances = self.observations.get_best_performance_for_each_budget() - inc_list = [] - for budget_level in budget_list: - if budget_level in performances.index: - inc = performances[budget_level] - else: - inc = self.observations.get_best_seen_performance() - inc_list.append(inc) - return inc_list - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - budget_list = [] - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.lower - if i <= max(self.observations.seen_config_ids): - # IMPORTANT to set the fidelity at which EI will be calculated only for - # the partial configs that have been observed already - target_fidelity = config.fidelity.value + self.b_step - - if np.less_equal(target_fidelity, config.fidelity.upper): - # only consider the configs with fidelity lower than the max fidelity - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - else: - # if the target_fidelity higher than the max drop the configuration - indices_to_drop.append(i) - else: - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # Collecting incumbent list per configuration - inc_list = self.preprocess_inc_list(budget_list=budget_list) - - return x, torch.Tensor(inc_list) - - def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: - # deepcopy - _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) - if self.surrogate_model_name == "ftpfn": - _x, _x_tok, inc_list = self.preprocess_pfn( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_pfn_ei(_x_tok, inc_list) - else: - raise ValueError( - f"Unrecognized surrogate model name: {self.surrogate_model_name}" - ) - - if self.inc_normalization: - ei = ei / inc_list - - if ei.is_cuda: - ei = ei.cpu() - if len(_x) > 1 and asscalar: - return ei.detach().numpy(), _x - else: - return ei.detach().numpy().item(), _x - - def eval_pfn_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """PFN-EI modified to preprocess samples and accept list of incumbents.""" - ei = self.surrogate_model.get_ei(x.to(self.surrogate_model.device), inc_list) - if len(ei.shape) == 2: - ei = ei.flatten() - return ei - - -class MFEI_AtMax(MFEI): - - def preprocess_inc_list(self, **kwargs) -> list: - assert "len_x" in kwargs, "Requires the length of the candidate set." - len_x = kwargs["len_x"] - # finds global incumbent - inc_value = min(self.observations.get_best_performance_for_each_budget()) - # uses the best seen value as the incumbent in EI computation for all candidates - inc_list = [inc_value] * len_x - return inc_list - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point. - Unlike the base class MFEI, sets the target fidelity to be max budget and the - incumbent choice to be the max seen across history for all candidates. - """ - budget_list = [] - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.upper # change from MFEI - - if config.fidelity.value == target_fidelity: - # if the target_fidelity already reached, drop the configuration - indices_to_drop.append(i) - else: - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - - # drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # create the same incumbent for all candidates - inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) - - return x, torch.Tensor(inc_list) - - -class MFEI_Dyna(MFEI_AtMax): - """ - Computes extrapolation length of curves to maximum fidelity seen. - Uses the global incumbent as the best score in EI computation. - """ - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point. - Unlike the base class MFEI, sets the target fidelity to be max budget and the - incumbent choice to be the max seen across history for all candidates. - """ - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - # find the maximum observed steps per config to obtain the current pseudo_z_max - max_z_level_per_x = self.observations.get_max_observed_fidelity_level_per_config() - pseudo_z_level_max = max_z_level_per_x.max() # highest seen fidelity step so far - # find the fidelity step at which the best seen performance was recorded - z_inc_level = self.observations.get_budget_level_for_best_performance() - # retrieving actual fidelity values from budget level - ## marker 1: the fidelity value at which the best seen performance was recorded - z_inc = self.b_step * z_inc_level + self.pipeline_space.fidelity.lower - ## marker 2: the maximum fidelity value recorded in observation history - pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower - - def update_fidelity(config): - # for all configs, set to pseudo_z_max - ## that is, choose the highest seen fidelity in observation history - z_extrapolate = pseudo_z_max - config.update_hp_values({config.fidelity_name: z_extrapolate}) - return config - - # collect IDs for partial configurations - _partial_config_ids = (x.index <= max(self.observations.seen_config_ids)) - # filter for configurations that reached max budget - indices_to_drop = [ - _idx - for _idx, _x in x.loc[_partial_config_ids].items() - if _x.fidelity.value == self.pipeline_space.fidelity.upper - ] - # drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # set fidelity for all partial configs - x = x.apply(update_fidelity) - - # create the same incumbent for all candidates - inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) - - return x, torch.Tensor(inc_list) - - -class MFEI_Random(MFEI): - - BUDGET = 1000 - - - def __init__( - self, - pipeline_space: SearchSpace, - horizon: str = "random", - threshold: str = "random", - surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - log_ei: bool = False, - ): - super().__init__(pipeline_space, surrogate_model_name, augmented_ei, xi, in_fill, log_ei) - self.horizon = horizon - self.threshold = threshold - - def set_state( - self, - pipeline_space: SearchSpace, - surrogate_model: Any, - observations: MFObservedData, - b_step: Union[int, float], - **kwargs, - ): - # set RNG - self.rng = np.random.RandomState(seed=42) - for i in range(len(observations.completed_runs)): - self.rng.uniform(-4,-1) - self.rng.randint(1,51) - - return super().set_state(pipeline_space, surrogate_model, observations, b_step) - - def sample_horizon(self, steps_passed): - if self.horizon == 'random': - shortest = self.pipeline_space.fidelity.lower - longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) - return self.rng.randint(shortest, longest+1) - elif self.horizon == 'max': - return min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) - else: - return int(self.horizon) - - def sample_threshold(self, f_inc): - if self.threshold == 'random': - lu = 10**self.rng.uniform(-4,-1) # % of gap closed - else: - lu = float(self.threshold) - return f_inc * (1 - lu) - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - indices_to_drop = [] - inc_list = [] - - steps_passed = len(self.observations.completed_runs) - - # Like EI-AtMax, use the global incumbent as a basis for the EI threshold - inc_value = min(self.observations.get_best_performance_for_each_budget()) - # Extension: Add a random min improvement threshold to encourage high risk high gain - inc_value = self.sample_threshold(inc_value) - - # Like MFEI: set fidelities to query using horizon as self.b_step - # Extension: Unlike DyHPO, we sample the horizon randomly over the full range - horizon = self.sample_horizon(steps_passed) - for i, config in x.items(): - if i <= max(self.observations.seen_config_ids): - current_fidelity = config.fidelity.value - if np.equal(config.fidelity.value, config.fidelity.upper): - # this training run has ended, drop it from future selection - indices_to_drop.append(i) - else: - # a candidate partial training run to continue - target_fidelity = config.fidelity.value + horizon - # if horizon exceeds max, query at max - config.uppdate_hp_values({ - config.fidelity_name: min(target_fidelity, config.fidelity.upper) - }) - inc_list.append(inc_value) - else: - # a candidate new training run that we would need to start - current_fidelity = 0 - config.update_hp_values({config.fidelity_name: horizon}) - inc_list.append(inc_value) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - assert len(inc_list) == len(x) - - return x, torch.Tensor(inc_list) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index e843e3ee..71955820 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -1,53 +1,48 @@ # type: ignore -from pathlib import Path from typing import Any, Iterable, Tuple, Union import numpy as np import pandas as pd import torch -from torch.distributions import Normal from copy import deepcopy from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.multi_fidelity.utils import MFObservedData -from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ( - ComprehensiveExpectedImprovement +from neps.optimizers.multi_fidelity.utils import ( + get_freeze_thaw_normalized_step, get_tokenized_data, MFObservedData ) -from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFStepBase +from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition -# NOTE: the order of inheritance is important -class MFPI(MFStepBase, ComprehensiveExpectedImprovement): +class MFPI(BaseAcquisition): + def __init__( self, pipeline_space: SearchSpace, surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - log_ei: bool = False, ): - super().__init__(augmented_ei, xi, in_fill, log_ei) + super().__init__() self.pipeline_space = pipeline_space self.surrogate_model_name = surrogate_model_name self.surrogate_model = None self.observations = None self.b_step = None - def preprocess_inc_list(self, **kwargs) -> list: - assert "budget_list" in kwargs, "Requires a list of query step for candidate set." - budget_list = kwargs["budget_list"] - performances = self.observations.get_best_performance_for_each_budget() - inc_list = [] - for budget_level in budget_list: - if budget_level in performances.index: - inc = performances[budget_level] - else: - inc = self.observations.get_best_seen_performance() - inc_list.append(inc) - return inc_list + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + **kwargs, + ): + # overload to select incumbent differently through observations + self.pipeline_space = pipeline_space + self.surrogate_model = surrogate_model + self.observations = observations + self.b_step = b_step + return def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: """Prepares the configurations for appropriate EI calculation. @@ -55,51 +50,37 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: Takes a set of points and computes the budget and incumbent for each point, as required by the multi-fidelity Expected Improvement acquisition function. """ - budget_list = [] - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.lower - if i <= max(self.observations.seen_config_ids): - # IMPORTANT to set the fidelity at which EI will be calculated only for - # the partial configs that have been observed already - target_fidelity = config.fidelity.value + self.b_step - - if np.less_equal(target_fidelity, config.fidelity.upper): - # only consider the configs with fidelity lower than the max fidelity - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - else: - # if the target_fidelity higher than the max drop the configuration - indices_to_drop.append(i) - else: - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # Collecting incumbent list per configuration - inc_list = self.preprocess_inc_list(budget_list=budget_list) - - return x, torch.Tensor(inc_list) + raise NotImplementedError def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: # deepcopy - _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) + # _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) if self.surrogate_model_name == "ftpfn": - _x, _x_tok, inc_list = self.preprocess_pfn( - _x - ) # IMPORTANT change from vanilla-EI + # preprocesses configs to have the appropriate fidelity values for acquisition + _x, inc_list = self.preprocess(x.copy()) + _x_tok = get_tokenized_data(_x.values, ignore_fidelity=True) + # padding IDs + _idx = torch.Tensor(_x.index.values + 1) + idx_mask = np.where(_idx > max(self.observations.seen_config_ids))[0] + _idx[idx_mask] = 0 + # normalizing steps + _steps = torch.Tensor([ + get_freeze_thaw_normalized_step( + _conf.fidelity.value, + self.pipeline_space.fidelity.lower, + self.pipeline_space.fidelity.upper, + self.b_step + ) + for _conf in _x + ]) + _x_tok = torch.hstack(( + (_idx).reshape(-1, 1), _steps.reshape(-1, 1), torch.Tensor(_x_tok) + )) pi = self.eval_pfn_pi(_x_tok, inc_list) else: raise ValueError( f"Unrecognized surrogate model name: {self.surrogate_model_name}" ) - if pi.is_cuda: pi = pi.cpu() if len(_x) > 1 and asscalar: @@ -117,106 +98,6 @@ def eval_pfn_pi( return pi -class MFPI_AtMax(MFPI): - - def preprocess_inc_list(self, **kwargs) -> list: - assert "len_x" in kwargs, "Requires the length of the candidate set." - len_x = kwargs["len_x"] - # finds global incumbent - inc_value = min(self.observations.get_best_performance_for_each_budget()) - # uses the best seen value as the incumbent in EI computation for all candidates - inc_list = [inc_value] * len_x - return inc_list - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point. - Unlike the base class MFPI, sets the target fidelity to be max budget and the - incumbent choice to be the max seen across history for all candidates. - """ - budget_list = [] - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.upper # change from MFEI - - if config.fidelity.value == target_fidelity: - # if the target_fidelity already reached, drop the configuration - indices_to_drop.append(i) - else: - config.update_hp_values({config.fidelity_name: target_fidelity}) - budget_list.append(self.get_budget_level(config)) - - # drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # create the same incumbent for all candidates - inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) - - return x, torch.Tensor(inc_list) - - -class MFPI_Dyna(MFPI_AtMax): - """ - Computes extrapolation length of curves to maximum fidelity seen. - Uses the global incumbent as the best score in EI computation. - """ - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point. - Unlike the base class MFEI, sets the target fidelity to be max budget and the - incumbent choice to be the max seen across history for all candidates. - """ - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - # find the maximum observed steps per config to obtain the current pseudo_z_max - max_z_level_per_x = self.observations.get_max_observed_fidelity_level_per_config() - pseudo_z_level_max = max_z_level_per_x.max() # highest seen fidelity step so far - # find the fidelity step at which the best seen performance was recorded - z_inc_level = self.observations.get_budget_level_for_best_performance() - # retrieving actual fidelity values from budget level - ## marker 1: the fidelity value at which the best seen performance was recorded - z_inc = self.b_step * z_inc_level + self.pipeline_space.fidelity.lower - ## marker 2: the maximum fidelity value recorded in observation history - pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower - - def update_fidelity(config): - # for all configs, set to pseudo_z_max - ## that is, choose the highest seen fidelity in observation history - z_extrapolate = pseudo_z_max - config.update_hp_values({config.fidelity_name: z_extrapolate}) - return config - - # collect IDs for partial configurations - _partial_config_ids = (x.index <= max(self.observations.seen_config_ids)) - # filter for configurations that reached max budget - indices_to_drop = [ - _idx - for _idx, _x in x.loc[_partial_config_ids].items() - if _x.fidelity.value == self.pipeline_space.fidelity.upper - ] - # drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - # set fidelity for all partial configs - x = x.apply(update_fidelity) - - # create the same incumbent for all candidates - inc_list = self.preprocess_inc_list(len_x=len(x.index.values)) - - return x, torch.Tensor(inc_list) - - class MFPI_Random(MFPI): BUDGET = 1000 @@ -227,17 +108,11 @@ def __init__( horizon: str = "random", threshold: str = "random", surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - log_ei: bool = False, ): - super().__init__(pipeline_space, surrogate_model_name, augmented_ei, xi, in_fill, log_ei) + super().__init__(pipeline_space, surrogate_model_name) self.horizon = horizon self.threshold = threshold - - def set_state( self, pipeline_space: SearchSpace, @@ -264,7 +139,7 @@ def sample_horizon(self, steps_passed): else: return int(self.horizon) - def sample_threshold(self, f_inc): + def sample_performance_threshold(self, f_inc): if self.threshold == 'random': lu = 10**self.rng.uniform(-4,-1) # % of gap closed else: @@ -282,7 +157,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # expected input: IDs pertaining to the tabular data x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] inc_list = [] @@ -290,22 +164,22 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) + # Extension: Add a random min improvement threshold to encourage high risk high gain - t_value = self.sample_threshold(inc_value) + t_value = self.sample_performance_threshold(inc_value) inc_value = t_value # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) + for i, config in x.items(): if i <= max(self.observations.seen_config_ids): - current_fidelity = config.fidelity.value if np.equal(config.fidelity.value, config.fidelity.upper): # this training run has ended, drop it from future selection indices_to_drop.append(i) else: # a candidate partial training run to continue - target_fidelity = config.fidelity.value + horizon config.update_hp_values({ config.fidelity_name: min( config.fidelity.value + horizon, config.fidelity.upper @@ -314,90 +188,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list.append(inc_value) else: # a candidate new training run that we would need to start - current_fidelity = 0 - config.update_hp_values({config.fidelity_name: horizon}) - inc_list.append(inc_value) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - assert len(inc_list) == len(x) - - return x, torch.Tensor(inc_list) - - -class MFPI_Random_HiT(MFPI): - - BUDGET = 1000 # total budget in freeze-thaw steps available - - def set_state( - self, - pipeline_space: SearchSpace, - surrogate_model: Any, - observations: MFObservedData, - b_step: Union[int, float], - **kwargs, - ): - # set RNG - self.rng = np.random.RandomState(seed=42) - for i in range(len(observations.completed_runs)): - self.rng.uniform(-4,0) - self.rng.randint(1,51) - - return super().set_state(pipeline_space, surrogate_model, observations, b_step) - - def sample_horizon(self, steps_passed): - shortest = self.pipeline_space.fidelity.lower - longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) - return self.rng.randint(shortest, longest+1) - - def sample_threshold(self, f_inc): - lu = 10**self.rng.uniform(-4,0) # % of gap closed - return f_inc * (1 - lu) - - def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - - - indices_to_drop = [] - inc_list = [] - - steps_passed = len(self.observations.completed_runs) - - # Like EI-AtMax, use the global incumbent as a basis for the EI threshold - inc_value = min(self.observations.get_best_performance_for_each_budget()) - # Extension: Add a random min improvement threshold to encourage high risk high gain - t_value = self.sample_threshold(inc_value) - inc_value = t_value - - # Like MFEI: set fidelities to query using horizon as self.b_step - # Extension: Unlike DyHPO, we sample the horizon randomly over the full range - horizon = self.sample_horizon(steps_passed) - for i, config in x.items(): - if i <= max(self.observations.seen_config_ids): - current_fidelity = config.fidelity.value - if np.equal(config.fidelity.value, config.fidelity.upper): - # this training run has ended, drop it from future selection - indices_to_drop.append(i) - else: - # a candidate partial training run to continue - target_fidelity = config.fidelity.value + horizon - # if horizon exceeds max, query at max - config.update_hp_values({config.fidelity_name: min( - config.fidelity.value + horizon, config.fidelity.upper - )}) - inc_list.append(inc_value) - else: - # a candidate new training run that we would need to start - current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index b1e5172a..5db827ed 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -170,9 +170,13 @@ def sample( ) # Updating fidelity values - if set_new_sample_fidelity is not None: - for config in new_configs: - config.update_hp_values({config.fidelity_name: set_new_sample_fidelity}) + new_fid = ( + set_new_sample_fidelity + if set_new_sample_fidelity is not None + else self.pipeline_space.fidelity.lower + ) + for config in new_configs: + config.update_hp_values({config.fidelity_name: new_fid}) configs = pd.concat([deepcopy(partial_configs), new_configs]) diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py index 4d5985ea..048a52fb 100755 --- a/neps/optimizers/multi_fidelity/ifbo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -137,6 +137,7 @@ def __init__( pipeline_space=pipeline_space, surrogate_model=surrogate_model, surrogate_model_args=self.surrogate_model_args, + step_size=self.step_size, ) self.acquisition_args = {} if acquisition_args is None else acquisition_args self.acquisition_args.update( diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index cfb38cc5..1dd9bc87 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -2,16 +2,15 @@ from copy import deepcopy -import numpy as np -import pandas as pd import torch from neps.utils.common import instance_from_map from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping -from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.optimizers.multi_fidelity.utils import ( + get_tokenized_data, get_training_data_for_freeze_thaw +) from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config + class MFBOBase: """Designed to work with model-based search on SH-based multi-fidelity algorithms. @@ -188,6 +187,7 @@ def __init__( pipeline_space, surrogate_model: str = "ftpfn", surrogate_model_args: dict = None, + step_size: int = 1, ): self.observed_configs = None self.pipeline_space = pipeline_space @@ -201,79 +201,16 @@ def __init__( name="surrogate model", kwargs=self.surrogate_model_args, ) + self.step_size = step_size def _fantasize_pending(self, train_x, train_y, pending_x): - # Select configs that are neither pending nor resulted in error - completed_configs = self.observed_configs.completed_runs.copy(deep=True) - # IMPORTANT: preprocess observations to get appropriate training data - train_x, train_lcs, train_y = self.get_training_data_for_freeze_thaw( - completed_configs, self.pipeline_space - ) - pending_condition = self.observed_configs.pending_condition - if pending_condition.any(): - raise NotImplementedError( - "Fantasization not implemented yet!" - "This optimizer cannot be run with multiple workers yet." - ) - print(f"\n\nFound pending: {pending_condition.sum()}\n\n") - pending_configs = self.observed_configs.df.loc[pending_condition] - self._fit(train_x, train_y, train_lcs) - pending_x = [] # torch.Tensor([]) - for _id in pending_configs.index.get_level_values(0): - _config = pending_configs.loc[_id].config.values[0] - # TODO: fix this - _fid = (_config.fidelity.value - _config.fidelity.lower) / \ - (_config.fidelity.upper - _config.fidelity.lower) - pending_x.append([_id, _fid, *normalize_vectorize_config(_config)]) - pending_x = torch.Tensor(pending_x) - _y = self._predict(pending_x) - _y = _y.tolist() - train_x.extend(pending_x) - train_y.extend(_y) - train_lcs.extend(pending_lcs) - - return train_x, train_y, train_lcs + raise NotImplementedError("Fantasization not implemented yet!") def _fit(self, train_x, train_y, train_lcs): - if self.surrogate_model_name == "ftpfn": - # do nothing - no training required - pass - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) + raise NotImplementedError("Predict not implemented yet!") def _predict(self, test_x) -> torch.Tensor: - if self.surrogate_model_name == "ftpfn": - mean = self.surrogate_model.get_mean_performance(test_x) - if mean.is_cuda: - mean = mean.cpu() - return mean - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) - - def get_training_data_for_freeze_thaw( - self, df: pd.DataFrame, pipeline_space: SearchSpace - ): - configs = [] - learning_curves = [] - performance = [] - for idx, row in df.iterrows(): - config_id = idx[0] - budget_id = idx[1] - if pipeline_space.has_tabular: - _row = pd.Series([row[self.observed_configs.config_col]], index=[config_id]) - _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) - configs.append(_row.values[0]) - else: - configs.append(row[self.observed_configs.config_col]) - performance.append(row[self.observed_configs.perf_col]) - learning_curves.append(self.observed_configs.extract_learning_curve(config_id, budget_id)) - return configs, learning_curves, performance + raise NotImplementedError("Predict not implemented yet!") def set_state( self, @@ -301,46 +238,70 @@ def __init__(self, *args, **kwargs): self.train_x = None self.train_y = None - def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): - if train_x is None: - train_x = [] - if train_y is None: - train_y = [] - if pending_x is None: - pending_x = [] - - if decay_t is None: - decay_t = len(train_x) - train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) - self._fit(train_x, train_y, train_lcs) - - return self.surrogate_model, decay_t - - def _fit(self, *args): # pylint: disable=unused-argument + def update_model(self): + # tokenize the observations + idxs, steps, configs, performance = get_training_data_for_freeze_thaw( + self.observed_configs.df.loc[self.observed_configs.completed_runs_index], + self.observed_configs.config_col, + self.observed_configs.perf_col, + self.pipeline_space, + step_size=self.step_size, + maximize=True + ) + df_idxs = torch.Tensor(idxs) + df_x = torch.Tensor(get_tokenized_data(configs)) + df_steps = torch.Tensor(steps) + train_x = torch.hstack([ + df_idxs.reshape(df_steps.shape[0], 1), + df_steps.reshape(df_steps.shape[0], 1), + df_x + ]) + train_y = torch.Tensor(performance) + + # fit the model, on only completed runs + self._fit(train_x, train_y) + + # fantasize pending evaluations + if self.observed_configs.pending_condition.any(): + # tokenize the pending observations + _idxs, _steps, _configs, _ = get_training_data_for_freeze_thaw( + self.observed_configs.df.loc[self.observed_configs.pending_runs_index], + self.observed_configs.config_col, + self.observed_configs.perf_col, + self.pipeline_space, + step_size=self.step_size, + maximize=True + ) + _df_x = torch.Tensor(get_tokenized_data(_configs)) + _df_idxs = torch.Tensor(_idxs) + _df_steps = torch.Tensor(_steps) + _test_x = torch.hstack([ + _df_idxs.reshape(_df_idxs.shape[0], 1), + _df_steps.reshape(_df_steps.shape[0], 1), + _df_x + ]) + _performances = self._predict(_test_x) # returns maximizing metric + # update the training data + train_x = torch.vstack([train_x, _test_x]) + train_y = torch.vstack([train_y, _performances]) + # refit the model, on completed runs + fantasized pending runs + self._fit(train_x, train_y) + + def _fit(self, train_x: torch.Tensor, train_y: torch.Tensor): # pylint: disable=unused-argument # no training required,, only preprocessing the training data as context during inference - self.preprocess_training_set() - - def preprocess_training_set(self): - _configs = self.observed_configs.df.config.values.copy() - - # onlf if tabular space is present - if self.pipeline_space.has_tabular: - # placeholder index, will be driooed - _idxs = np.arange(len(_configs)) - # mapping the (id, epoch) space of tabular configs to the actual HPs - _configs = map_real_hyperparameters_from_tabular_ids( - pd.Series(_configs, index=_idxs), self.pipeline_space - ).values + assert self.surrogate_model is not None, "Surrogate model not set!" + self.surrogate_model.train_x = train_x + self.surrogate_model.train_y = train_y - device = self.surrogate_model.device - # TODO: fix or make consistent with `tokenize`` - configs, idxs, performances = self.observed_configs.get_tokenized_data( - self.observed_configs.df.copy().assign(config=_configs) - ) - # NOTE: FT-PFN is pretrained on accuracy and hence, converting to a `maximizing` metric - performances = 1 - performances - idxs = idxs.astype(float) - idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper - # TODO: account for fantasization - self.surrogate_model.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) - self.surrogate_model.train_y = torch.Tensor(performances).to(device) + def _predict(self, test_x: torch.Tensor) -> torch.Tensor: + assert self.train_x is not None and self.train_y is not None, "Model not trained yet!" + if self.surrogate_model_name == "ftpfn": + mean = self.surrogate_model.get_mean_performance(test_x) + if mean.is_cuda: + mean = mean.cpu() + return mean + else: + # check neps/optimizers/bayesian_optimization/models/__init__.py for options + raise ValueError( + f"Surrogate model {self.surrogate_model_name} not supported!" + ) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 11bba5bc..91bf3c2c 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -7,6 +7,7 @@ import torch from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids def continuous_to_tabular( @@ -37,6 +38,60 @@ def normalize_vectorize_config( return np.array(_new_vector) +def get_tokenized_data( + configs: list[SearchSpace], + ignore_fidelity: bool = True, +) -> np.ndarray: # pd.Series: # tuple[np.ndarray, np.ndarray, np.ndarray]: + """ Extracts configurations, indices and performances given a DataFrame + + Tokenizes the given set of observations as required by a PFN surrogate model. + """ + configs = np.array([ + normalize_vectorize_config(c, ignore_fidelity=ignore_fidelity) for c in configs + ]) + return configs + + +def get_freeze_thaw_normalized_step(fid_step: int, lower: int, upper: int, step: int) -> float: + max_fid_step = int(np.ceil((upper - lower) / step)) + 1 + return fid_step / max_fid_step + + +def get_training_data_for_freeze_thaw( + df: pd.DataFrame | MFObservedData.df, + config_key: str, + perf_key: str, + pipeline_space: SearchSpace, + step_size: int, + maximize: bool = False, +) -> tuple[list[int], list[int], list[SearchSpace], list[float]]: + configs = [] + performance = [] + idxs = [] + steps = [] + for idx, row in df.iterrows(): + config_id = idx[0] + budget_id = idx[1] + if pipeline_space.has_tabular: + _row = pd.Series([row[config_key]], index=[config_id]) + _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) + configs.append(_row.values[0]) + else: + configs.append(row[config_key]) + performance.append(row[perf_key]) + steps.append( + get_freeze_thaw_normalized_step( + budget_id + 1, # NePS fidelity IDs begin with 0 + pipeline_space.fidelity.lower, + pipeline_space.fidelity.upper, + step_size, + ) + ) + idxs.append(idx[0] + 1) # NePS config IDs begin with 0 + if maximize: + performance = (1 - np.array(performance)).tolist() + return idxs, steps, configs, performance + class MFObservedData: """ (Under development) @@ -101,9 +156,17 @@ def seen_budget_levels(self) -> list: # Considers pending and error budgets as seen return self.df.index.levels[1].to_list() + @property + def pending_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.df.loc[self.pending_condition].index + @property def completed_runs(self): return self.df[~(self.pending_condition | self.error_condition)] + + @property + def completed_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.completed_runs.index def next_config_id(self) -> int: if len(self.seen_config_ids): @@ -172,7 +235,7 @@ def get_learning_curves(self): ) def all_configs_list(self) -> list[Any]: - return self.df.loc[:, self.config_col].values.tolist() + return self.df.loc[:, self.config_col].sort_index().values.tolist() def get_incumbents_for_budgets(self, maximize: bool = False): """ @@ -295,16 +358,6 @@ def get_max_observed_fidelity_level_per_config(self) -> pd.Series: } return pd.Series(max_z_observed) - def get_tokenized_data(self, df: pd.DataFrame): - idxs = df.index.values - idxs = np.array([list(idx) for idx in idxs]) - idxs[:, 1] += 1 # all fidelity IDs begin with 0 in NePS - performances = df.perf.values - configs = df.config.values - configs = np.array([normalize_vectorize_config(c) for c in configs]) - - return configs, idxs, performances - @property def token_ids(self) -> np.ndarray: return self.df.index.values diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py index 143388ff..72aae544 100644 --- a/neps/plot/plot3D.py +++ b/neps/plot/plot3D.py @@ -60,7 +60,6 @@ def __post_init__(self): self.loss_range = (self.df["result.loss"].min(), self.df["result.loss"].max()) _fid_key = f"config.{self.fidelity_key}" self.epochs_range = (self.df[_fid_key].min(), self.df[_fid_key].max()) - # breakpoint() @staticmethod def get_x(df: pd.DataFrame) -> np.array: diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index f88c7810..3d2cf868 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -128,7 +128,7 @@ def training_pipeline( tblogger.log( loss=val_loss, current_epoch=epochs, - write_summary_incumbent=True, # Set to `True` for a live incumbent trajectory. + write_summary_incumbent=False, # Set to `True` for a live incumbent trajectory. writer_config_scalar=True, # Set to `True` for a live loss trajectory for each config. writer_config_hparam=True, # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. # Appending extra data From 9e746587a39ca609a9367b80a6a852d974fcab3e Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Sun, 1 Sep 2024 09:14:02 +0200 Subject: [PATCH 26/46] Fixing an assertion + minor comments --- neps/optimizers/multi_fidelity/mf_bo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 1dd9bc87..ef31f9cc 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -246,7 +246,7 @@ def update_model(self): self.observed_configs.perf_col, self.pipeline_space, step_size=self.step_size, - maximize=True + maximize=True # inverts performance since NePS minimizes ) df_idxs = torch.Tensor(idxs) df_x = torch.Tensor(get_tokenized_data(configs)) @@ -270,7 +270,7 @@ def update_model(self): self.observed_configs.perf_col, self.pipeline_space, step_size=self.step_size, - maximize=True + maximize=True # inverts performance since NePS minimizes ) _df_x = torch.Tensor(get_tokenized_data(_configs)) _df_idxs = torch.Tensor(_idxs) @@ -283,7 +283,7 @@ def update_model(self): _performances = self._predict(_test_x) # returns maximizing metric # update the training data train_x = torch.vstack([train_x, _test_x]) - train_y = torch.vstack([train_y, _performances]) + train_y = torch.hstack([train_y, _performances]) # refit the model, on completed runs + fantasized pending runs self._fit(train_x, train_y) @@ -294,7 +294,7 @@ def _fit(self, train_x: torch.Tensor, train_y: torch.Tensor): # pylint: disable self.surrogate_model.train_y = train_y def _predict(self, test_x: torch.Tensor) -> torch.Tensor: - assert self.train_x is not None and self.train_y is not None, "Model not trained yet!" + assert self.surrogate_model.train_x is not None and self.surrogate_model.train_y is not None, "Model not trained yet!" if self.surrogate_model_name == "ftpfn": mean = self.surrogate_model.get_mean_performance(test_x) if mean.is_cuda: From 1975c2979f5dd9372c334d44738de79e3b278671 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Tue, 3 Sep 2024 04:27:35 +0200 Subject: [PATCH 27/46] Updating ifbo dep version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9c0ff623..8d31710c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" -ifbo = ">=0.3.8" +ifbo = ">=0.3.9" [tool.poetry.group.dev.dependencies] ruff = "^0.4" From 445a861098698de807ad90babb016691a5f7f9b8 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Tue, 3 Sep 2024 11:41:47 +0200 Subject: [PATCH 28/46] Minor signature change --- neps/optimizers/bayesian_optimization/models/ftpfn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index 600409b5..add29d0d 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -10,8 +10,7 @@ class FTPFNSurrogate: """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" - def __init__(self, target_path: Path = None, version: str = "0.0.1", *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, target_path: Path = None, version: str = "0.0.1", **kwargs): self.ftpfn = FTPFN(target_path=target_path, version=version) self.target_path = self.ftpfn.target_path self.version = self.ftpfn.version From cc6bcecd43cdbe4b08c3370307d9cd32dab48b98 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Tue, 3 Sep 2024 11:43:54 +0200 Subject: [PATCH 29/46] Updating ifbo version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d31710c..236bf952 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" -ifbo = ">=0.3.9" +ifbo = ">=0.3.10" [tool.poetry.group.dev.dependencies] ruff = "^0.4" From 6ad86724e20b24e90c0b6cbb908462e35cbf75f9 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 5 Sep 2024 18:20:49 +0200 Subject: [PATCH 30/46] Update freeze_thaw.py with ifbo HPs --- neps_examples/efficiency/freeze_thaw.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index 3d2cf868..1b3c955f 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -155,6 +155,9 @@ def training_pipeline( pipeline_space=pipeline_space, run_pipeline=training_pipeline, searcher="ifbo", + # ifbo hyperparameters + version="0.0.1", + step_size=2, max_evaluations_total=25, root_directory="debug/ifbo-mnist/", overwrite_working_directory=True From 91030e925cdf5007de8fcd64378edb5ea652310d Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 5 Sep 2024 19:46:58 +0200 Subject: [PATCH 31/46] Handling varying step_size in ifbo --- neps/optimizers/multi_fidelity/ifbo.py | 30 ++++++++++++++++++++++++- neps/search_spaces/search_space.py | 11 +++++---- neps_examples/efficiency/freeze_thaw.py | 14 +++++++----- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py index 048a52fb..686996a9 100755 --- a/neps/optimizers/multi_fidelity/ifbo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import warnings from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult @@ -76,7 +77,10 @@ def __init__( logger: logger object, or None to use the neps logger sample_default_first: Whether to sample the default configuration first """ - super().__init__( + # Adjust pipeline space fidelity steps to be equally spaced + pipeline_space = self._adjust_fidelity_for_freeze_thaw_steps(pipeline_space, step_size) + # Super constructor call + super().__init__( pipeline_space=pipeline_space, budget=budget, patience=patience, @@ -168,6 +172,30 @@ def __init__( self.evaluation_data = EvaluationData() + def _adjust_fidelity_for_freeze_thaw_steps( + self, + pipeline_space: SearchSpace, + step_size: int + ) -> SearchSpace: + """Adjusts the fidelity range to be divisible by `step_size` for Freeze-Thaw. + """ + if not pipeline_space.has_fidelity: + return pipeline_space + # Check if the fidelity range is divided into equal sized steps by `step_size` + remainder = (pipeline_space.fidelity.upper - pipeline_space.fidelity.lower) % step_size + if remainder == 0: + return pipeline_space + # Adjust the fidelity lower bound to be divisible by `step_size` into equal steps + offset = step_size - remainder + # Pushing the lower bound of the fidelity space by an offset to ensure equal-sized steps + pipeline_space.fidelity.lower += offset + warnings.warn( + f"Adjusted fidelity lower bound to {pipeline_space.fidelity.lower} " + f"for equal-sized steps of {step_size}." + ) + print("New fidelity: ", pipeline_space.fidelity) + return pipeline_space + def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: # setup for GP implemented in NePS diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 04476de8..ccaeca87 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -330,25 +330,28 @@ def sample( sampled_hps[name] = hp.clone() continue - for _ in range(patience): + for attempt in range(patience): try: if user_priors and isinstance(hp, ParameterWithPrior): sampled_hps[name] = hp.sample(user_priors=user_priors) else: sampled_hps[name] = hp.sample() break - except ValueError: + except Exception as e: logger.warning( - f"Could not sample valid value for hyperparameter {name}!" + f"Attempt {attempt + 1}/{patience} failed for sampling {name}: {str(e)}" ) else: + logger.error( + f"Failed to sample valid value for {name} after {patience} attempts" + ) raise ValueError( f"Could not sample valid value for hyperparameter {name}" f" in {patience} tries!" ) return SearchSpace(**sampled_hps) - + def mutate( self, *, diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index 1b3c955f..8a174859 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -142,6 +142,7 @@ def training_pipeline( if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) pipeline_space = { "learning_rate": neps.Float(1e-5, 1e-1, log=True), @@ -155,13 +156,16 @@ def training_pipeline( pipeline_space=pipeline_space, run_pipeline=training_pipeline, searcher="ifbo", - # ifbo hyperparameters - version="0.0.1", - step_size=2, max_evaluations_total=25, root_directory="debug/ifbo-mnist/", - overwrite_working_directory=True - + # overwrite_working_directory=True, + # (optional) ifbo hyperparameters + step_size=3, + # (optional) ifbo surrogate model hyperparameters (for FT-PFN) + surrogate_model_args=dict( + version="0.0.1", + target_path=None, + ), ) # NOTE: this is experimental and may not work as expected From 72113f39f7ec613f039f3d60302ba0fb0cad494e Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 5 Sep 2024 19:58:22 +0200 Subject: [PATCH 32/46] Simplify initial design for ifbo --- neps/optimizers/multi_fidelity/ifbo.py | 63 +++---------------------- neps_examples/efficiency/freeze_thaw.py | 13 +++-- 2 files changed, 15 insertions(+), 61 deletions(-) diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py index 686996a9..90bd0d6f 100755 --- a/neps/optimizers/multi_fidelity/ifbo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -55,9 +55,7 @@ def __init__( acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", acquisition_sampler_args: dict = None, model_policy: Any = PFNSurrogate, - initial_design_fraction: float = 0.75, - initial_design_size: int = 10, - initial_design_budget: int = None, + initial_design_size: int = 1, ): """Initialise @@ -76,6 +74,7 @@ def __init__( value instead. default: None logger: logger object, or None to use the neps logger sample_default_first: Whether to sample the default configuration first + initial_design_size: Number of configurations to sample before starting optimization """ # Adjust pipeline space fidelity steps to be equally spaced pipeline_space = self._adjust_fidelity_for_freeze_thaw_steps(pipeline_space, step_size) @@ -95,11 +94,8 @@ def __init__( self.min_budget = self.pipeline_space.fidelity.lower # TODO: generalize this to work with real data (not benchmarks) self.max_budget = self.pipeline_space.fidelity.upper - - self._initial_design_fraction = initial_design_fraction - self._initial_design_size, self._initial_design_budget = self._set_initial_design( - initial_design_size, initial_design_budget, self._initial_design_fraction - ) + self._initial_design_size = initial_design_size + # TODO: Write use cases for these parameters self._model_update_failed = False self.sample_default_first = sample_default_first @@ -215,45 +211,6 @@ def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): else pipeline_space.get_vectorial_dim() ) - def _set_initial_design( - self, - initial_design_size: int = None, - initial_design_budget: int = None, - initial_design_fraction: float = 0.75, - ) -> tuple[int | float, int | float]: - """Sets the initial design size and budget.""" - - # user specified initial_design_size takes precedence - if initial_design_budget is not None: - _initial_design_budget = initial_design_budget - else: - _initial_design_budget = self.max_budget - - # user specified initial_design_size takes precedence - _initial_design_size = np.inf - if initial_design_size is not None: - _initial_design_size = initial_design_size - if ( - initial_design_size is None - or _initial_design_size * self.min_budget > _initial_design_budget - ): - # if the initial design budget is less than the budget spent on sampling - # the initial design at the minimum budget (fidelity) - # 2 choices here: - # 1. Reduce initial_design_size - # 2. Increase initial_design_budget - # we choose to reduce initial_design_size - _init_budget = initial_design_fraction * self.max_budget - # number of min budget evaluations fitting within initial design budget - _initial_design_size = _init_budget // self.min_budget - - self.logger.info( - f"\n\ninitial_design_size: {_initial_design_size}\n" - f"initial_design_budget: {_initial_design_budget}\n" - f"min_budget: {self.min_budget}\n\n" - ) - return _initial_design_size, _initial_design_budget - def get_budget_level(self, config: SearchSpace) -> int: """Calculates the discretized (int) budget level for a given configuration.""" return int( @@ -297,15 +254,9 @@ def total_budget_spent(self) -> int | float: return total_budget_spent - def is_init_phase(self, budget_based: bool = False) -> bool: - if budget_based: - # Check if we are still in the initial design phase based on - # either the budget spent so far or the number of configurations evaluated - if self.total_budget_spent() < self._initial_design_budget: - return True - else: - if self.num_train_configs < self._initial_design_size: - return True + def is_init_phase(self) -> bool: + if self.num_train_configs < self._initial_design_size: + return True return False @property diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index 8a174859..fb0c4531 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -128,9 +128,12 @@ def training_pipeline( tblogger.log( loss=val_loss, current_epoch=epochs, - write_summary_incumbent=False, # Set to `True` for a live incumbent trajectory. - writer_config_scalar=True, # Set to `True` for a live loss trajectory for each config. - writer_config_hparam=True, # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. + # Set to `True` for a live incumbent trajectory. + write_summary_incumbent=False, + # Set to `True` for a live loss trajectory for each config. + writer_config_scalar=True, + # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. + writer_config_hparam=True, # Appending extra data extra_data={ "train_loss": tblogger.scalar_logging(loss.item()), @@ -158,7 +161,7 @@ def training_pipeline( searcher="ifbo", max_evaluations_total=25, root_directory="debug/ifbo-mnist/", - # overwrite_working_directory=True, + overwrite_working_directory=True, # set to False for a multi-worker run # (optional) ifbo hyperparameters step_size=3, # (optional) ifbo surrogate model hyperparameters (for FT-PFN) @@ -168,7 +171,7 @@ def training_pipeline( ), ) - # NOTE: this is experimental and may not work as expected + # NOTE: this is `experimental` and may not work as expected ## plotting a 3D plot for learning curves explored by ifbo plotter = Plotter3D( run_path="debug/ifbo-mnist/", # same as `root_directory` above From 9068fa39f9e1369cf6dfdf6992e48e19836db126 Mon Sep 17 00:00:00 2001 From: karibbov Date: Fri, 6 Sep 2024 16:49:41 +0200 Subject: [PATCH 33/46] plot3D refactor; small bugfix --- neps/plot/plot3D.py | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py index 72aae544..7bca2c54 100644 --- a/neps/plot/plot3D.py +++ b/neps/plot/plot3D.py @@ -1,29 +1,21 @@ from __future__ import annotations -from dataclasses import dataclass, field - +from dataclasses import dataclass from pathlib import Path -import multiprocessing as mp -from functools import partial - -from argparse import ArgumentParser -from matplotlib import pyplot as plt -from matplotlib import cm +import matplotlib +from matplotlib import ( + cm, + pyplot as plt, +) from matplotlib.collections import LineCollection -from mpl_toolkits.mplot3d.art3d import Line3DCollection from matplotlib.colors import Normalize -import matplotlib -matplotlib.use('TkAgg') +from mpl_toolkits.mplot3d.art3d import Line3DCollection -import itertools +matplotlib.use("TkAgg") -from neps.status.status import get_run_summary_csv -import re -import pandas as pd import numpy as np - -from typing import Callable +import pandas as pd # Copied from plot.py HERE = Path(__file__).parent.absolute() @@ -34,14 +26,7 @@ class Plotter3D: loss_key: str = "Loss" fidelity_key: str = "epochs" - config_column: str | None = None run_path: str | Path | None = None - base_results_path: str | Path = DEFAULT_RESULTS_PATH - strict: bool = False - get_x: Callable[[pd.DataFrame], np.array] | None = None - get_y: Callable[[pd.DataFrame], np.array] | None = None - get_z: Callable[[pd.DataFrame], np.array] | None = None - get_color: Callable[[pd.DataFrame], np.array] | None = None scatter: bool = True footnote: bool = True alpha: float = 0.9 @@ -51,15 +36,20 @@ class Plotter3D: def __post_init__(self): if self.run_path is not None: - assert Path(self.run_path).absolute().is_dir(), \ - f"Path {self.run_path} is not a directory" - self.data_path = Path(self.run_path).absolute() / "summary_csv" / "config_data.csv" + assert ( + Path(self.run_path).absolute().is_dir() + ), f"Path {self.run_path} is not a directory" + self.data_path = ( + Path(self.run_path).absolute() / "summary_csv" / "config_data.csv" + ) assert self.data_path.exists(), f"File {self.data_path} does not exist" - self.df = pd.read_csv(self.data_path, index_col=0, float_precision="round_trip") + self.df = pd.read_csv( + self.data_path, index_col=0, float_precision="round_trip" + ) - self.loss_range = (self.df["result.loss"].min(), self.df["result.loss"].max()) - _fid_key = f"config.{self.fidelity_key}" - self.epochs_range = (self.df[_fid_key].min(), self.df[_fid_key].max()) + # Assigned at prep_df stage + self.loss_range = () + self.epochs_range = () @staticmethod def get_x(df: pd.DataFrame) -> np.array: @@ -80,42 +70,49 @@ def get_color(df: pd.DataFrame) -> np.array: def prep_df(self, df: pd.DataFrame = None) -> pd.DataFrame: df = self.df if df is None else df - time_cols = ["metadata.time_started", "metadata.time_end"] - df = df.sort_values(by=time_cols).reset_index(drop=True) - split_values = np.array([[*index.split('_')] for index in self.df.index]) - df[['configID', 'epochID']] = split_values + + _fid_key = f"config.{self.fidelity_key}" + self.loss_range = (df["result.loss"].min(), df["result.loss"].max()) + self.epochs_range = (df[_fid_key].min(), df[_fid_key].max()) + + split_values = np.array([[*index.split("_")] for index in df.index]) + df[["configID", "epochID"]] = split_values df.configID = df.configID.astype(int) df.epochID = df.epochID.astype(int) if df.epochID.min() == 0: df.epochID += 1 - return df + + # indices become sampling order + time_cols = ["metadata.time_started", "metadata.time_end"] + return df.sort_values(by=time_cols).reset_index(drop=True) def plot3D( self, data: pd.DataFrame = None, save_path: str | Path | None = None, - filename: str = "freeze_thaw" - ): + filename: str = "freeze_thaw", + ) -> None: data = self.prep_df(data) # Create the figure and the axes for the plot - fig, (ax3D, ax, cax) = plt.subplots(1, 3, figsize=(12, 5), width_ratios=(20, 20, 1)) + fig, (ax3D, ax, cax) = plt.subplots( + 1, 3, figsize=(12, 5), width_ratios=(20, 20, 1) + ) # remove a 2D axis and replace with a 3D projection one ax3D.remove() - ax3D = fig.add_subplot(131, projection='3d') + ax3D = fig.add_subplot(131, projection="3d") # Create the normalizer to normalize the color values norm = Normalize(self.get_color(data).min(), self.get_color(data).max()) # Counters to keep track of the configurations run for only a single fidelity n_lines = 0 - n_mins = 0 + n_points = 0 data_groups = data.groupby("configID", sort=False) - for idx, (configID, data_) in enumerate(data_groups): - + for idx, (_configID, data_) in enumerate(data_groups): x = self.get_x(data_) y = self.get_y(data_) z = self.get_z(data_) @@ -124,27 +121,29 @@ def plot3D( color = self.get_color(data_) if len(x) < 2: - n_mins += 1 + n_points += 1 if self.scatter: + # 3D points ax3D.scatter( y, z, - s=self.scatter_size, - zs=0, + s=self.scatter_size, + zs=0, zdir="x", c=color, - cmap='RdYlBu_r', + cmap="RdYlBu_r", norm=norm, - alpha=self.alpha * 0.8 + alpha=self.alpha * 0.8, ) + # 2D points ax.scatter( x, z, s=self.scatter_size, c=color, - cmap='RdYlBu_r', + cmap="RdYlBu_r", norm=norm, - alpha=self.alpha * 0.8 + alpha=self.alpha * 0.8, ) else: n_lines += 1 @@ -155,7 +154,9 @@ def plot3D( segments3D = np.concatenate([points3D[:-1], points3D[1:]], axis=1) # Construct lines from segments - lc3D = Line3DCollection(segments3D, cmap='RdYlBu_r', norm=norm, alpha=self.alpha) + lc3D = Line3DCollection( + segments3D, cmap="RdYlBu_r", norm=norm, alpha=self.alpha + ) lc3D.set_array(color) # Draw lines @@ -167,7 +168,9 @@ def plot3D( segments = np.concatenate([points[:-1], points[1:]], axis=1) # Construct lines from segments - lc = LineCollection(segments, cmap="RdYlBu_r", norm=norm, alpha=self.alpha) + lc = LineCollection( + segments, cmap="RdYlBu_r", norm=norm, alpha=self.alpha + ) lc.set_array(color) # Draw lines @@ -177,56 +180,53 @@ def plot3D( ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) ax3D.axes.set_zlim3d(bottom=self.loss_range[0], top=self.loss_range[1]) - ax3D.set_xlabel('Epochs') - ax3D.set_ylabel('Iteration sampled') - ax3D.set_zlabel(f'{self.loss_key}') + ax3D.set_xlabel("Epochs") + ax3D.set_ylabel("Iteration sampled") + ax3D.set_zlabel(f"{self.loss_key}") # set view angle ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) ax.autoscale_view() ax.set_xlabel(self.fidelity_key) - ax.set_ylabel(f'{self.loss_key}') + ax.set_ylabel(f"{self.loss_key}") ax.set_facecolor(self.bck_color_2d) fig.suptitle("ifBO run") if self.footnote: fig.text( - 0.01, 0.02, - f"Total {n_lines + n_mins} configs evaluated; for multiple budgets: " - f"{n_lines}, for single budget: {n_mins}", - ha='left', + 0.01, + 0.02, + f"Total {n_lines + n_points} configs evaluated; for multiple budgets: " + f"{n_lines}, for single budget: {n_points}", + ha="left", va="bottom", - fontsize=10 + fontsize=10, ) plt.colorbar( cm.ScalarMappable(norm=norm, cmap="RdYlBu_r"), cax=cax, - label='Iteration', + label="Iteration", use_gridspec=True, - alpha=self.alpha + alpha=self.alpha, ) fig.tight_layout() self.save(save_path, filename) plt.close(fig) - def save(self, save_path: str | Path | None = None, filename: str = "freeze_thaw"): + def save( + self, save_path: str | Path | None = None, filename: str = "freeze_thaw" + ) -> None: run_path = Path(save_path if save_path is not None else self.run_path) run_path.mkdir(parents=True, exist_ok=True) assert run_path.is_dir() plot_path = run_path / f"Plot3D_{filename}.png" - - plt.savefig( - plot_path, - bbox_inches='tight' - ) + + plt.savefig(plot_path, bbox_inches="tight") if __name__ == "__main__": - plotter = Plotter3D( - run_path="./results", - fidelity_key="epochs" - ) + plotter = Plotter3D(run_path="./results", fidelity_key="epochs") plotter.plot3D() From 076284c3c030bc41d8cf444c68e94b9d1fcbf504 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Mon, 16 Sep 2024 14:09:17 +0200 Subject: [PATCH 34/46] fix: Update state tests with new optimizers --- neps/optimizers/multi_fidelity/ifbo.py | 19 ++++++++++--------- tests/test_state/test_neps_state.py | 10 ++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py index 90bd0d6f..5d4d11ce 100755 --- a/neps/optimizers/multi_fidelity/ifbo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -77,9 +77,11 @@ def __init__( initial_design_size: Number of configurations to sample before starting optimization """ # Adjust pipeline space fidelity steps to be equally spaced - pipeline_space = self._adjust_fidelity_for_freeze_thaw_steps(pipeline_space, step_size) + pipeline_space = self._adjust_fidelity_for_freeze_thaw_steps( + pipeline_space, step_size + ) # Super constructor call - super().__init__( + super().__init__( pipeline_space=pipeline_space, budget=budget, patience=patience, @@ -95,7 +97,7 @@ def __init__( # TODO: generalize this to work with real data (not benchmarks) self.max_budget = self.pipeline_space.fidelity.upper self._initial_design_size = initial_design_size - + # TODO: Write use cases for these parameters self._model_update_failed = False self.sample_default_first = sample_default_first @@ -169,16 +171,15 @@ def __init__( self.evaluation_data = EvaluationData() def _adjust_fidelity_for_freeze_thaw_steps( - self, - pipeline_space: SearchSpace, - step_size: int + self, pipeline_space: SearchSpace, step_size: int ) -> SearchSpace: - """Adjusts the fidelity range to be divisible by `step_size` for Freeze-Thaw. - """ + """Adjusts the fidelity range to be divisible by `step_size` for Freeze-Thaw.""" if not pipeline_space.has_fidelity: return pipeline_space # Check if the fidelity range is divided into equal sized steps by `step_size` - remainder = (pipeline_space.fidelity.upper - pipeline_space.fidelity.lower) % step_size + remainder = ( + pipeline_space.fidelity.upper - pipeline_space.fidelity.lower + ) % step_size if remainder == 0: return pipeline_space # Adjust the fidelity lower bound to be divisible by `step_size` into equal steps diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py index af993311..ab3a6b6a 100644 --- a/tests/test_state/test_neps_state.py +++ b/tests/test_state/test_neps_state.py @@ -91,20 +91,30 @@ def case_search_space_fid_with_prior() -> SearchSpace: "hyperband", "hyperband_custom_default", "priorband", + "priorband_bo", "mobster", "mf_ei_bo", + "priorband_asha", + "ifbo", + "priorband_asha_hyperband", ] OPTIMIZER_REQUIRES_BUDGET = [ "successive_halving_prior", "hyperband_custom_default", "asha", "priorband", + "priorband_bo", + "priorband_asha", + "priorband_asha_hyperband", "hyperband", "asha_prior", "mobster", ] REQUIRES_PRIOR = { "priorband", + "priorband_bo", + "priorband_asha", + "priorband_asha_hyperband", } REQUIRES_COST = ["cost_cooling_bayesian_optimization", "cost_cooling"] From 998b0b728ec3228d3b4f5751156d8f7884cfa136 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Mon, 16 Sep 2024 14:14:52 +0200 Subject: [PATCH 35/46] style: Fix typing/assertions for plotter3d --- neps/plot/plot3D.py | 75 +++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py index 7bca2c54..ef197ba4 100644 --- a/neps/plot/plot3D.py +++ b/neps/plot/plot3D.py @@ -3,20 +3,13 @@ from dataclasses import dataclass from pathlib import Path -import matplotlib -from matplotlib import ( - cm, - pyplot as plt, -) +import numpy as np +import pandas as pd +from matplotlib import cm, pyplot as plt from matplotlib.collections import LineCollection from matplotlib.colors import Normalize from mpl_toolkits.mplot3d.art3d import Line3DCollection -matplotlib.use("TkAgg") - -import numpy as np -import pandas as pd - # Copied from plot.py HERE = Path(__file__).parent.absolute() DEFAULT_RESULTS_PATH = HERE.parent / "results" @@ -31,8 +24,8 @@ class Plotter3D: footnote: bool = True alpha: float = 0.9 scatter_size: float | int = 3 - bck_color_2d: tuple[float] = (0.8, 0.82, 0.8) - view_angle: tuple[float | int] = (15, -70) + bck_color_2d: tuple[float, float, float] = (0.8, 0.82, 0.8) + view_angle: tuple[float, float] = (15, -70) def __post_init__(self): if self.run_path is not None: @@ -44,36 +37,38 @@ def __post_init__(self): ) assert self.data_path.exists(), f"File {self.data_path} does not exist" self.df = pd.read_csv( - self.data_path, index_col=0, float_precision="round_trip" + self.data_path, + index_col=0, + float_precision="round_trip", # type: ignore ) # Assigned at prep_df stage - self.loss_range = () - self.epochs_range = () + self.loss_range: tuple[float, float] | None = None + self.epochs_range: tuple[float, float] | None = None @staticmethod - def get_x(df: pd.DataFrame) -> np.array: + def get_x(df: pd.DataFrame) -> np.ndarray: return df["epochID"].to_numpy() @staticmethod - def get_y(df: pd.DataFrame) -> np.array: + def get_y(df: pd.DataFrame) -> np.ndarray: y_ = df["configID"].to_numpy() return np.ones_like(y_) * y_[0] @staticmethod - def get_z(df: pd.DataFrame) -> np.array: + def get_z(df: pd.DataFrame) -> np.ndarray: return df["result.loss"].to_numpy() @staticmethod - def get_color(df: pd.DataFrame) -> np.array: + def get_color(df: pd.DataFrame) -> np.ndarray: return df.index.to_numpy() - def prep_df(self, df: pd.DataFrame = None) -> pd.DataFrame: + def prep_df(self, df: pd.DataFrame | None = None) -> pd.DataFrame: df = self.df if df is None else df _fid_key = f"config.{self.fidelity_key}" - self.loss_range = (df["result.loss"].min(), df["result.loss"].max()) - self.epochs_range = (df[_fid_key].min(), df[_fid_key].max()) + self.loss_range = (df["result.loss"].min(), df["result.loss"].max()) # type: ignore + self.epochs_range = (df[_fid_key].min(), df[_fid_key].max()) # type: ignore split_values = np.array([[*index.split("_")] for index in df.index]) df[["configID", "epochID"]] = split_values @@ -88,7 +83,7 @@ def prep_df(self, df: pd.DataFrame = None) -> pd.DataFrame: def plot3D( self, - data: pd.DataFrame = None, + data: pd.DataFrame | None = None, save_path: str | Path | None = None, filename: str = "freeze_thaw", ) -> None: @@ -155,12 +150,15 @@ def plot3D( # Construct lines from segments lc3D = Line3DCollection( - segments3D, cmap="RdYlBu_r", norm=norm, alpha=self.alpha + segments3D, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, ) lc3D.set_array(color) # Draw lines - ax3D.add_collection3d(lc3D) + ax3D.add_collection3d(lc3D) # type: ignore # Plot 2D # Get segments for all lines @@ -169,23 +167,29 @@ def plot3D( # Construct lines from segments lc = LineCollection( - segments, cmap="RdYlBu_r", norm=norm, alpha=self.alpha + segments, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, # type: ignore ) lc.set_array(color) # Draw lines ax.add_collection(lc) - ax3D.axes.set_xlim3d(left=self.epochs_range[0], right=self.epochs_range[1]) - ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) - ax3D.axes.set_zlim3d(bottom=self.loss_range[0], top=self.loss_range[1]) + assert self.loss_range is not None + assert self.epochs_range is not None + + ax3D.axes.set_xlim3d(left=self.epochs_range[0], right=self.epochs_range[1]) # type: ignore + ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) # type: ignore + ax3D.axes.set_zlim3d(bottom=self.loss_range[0], top=self.loss_range[1]) # type: ignore ax3D.set_xlabel("Epochs") ax3D.set_ylabel("Iteration sampled") - ax3D.set_zlabel(f"{self.loss_key}") + ax3D.set_zlabel(f"{self.loss_key}") # type: ignore # set view angle - ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) + ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) # type: ignore ax.autoscale_view() ax.set_xlabel(self.fidelity_key) @@ -217,9 +221,14 @@ def plot3D( plt.close(fig) def save( - self, save_path: str | Path | None = None, filename: str = "freeze_thaw" + self, + save_path: str | Path | None = None, + filename: str = "freeze_thaw", ) -> None: - run_path = Path(save_path if save_path is not None else self.run_path) + path = save_path if save_path is not None else self.run_path + assert path is not None + + run_path = Path(path) run_path.mkdir(parents=True, exist_ok=True) assert run_path.is_dir() plot_path = run_path / f"Plot3D_{filename}.png" From 6b75aa8fe51ed9f0bc7c097c17efa62f368e3e33 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 15:39:34 +0200 Subject: [PATCH 36/46] fix(FTPFN): Workaround for windows download of surrogate model https://github.com/automl/ifBO/issues/10 --- .../bayesian_optimization/models/ftpfn.py | 95 +++++++++++++++---- 1 file changed, 78 insertions(+), 17 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index add29d0d..09e5f56d 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -1,31 +1,88 @@ +from __future__ import annotations + from typing import Any -import numpy as np -import pandas as pd from pathlib import Path import torch from ifbo import FTPFN +def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> Path: + # TODO: https://github.com/automl/ifBO/issues/10 + import requests + from ifbo.download import FILE_URL, FILENAME + from ifbo.surrogate import _resolve_model_path + + target_path = _resolve_model_path(path) # type: ignore + target_path.mkdir(parents=True, exist_ok=True) + + _target_zip_path = target_path / FILENAME(version) + _file_url = FILE_URL(version) + + # Download the tar.gz file and decompress it + response = requests.get(_file_url, allow_redirects=True) + if response.status_code != 200: + raise ValueError( + f"Failed to download the surrogate model from {_file_url}." + f" Got status code: {response.status_code}" + ) + + with open(_target_zip_path, "wb") as f: + try: + f.write(response.content) + except Exception as e: + raise ValueError( + f"Failed to write the surrogate model to {_target_zip_path}." + ) from e + + # Decompress the .tar.gz file using tarfile + import tarfile + + try: + with tarfile.open(_target_zip_path, "r:gz") as tar: + tar.extractall(path=target_path) + except Exception as e: + raise ValueError( + f"Failed to decompress the surrogate model at {_target_zip_path}." + ) from e + + return target_path + + class FTPFNSurrogate: """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" - def __init__(self, target_path: Path = None, version: str = "0.0.1", **kwargs): + def __init__( + self, + target_path: Path | None = None, + version: str = "0.0.1", + **kwargs: Any, + ): + if target_path is None: + # TODO: We also probably want to link this to the actual root directory + # or some shared directory between runs as relying on the path of the initial + # python invocation is likely to lead to issues somewhere. + # TODO: ifbo support for windows has issues with decompression + # We basically just do the same thing they do but manually + target_path = _download_workaround_for_ifbo_issue_10(target_path, version) + self.ftpfn = FTPFN(target_path=target_path, version=version) self.target_path = self.ftpfn.target_path self.version = self.ftpfn.version - self.train_x = None - self.train_y = None + self.train_x: torch.Tensor | None = None + self.train_y: torch.Tensor | None = None @property def device(self): return self.ftpfn.device - - def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor: + + def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor: + assert self.train_x is not None, "Train data is not set." + assert self.train_y is not None, "Train data is not set." return self.ftpfn.model( self._cast_tensor_shapes(self.train_x), self._cast_tensor_shapes(self.train_y), - self._cast_tensor_shapes(test_x) + self._cast_tensor_shapes(test_x), ) def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: @@ -33,7 +90,7 @@ def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: return x if len(x.shape) == 2: return x.reshape(x.shape[0], 1, x.shape[1]) - if len(x.shape) == 1: + if len(x.shape) == 1: return x.reshape(x.shape[0], 1) raise ValueError(f"Shape not recognized: {x.shape}") @@ -43,37 +100,41 @@ def get_mean_performance(self, test_x: torch.Tensor) -> torch.Tensor: return self.ftpfn.model.criterion.mean(logits) @torch.no_grad() - def get_pi(self, test_x, y_best): + def get_pi(self, test_x: torch.Tensor, y_best: torch.Tensor) -> torch.Tensor: logits = self._get_logits(test_x) return self.ftpfn.model.criterion.pi( logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) ) - + @torch.no_grad() - def get_ei(self, test_x, y_best): + def get_ei(self, test_x: torch.Tensor, y_best: torch.Tensor) -> torch.Tensor: logits = self._get_logits(test_x) return self.ftpfn.model.criterion.ei( logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) ) @torch.no_grad() - def get_lcb(self, test_x, beta: float=(1-.682)/2): + def get_lcb( + self, test_x: torch.Tensor, beta: float = (1 - 0.682) / 2 + ) -> torch.Tensor: logits = self._get_logits(test_x) lcb = self.ftpfn.model.criterion.ucb( logits=logits, best_f=None, rest_prob=beta, - maximize=False # IMPORTANT to be False, should calculate the LCB using the lower-bound ICDF as per beta + maximize=False, # IMPORTANT to be False, should calculate the LCB using the lower-bound ICDF as per beta ) return lcb - + @torch.no_grad() - def get_ucb(self, test_x, beta: float=(1-.682)/2): + def get_ucb( + self, test_x: torch.Tensor, beta: float = (1 - 0.682) / 2 + ) -> torch.Tensor: logits = self._get_logits(test_x) lcb = self.ftpfn.model.criterion.ucb( logits=logits, best_f=None, rest_prob=beta, - maximize=True # IMPORTANT to be True, should calculate the UCB using the upper-bound ICDF as per beta + maximize=True, # IMPORTANT to be True, should calculate the UCB using the upper-bound ICDF as per beta ) return lcb From 24f7bde769f199c5188e268b4107ef36fd5a76d6 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 15:40:11 +0200 Subject: [PATCH 37/46] fix(tensorboard): Ensure tensorboard knows about end of config --- neps/plot/tensorboard_eval.py | 21 ++++++++++++++++----- neps/runtime.py | 9 +++++++++ neps/utils/common.py | 17 ++++++++++++++--- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py index a6c27450..4ccde828 100644 --- a/neps/plot/tensorboard_eval.py +++ b/neps/plot/tensorboard_eval.py @@ -6,6 +6,7 @@ from collections.abc import Mapping from pathlib import Path from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar, Mapping from typing_extensions import override import numpy as np @@ -13,10 +14,17 @@ from torch.utils.tensorboard.summary import hparams from torch.utils.tensorboard.writer import SummaryWriter -from neps.runtime import get_in_progress_trial, get_workers_neps_state +from neps.runtime import ( + get_in_progress_trial, + get_workers_neps_state, + register_notify_trial_end, +) from neps.status.status import get_summary_dict from neps.utils.common import get_initial_directory +if TYPE_CHECKING: + from neps.state.trial import Trial + class SummaryWriter_(SummaryWriter): # noqa: N801 """This class inherits from the base SummaryWriter class and provides @@ -88,6 +96,8 @@ def _initiate_internal_configurations() -> None: trial = get_in_progress_trial() neps_state = get_workers_neps_state() + register_notify_trial_end("NEPS_TBLOGGER", tblogger.end_of_config) + # We are assuming that neps state is all filebased here root_dir = Path(neps_state.location) assert root_dir.exists() @@ -98,12 +108,12 @@ def _initiate_internal_configurations() -> None: if trial.metadata.previous_trial_location is not None else None ) + tblogger.config_id = trial.metadata.id tblogger.optimizer_dir = root_dir tblogger.config = trial.config @staticmethod def _is_initialized() -> bool: - # Returns 'True' if config_writer is already initialized. 'False' otherwise return tblogger.config_writer is not None @staticmethod @@ -111,7 +121,7 @@ def _initialize_writers() -> None: # This code runs only once per config, to assign that config a config_writer. if ( tblogger.config_previous_directory is None - and tblogger.config_working_directory + and tblogger.config_working_directory is not None ): # If no fidelities are there yet, define the writer via the config_id tblogger.config_id = str(tblogger.config_working_directory).rsplit( @@ -121,8 +131,9 @@ def _initialize_writers() -> None: tblogger.config_working_directory / "tbevents" ) return + # Searching for the initial directory where tensorboard events are stored. - if tblogger.config_working_directory: + if tblogger.config_working_directory is not None: init_dir = get_initial_directory( pipeline_directory=tblogger.config_working_directory ) @@ -136,7 +147,7 @@ def _initialize_writers() -> None: ) @staticmethod - def end_of_config() -> None: + def end_of_config(trial: Trial) -> None: # noqa: ARG004 """Closes the writer.""" if tblogger.config_writer: # Close and reset previous config writers for consistent logging. diff --git a/neps/runtime.py b/neps/runtime.py index 5e864159..f50f2515 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -89,6 +89,13 @@ def get_in_progress_trial() -> Trial: return _CURRENTLY_RUNNING_TRIAL_IN_PROCESS +_TRIAL_END_CALLBACKS: dict[str, Callable[[Trial], None]] = {} + + +def register_notify_trial_end(key: str, callback: Callable[[Trial], None]) -> None: + _TRIAL_END_CALLBACKS[key] = callback + + @contextmanager def _set_global_trial(trial: Trial) -> Iterator[None]: global _CURRENTLY_RUNNING_TRIAL_IN_PROCESS # noqa: PLW0603 @@ -103,6 +110,8 @@ def _set_global_trial(trial: Trial) -> Iterator[None]: ) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = trial yield + for _key, callback in _TRIAL_END_CALLBACKS.items(): + callback(trial) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = None diff --git a/neps/utils/common.py b/neps/utils/common.py index 8e90680a..952c2f07 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -8,6 +8,9 @@ from functools import partial from pathlib import Path from typing import Any +from functools import partial +from pathlib import Path +from typing import Any, Iterable, Mapping, Sequence import numpy as np import torch @@ -142,6 +145,9 @@ def load_lightning_checkpoint( return checkpoint_path, checkpoint +_INTIAL_DIRECTORY_CACHE: dict[str, Path] = {} + + # TODO: We should have a better way to have a shared folder between trials. # Right now, the fidelity lineage is linear, however this will be a difficulty # when/if we have a tree structure. @@ -157,13 +163,15 @@ def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: """ neps_state = get_workers_neps_state() if pipeline_directory is not None: - pipeline_directory = Path(pipeline_directory) # TODO: Hard coded assumption - config_id = pipeline_directory.name.split("_", maxsplit=1)[-1] + config_id = Path(pipeline_directory).name.split("_", maxsplit=1)[-1] trial = neps_state.get_trial_by_id(config_id) else: trial = get_in_progress_trial() + if trial.metadata.id in _INTIAL_DIRECTORY_CACHE: + return _INTIAL_DIRECTORY_CACHE[trial.metadata.id] + # Recursively find the initial directory while (prev_trial_id := trial.metadata.previous_trial_id) is not None: trial = neps_state.get_trial_by_id(prev_trial_id) @@ -172,7 +180,10 @@ def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: # TODO: Hard coded assumption that we are operating in a filebased neps assert isinstance(initial_dir, str) - return Path(initial_dir) + path = Path(initial_dir) + + _INTIAL_DIRECTORY_CACHE[trial.metadata.id] = path + return path def get_searcher_data( From 34775cde00791c1bcc6f397e92225c8a88cc2f68 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 16:10:45 +0200 Subject: [PATCH 38/46] fix: Create default path to avoid warning --- neps/optimizers/bayesian_optimization/models/ftpfn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index 09e5f56d..3eaae694 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -13,7 +13,7 @@ def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> P from ifbo.download import FILE_URL, FILENAME from ifbo.surrogate import _resolve_model_path - target_path = _resolve_model_path(path) # type: ignore + target_path = Path(path) if path is not None else Path.cwd().resolve() target_path.mkdir(parents=True, exist_ok=True) _target_zip_path = target_path / FILENAME(version) From 304d3fb858f9a43ffda3de4ca4d5c13d11ad9425 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 16:30:37 +0200 Subject: [PATCH 39/46] fix: Pathing --- neps/optimizers/bayesian_optimization/models/ftpfn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index 3eaae694..ab2b45f1 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -11,9 +11,8 @@ def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> P # TODO: https://github.com/automl/ifBO/issues/10 import requests from ifbo.download import FILE_URL, FILENAME - from ifbo.surrogate import _resolve_model_path - target_path = Path(path) if path is not None else Path.cwd().resolve() + target_path = Path(path) if path is not None else Path.cwd().resolve() / ".model" target_path.mkdir(parents=True, exist_ok=True) _target_zip_path = target_path / FILENAME(version) From 53c70446feb9f11f8ec36f547da223a270412179 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 16:54:27 +0200 Subject: [PATCH 40/46] fix: Pre-commit fixes, docs, unused, typing --- neps/__init__.py | 4 +- neps/optimizers/multi_fidelity/ifbo.py | 4 +- neps/plot/plot3D.py | 15 +++++- neps/runtime.py | 1 + neps/search_spaces/search_space.py | 16 +++--- neps/utils/common.py | 69 -------------------------- pyproject.toml | 2 + 7 files changed, 28 insertions(+), 83 deletions(-) diff --git a/neps/__init__.py b/neps/__init__.py index ab1f3d50..b2276ca3 100644 --- a/neps/__init__.py +++ b/neps/__init__.py @@ -1,5 +1,6 @@ from neps.api import run from neps.plot.plot import plot +from neps.plot.tensorboard_eval import tblogger from neps.search_spaces import ( ArchitectureParameter, CategoricalParameter, @@ -12,7 +13,6 @@ IntegerParameter, ) from neps.status.status import get_summary_dict, status -from neps.plot.tensorboard_eval import tblogger Integer = IntegerParameter Float = FloatParameter @@ -39,5 +39,5 @@ "GraphGrammar", "GraphGrammarCell", "GraphGrammarRepetitive", - "tblogger" + "tblogger", ] diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py index 5d4d11ce..dbdeb17a 100755 --- a/neps/optimizers/multi_fidelity/ifbo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -7,7 +7,7 @@ from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult -from neps.utils.common import instance_from_map, EvaluationData +from neps.utils.common import instance_from_map from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer from neps.optimizers.bayesian_optimization.acquisition_functions import AcquisitionMapping @@ -168,8 +168,6 @@ def __init__( ) self.count = 0 - self.evaluation_data = EvaluationData() - def _adjust_fidelity_for_freeze_thaw_steps( self, pipeline_space: SearchSpace, step_size: int ) -> SearchSpace: diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py index ef197ba4..2001825c 100644 --- a/neps/plot/plot3D.py +++ b/neps/plot/plot3D.py @@ -1,3 +1,5 @@ +"""Plot a 3D landscape of learning curves for a given run.""" + from __future__ import annotations from dataclasses import dataclass @@ -17,6 +19,8 @@ @dataclass class Plotter3D: + """Plot a 3d landscape of learning curves for a given run.""" + loss_key: str = "Loss" fidelity_key: str = "epochs" run_path: str | Path | None = None @@ -27,7 +31,7 @@ class Plotter3D: bck_color_2d: tuple[float, float, float] = (0.8, 0.82, 0.8) view_angle: tuple[float, float] = (15, -70) - def __post_init__(self): + def __post_init__(self) -> None: if self.run_path is not None: assert ( Path(self.run_path).absolute().is_dir() @@ -48,22 +52,27 @@ def __post_init__(self): @staticmethod def get_x(df: pd.DataFrame) -> np.ndarray: + """Get the x-axis values for the plot.""" return df["epochID"].to_numpy() @staticmethod def get_y(df: pd.DataFrame) -> np.ndarray: + """Get the y-axis values for the plot.""" y_ = df["configID"].to_numpy() return np.ones_like(y_) * y_[0] @staticmethod def get_z(df: pd.DataFrame) -> np.ndarray: + """Get the z-axis values for the plot.""" return df["result.loss"].to_numpy() @staticmethod def get_color(df: pd.DataFrame) -> np.ndarray: + """Get the color values for the plot.""" return df.index.to_numpy() def prep_df(self, df: pd.DataFrame | None = None) -> pd.DataFrame: + """Prepare the dataframe for plotting.""" df = self.df if df is None else df _fid_key = f"config.{self.fidelity_key}" @@ -81,12 +90,13 @@ def prep_df(self, df: pd.DataFrame | None = None) -> pd.DataFrame: time_cols = ["metadata.time_started", "metadata.time_end"] return df.sort_values(by=time_cols).reset_index(drop=True) - def plot3D( + def plot3D( # noqa: N802, PLR0915 self, data: pd.DataFrame | None = None, save_path: str | Path | None = None, filename: str = "freeze_thaw", ) -> None: + """Plot the 3D landscape of learning curves.""" data = self.prep_df(data) # Create the figure and the axes for the plot @@ -225,6 +235,7 @@ def save( save_path: str | Path | None = None, filename: str = "freeze_thaw", ) -> None: + """Save the plot to a file.""" path = save_path if save_path is not None else self.run_path assert path is not None diff --git a/neps/runtime.py b/neps/runtime.py index f50f2515..f1c5fcfe 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -93,6 +93,7 @@ def get_in_progress_trial() -> Trial: def register_notify_trial_end(key: str, callback: Callable[[Trial], None]) -> None: + """Register a callback to be called when a trial ends.""" _TRIAL_END_CALLBACKS[key] = callback diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index ccaeca87..8621a928 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -337,9 +337,10 @@ def sample( else: sampled_hps[name] = hp.sample() break - except Exception as e: + except Exception as e: # noqa: BLE001 logger.warning( - f"Attempt {attempt + 1}/{patience} failed for sampling {name}: {str(e)}" + f"Attempt {attempt + 1}/{patience} failed for" + f" sampling {name}: {e!s}" ) else: logger.error( @@ -351,7 +352,7 @@ def sample( ) return SearchSpace(**sampled_hps) - + def mutate( self, *, @@ -622,8 +623,8 @@ def get_search_space_grid( Include default hyperparameters in the grid. If all HPs have a `default` then add a single configuration. - If only partial HPs have defaults then add all combinations of defaults, but only to - the end of the list of configs. + If only partial HPs have defaults then add all combinations of defaults, but + only to the end of the list of configs. Args: size_per_numerical_hp: The size of the grid for each numerical hyperparameter. @@ -902,8 +903,9 @@ def update_hp_values(self, new_values: dict[str, Any]) -> None: """ _hp_dict = self.hp_values() _intersect = set(_hp_dict.keys()) & set(new_values.keys()) - assert len(_intersect) == len(new_values), \ - "All hyperparameters must be present! "\ + assert len(_intersect) == len(new_values), ( + "All hyperparameters must be present! " f"{set(_hp_dict.keys()) - set(new_values.keys())} are missing" + ) _hp_dict.update(new_values) self.set_hyperparameters_from_dict(_hp_dict) diff --git a/neps/utils/common.py b/neps/utils/common.py index 952c2f07..2c6f9d35 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -369,72 +369,3 @@ def instance_from_map( # noqa: C901, PLR0912 raise TypeError(f"{e} when calling {instance} with {args_dict}") from e return instance - - -def get_rnd_state() -> dict: - np_state = list(np.random.get_state()) - np_state[1] = np_state[1].tolist() - state = { - "random_state": random.getstate(), - "np_seed_state": np_state, - "torch_seed_state": torch.random.get_rng_state().tolist(), - } - if torch.cuda.is_available(): - state["torch_cuda_seed_state"] = [ - dev.tolist() for dev in torch.cuda.get_rng_state_all() - ] - return state - - -def set_rnd_state(state: dict): - # rnd_s1, rnd_s2, rnd_s3 = state["random_state"] - random.setstate( - tuple( - tuple(rnd_s) if isinstance(rnd_s, list) else rnd_s - for rnd_s in state["random_state"] - ) - ) - np.random.set_state(tuple(state["np_seed_state"])) - torch.random.set_rng_state(torch.ByteTensor(state["torch_seed_state"])) - if torch.cuda.is_available() and "torch_cuda_seed_state" in state: - torch.cuda.set_rng_state_all( - [torch.ByteTensor(dev) for dev in state["torch_cuda_seed_state"]] - ) - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.__dict__ = self - - -class DataWriter: - """A class to specify how to save/write a data to the folder by - implementing your own write_data function. - Use the set_attributes function to set all your necessary attributes and the data - and then write_data will be called with only the directory path as argument - during the write process. - """ - - def __init__(self, name: str): - self.name = name - - def set_attributes(self, attribute_dict: dict[str, Any]): - for attribute_name, attribute in attribute_dict.items(): - setattr(self, attribute_name, attribute) - - def write_data(self, to_directory: Path): - raise NotImplementedError - - -class EvaluationData: - """A class to store some data for a single evaluation (configuration) - and write that data to its corresponding config folder. - """ - - def __init__(self): - self.data_dict: dict[str, DataWriter] = {} - - def write_all(self, directory: Path): - for _, data_writer in self.data_dict.items(): - data_writer.write_data(directory) diff --git a/pyproject.toml b/pyproject.toml index 236bf952..a08a9e7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,6 +106,7 @@ exclude = [ "neps/search_spaces/architecture/**/*.py", "neps/search_spaces/yaml_search_space_utils.py", "neps/utils/run_args_from_yaml.py", + "neps/utils/common.py", "neps/api.py", "tests", "neps_examples", @@ -209,6 +210,7 @@ ignore = [ "COM812", # Require trailing commas, recommended to ignore due to ruff formatter "PLR2004", # No magic numbers inline "N817", # CamelCase import as (ignore for ConfigSpace) + "N999", # Invalid name for module "NPY002", # Replace legacy `np.random.choice` call with `np.random.Generator` ] From b65143faa9a6610c02b2fc86acc1747439bbc877 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 17:32:51 +0200 Subject: [PATCH 41/46] style: ruff --- .../acquisition_samplers/freeze_thaw_sampler.py | 2 +- neps/plot/plot3D.py | 5 ++++- neps/plot/tensorboard_eval.py | 3 +-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index 5db827ed..93c7370f 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -122,12 +122,12 @@ def sample( """Samples a new set and returns the total set of observed + new configs.""" assert self.observations is not None assert self.pipeline_space is not None - assert self.pipeline_space.custom_grid_table is not None partial_configs = self.observations.get_partial_configs_at_max_seen() _n = n if n is not None else self.samples_to_draw if self.is_tabular: + assert self.pipeline_space.custom_grid_table is not None # handles tabular data such that the entire unseen set of configs from the # table is considered to be the new set of candidates _partial_ids = {conf["id"].value for conf in partial_configs} diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py index 2001825c..d543ef82 100644 --- a/neps/plot/plot3D.py +++ b/neps/plot/plot3D.py @@ -7,7 +7,10 @@ import numpy as np import pandas as pd -from matplotlib import cm, pyplot as plt +from matplotlib import ( + cm, + pyplot as plt, +) from matplotlib.collections import LineCollection from matplotlib.colors import Normalize from mpl_toolkits.mplot3d.art3d import Line3DCollection diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py index 4ccde828..2211537d 100644 --- a/neps/plot/tensorboard_eval.py +++ b/neps/plot/tensorboard_eval.py @@ -5,8 +5,7 @@ import math from collections.abc import Mapping from pathlib import Path -from typing import Any, ClassVar -from typing import TYPE_CHECKING, Any, ClassVar, Mapping +from typing import TYPE_CHECKING, Any, ClassVar from typing_extensions import override import numpy as np From b1c6ec4d08cd2cd49bb787382c050ba256222b77 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 17:44:46 +0200 Subject: [PATCH 42/46] fix: Incorrect typing leading to attribute error --- neps/optimizers/multi_fidelity/utils.py | 46 ++++++++++++++----------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index 91bf3c2c..f551e73f 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -33,7 +33,9 @@ def normalize_vectorize_config( config: SearchSpace, ignore_fidelity: bool = True ) -> np.ndarray: _new_vector = [] - for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity=ignore_fidelity).items(): + for _, hp_list in config.get_normalized_hp_categories( + ignore_fidelity=ignore_fidelity + ).items(): _new_vector.extend(hp_list) return np.array(_new_vector) @@ -42,23 +44,25 @@ def get_tokenized_data( configs: list[SearchSpace], ignore_fidelity: bool = True, ) -> np.ndarray: # pd.Series: # tuple[np.ndarray, np.ndarray, np.ndarray]: - """ Extracts configurations, indices and performances given a DataFrame + """Extracts configurations, indices and performances given a DataFrame Tokenizes the given set of observations as required by a PFN surrogate model. """ - configs = np.array([ - normalize_vectorize_config(c, ignore_fidelity=ignore_fidelity) for c in configs - ]) + configs = np.array( + [normalize_vectorize_config(c, ignore_fidelity=ignore_fidelity) for c in configs] + ) return configs -def get_freeze_thaw_normalized_step(fid_step: int, lower: int, upper: int, step: int) -> float: +def get_freeze_thaw_normalized_step( + fid_step: int, lower: int, upper: int, step: int +) -> float: max_fid_step = int(np.ceil((upper - lower) / step)) + 1 return fid_step / max_fid_step def get_training_data_for_freeze_thaw( - df: pd.DataFrame | MFObservedData.df, + df: pd.DataFrame, config_key: str, perf_key: str, pipeline_space: SearchSpace, @@ -92,6 +96,7 @@ def get_training_data_for_freeze_thaw( performance = (1 - np.array(performance)).tolist() return idxs, steps, configs, performance + class MFObservedData: """ (Under development) @@ -163,7 +168,7 @@ def pending_runs_index(self) -> pd.Index | pd.MultiIndex: @property def completed_runs(self): return self.df[~(self.pending_condition | self.error_condition)] - + @property def completed_runs_index(self) -> pd.Index | pd.MultiIndex: return self.completed_runs.index @@ -270,8 +275,7 @@ def get_best_performance_for_each_budget(self, maximize: bool = False): return performance def get_budget_level_for_best_performance(self, maximize: bool = False) -> int: - """Returns the lowest budget level at which the highest performance was recorded. - """ + """Returns the lowest budget level at which the highest performance was recorded.""" perf_per_z = self.get_best_performance_for_each_budget(maximize=maximize) y_star = self.get_best_seen_performance(maximize=maximize) # uses the minimum of the budget that see the maximum obseved score @@ -320,7 +324,9 @@ def extract_learning_curve( if budget_id is None: # budget_id only None when predicting # extract full observed learning curve for prediction pipeline - budget_id = max(self.df.loc[config_id].index.get_level_values("budget_id").values) + 1 + budget_id = ( + max(self.df.loc[config_id].index.get_level_values("budget_id").values) + 1 + ) # For the first epoch we have no learning curve available if budget_id == 0: @@ -336,24 +342,24 @@ def extract_learning_curve( return deepcopy(lc) def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series: - """Returns the best score recorded per config across fidelities seen. - """ + """Returns the best score recorded per config across fidelities seen.""" op = np.max if maximize else np.min perf = ( - self.df - .sort_values("budget_id", ascending=False) # sorts with largest budget first + self.df.sort_values( + "budget_id", ascending=False + ) # sorts with largest budget first .groupby("config_id") # retains only config_id .first() # retrieves the largest budget seen for each config_id - .learning_curves # extracts all values seen till largest budget for a config - .apply(op) # finds the minimum over per-config learning curve + .learning_curves.apply( # extracts all values seen till largest budget for a config + op + ) # finds the minimum over per-config learning curve ) return perf def get_max_observed_fidelity_level_per_config(self) -> pd.Series: - """Returns the highest fidelity level recorded per config seen. - """ + """Returns the highest fidelity level recorded per config seen.""" max_z_observed = { - _id: self.df.loc[_id,:].index.sort_values()[-1] + _id: self.df.loc[_id, :].index.sort_values()[-1] for _id in self.df.index.get_level_values("config_id").sort_values() } return pd.Series(max_z_observed) From 7ad4eaa29fcb0ff385985c285ca8450f38ae368d Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 17:56:21 +0200 Subject: [PATCH 43/46] fix: Only re-download the model if not downloaded before --- neps/optimizers/bayesian_optimization/models/ftpfn.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index ab2b45f1..d6d276a0 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -16,6 +16,16 @@ def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> P target_path.mkdir(parents=True, exist_ok=True) _target_zip_path = target_path / FILENAME(version) + + # Just a heuristic check to determine if the model already exists. + # Kind of hard to know what the name of the extracted file will be + # Basically we just check if the tar.gz file is there and unpacked. + # If there is a new version, then it wont exist and we will download it. + if _target_zip_path.exists() and any( + p.name.endswith(".pt") for p in target_path.iterdir() + ): + return target_path + _file_url = FILE_URL(version) # Download the tar.gz file and decompress it From dcdb722832247a0e4d6f63043c667a9448f8d62c Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 17:59:59 +0200 Subject: [PATCH 44/46] feat: Cache ftpfn model --- neps/optimizers/bayesian_optimization/models/ftpfn.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py index d6d276a0..3831ec61 100644 --- a/neps/optimizers/bayesian_optimization/models/ftpfn.py +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -58,6 +58,9 @@ def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> P return target_path +_CACHED_FTPFN_MODEL: dict[tuple[str, str], FTPFN] = {} + + class FTPFNSurrogate: """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" @@ -75,7 +78,13 @@ def __init__( # We basically just do the same thing they do but manually target_path = _download_workaround_for_ifbo_issue_10(target_path, version) - self.ftpfn = FTPFN(target_path=target_path, version=version) + key = (str(target_path), version) + ftpfn = _CACHED_FTPFN_MODEL.get(key) + if ftpfn is None: + ftpfn = FTPFN(target_path=target_path, version=version) + _CACHED_FTPFN_MODEL[key] = ftpfn + + self.ftpfn = ftpfn self.target_path = self.ftpfn.target_path self.version = self.ftpfn.version self.train_x: torch.Tensor | None = None From fae06281874feec907ebaa102269ab4ea9a02a54 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Tue, 17 Sep 2024 18:03:58 +0200 Subject: [PATCH 45/46] fix: Ignore `.data` folder used in example that downloads MNIST --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e8be93e7..58b5d46c 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,6 @@ jahs_bench_data/ # Yaml tests path + +# From example that uses MNIST +.data From a2c50694792eafa8eb5297db1f775be77040fafc Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Wed, 18 Sep 2024 11:07:25 +0200 Subject: [PATCH 46/46] Freeze-thaw example tblogger update --- neps_examples/efficiency/freeze_thaw.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py index fb0c4531..32943ec2 100644 --- a/neps_examples/efficiency/freeze_thaw.py +++ b/neps_examples/efficiency/freeze_thaw.py @@ -129,7 +129,7 @@ def training_pipeline( loss=val_loss, current_epoch=epochs, # Set to `True` for a live incumbent trajectory. - write_summary_incumbent=False, + write_summary_incumbent=True, # Set to `True` for a live loss trajectory for each config. writer_config_scalar=True, # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. @@ -159,11 +159,11 @@ def training_pipeline( pipeline_space=pipeline_space, run_pipeline=training_pipeline, searcher="ifbo", - max_evaluations_total=25, - root_directory="debug/ifbo-mnist/", - overwrite_working_directory=True, # set to False for a multi-worker run + max_evaluations_total=50, + root_directory="./debug/ifbo-mnist/", + overwrite_working_directory=False, # set to False for a multi-worker run # (optional) ifbo hyperparameters - step_size=3, + step_size=1, # (optional) ifbo surrogate model hyperparameters (for FT-PFN) surrogate_model_args=dict( version="0.0.1", @@ -174,7 +174,7 @@ def training_pipeline( # NOTE: this is `experimental` and may not work as expected ## plotting a 3D plot for learning curves explored by ifbo plotter = Plotter3D( - run_path="debug/ifbo-mnist/", # same as `root_directory` above + run_path="./debug/ifbo-mnist/", # same as `root_directory` above fidelity_key="epochs", # same as `pipeline_space` ) plotter.plot3D(filename="ifbo")