From f594f52880ef98f8185a3e9bfab9a6808ac8b809 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Wed, 18 Sep 2024 12:25:49 +0200 Subject: [PATCH] Add ifbo (#115) Co-authored-by: Timur M. Carstensen Co-authored-by: karibbov Co-authored-by: eddiebergman --- .github/workflows/pre-commit.yaml | 3 +- .github/workflows/tests.yaml | 2 +- .gitignore | 3 + docs/_code/api_generator.py | 2 +- docs/_code/example_generator.py | 2 - docs/doc_yamls/architecture_search_space.py | 2 +- neps/__init__.py | 2 + neps/api.py | 4 +- neps/env.py | 3 +- neps/optimizers/__init__.py | 11 +- neps/optimizers/base_optimizer.py | 2 +- .../acquisition_functions/__init__.py | 24 +- .../acquisition_functions/ei.py | 1 + .../acquisition_functions/mf_ei.py | 205 ------ .../acquisition_functions/mf_pi.py | 199 ++++++ .../acquisition_functions/ucb.py | 13 - .../freeze_thaw_sampler.py | 138 ++-- .../acquisition_samplers/mutation_sampler.py | 7 +- .../acquisition_samplers/random_sampler.py | 2 - .../bayesian_optimization/cost_cooling.py | 4 +- .../bayesian_optimization/kernels/__init__.py | 2 +- .../kernels/get_kernels.py | 2 - .../bayesian_optimization/mf_tpe.py | 8 +- .../bayesian_optimization/models/__init__.py | 12 +- .../bayesian_optimization/models/deepGP.py | 634 ------------------ .../bayesian_optimization/models/ftpfn.py | 158 +++++ .../bayesian_optimization/optimizer.py | 2 +- neps/optimizers/default_searchers/ifbo.yaml | 9 + neps/optimizers/grid_search/optimizer.py | 3 +- neps/optimizers/info.py | 2 - neps/optimizers/multi_fidelity/_dyhpo.py | 409 ----------- neps/optimizers/multi_fidelity/hyperband.py | 7 +- .../multi_fidelity/{dyhpo.py => ifbo.py} | 181 +++-- neps/optimizers/multi_fidelity/mf_bo.py | 219 +++--- .../multi_fidelity/promotion_policy.py | 6 +- .../multi_fidelity/sampling_policy.py | 2 - .../multi_fidelity/successive_halving.py | 10 +- neps/optimizers/multi_fidelity/utils.py | 196 ++++-- .../multi_fidelity_prior/async_priorband.py | 7 +- .../multi_fidelity_prior/priorband.py | 
6 +- neps/optimizers/multi_fidelity_prior/utils.py | 2 - .../prototype_optimizer.py | 5 +- neps/optimizers/random_search/optimizer.py | 3 +- .../regularized_evolution/optimizer.py | 13 +- neps/optimizers/utils.py | 39 +- neps/plot/plot3D.py | 255 +++++++ neps/plot/tensorboard_eval.py | 23 +- neps/runtime.py | 16 +- neps/search_spaces/architecture/api.py | 2 +- neps/search_spaces/architecture/cfg.py | 2 +- .../architecture/core_graph_grammar.py | 2 +- .../architecture/graph_grammar.py | 34 +- .../hyperparameters/categorical.py | 17 +- neps/search_spaces/hyperparameters/float.py | 3 +- neps/search_spaces/hyperparameters/integer.py | 3 +- .../hyperparameters/numerical.py | 3 +- neps/search_spaces/parameter.py | 5 +- neps/search_spaces/search_space.py | 43 +- neps/search_spaces/yaml_search_space_utils.py | 2 - neps/state/_eval.py | 3 +- neps/state/filebased.py | 3 +- neps/state/neps_state.py | 15 +- neps/state/optimizer.py | 3 +- neps/state/protocols.py | 5 +- neps/state/seed_snapshot.py | 11 +- neps/state/trial.py | 7 +- neps/utils/_locker.py | 3 +- neps/utils/common.py | 22 +- neps/utils/data_loading.py | 3 +- neps/utils/files.py | 3 +- neps/utils/run_args.py | 17 +- neps/utils/types.py | 9 +- neps/utils/validation.py | 3 +- neps_examples/basic_usage/architecture.py | 2 +- neps_examples/efficiency/freeze_thaw.py | 180 +++++ .../experimental/hierarchical_architecture.py | 2 - ...erarchical_architecture_hierarchical_GP.py | 2 - neps_examples/template/ifbo_template.py | 37 + pyproject.toml | 15 +- tests/regression_objectives.py | 5 +- tests/regression_runner.py | 2 - tests/test_neps_api/test_api.py | 2 - .../test_default_report_values.py | 2 - .../test_error_handling_strategies.py | 2 - tests/test_runtime/test_stopping_criterion.py | 2 - tests/test_state/test_filebased_neps_state.py | 2 - tests/test_state/test_neps_state.py | 12 +- tests/test_state/test_rng.py | 2 - tests/test_state/test_synced.py | 7 +- tests/test_state/test_trial.py | 5 +- 90 files changed, 1484 
insertions(+), 1880 deletions(-) delete mode 100644 neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py create mode 100644 neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py delete mode 100644 neps/optimizers/bayesian_optimization/models/deepGP.py create mode 100644 neps/optimizers/bayesian_optimization/models/ftpfn.py create mode 100644 neps/optimizers/default_searchers/ifbo.yaml delete mode 100644 neps/optimizers/multi_fidelity/_dyhpo.py rename neps/optimizers/multi_fidelity/{dyhpo.py => ifbo.py} (79%) create mode 100644 neps/plot/plot3D.py create mode 100644 neps_examples/efficiency/freeze_thaw.py create mode 100644 neps_examples/template/ifbo_template.py diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 2eb93db1..a7bd4690 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -27,8 +27,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: '3.10' - run: pip install pre-commit - run: pre-commit install - run: pre-commit run --all-files - diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index dd60a230..0ec1b3ed 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] os: [ubuntu-latest, macos-latest, windows-latest] defaults: run: diff --git a/.gitignore b/.gitignore index e8be93e7..58b5d46c 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,6 @@ jahs_bench_data/ # Yaml tests path + +# From example that uses MNIST +.data diff --git a/docs/_code/api_generator.py b/docs/_code/api_generator.py index 1b9951bf..b19f40a2 100644 --- a/docs/_code/api_generator.py +++ b/docs/_code/api_generator.py @@ -2,7 +2,7 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations + import logging from 
pathlib import Path diff --git a/docs/_code/example_generator.py b/docs/_code/example_generator.py index ca866a0e..6452bbda 100644 --- a/docs/_code/example_generator.py +++ b/docs/_code/example_generator.py @@ -2,7 +2,6 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations import logging from pathlib import Path @@ -16,7 +15,6 @@ EXAMPLE_FOLDER = ROOT / "neps_examples" TAB = " " - if not SRCDIR.exists(): raise FileNotFoundError( f"{SRCDIR} does not exist, make sure you are running this from the root of the repository." diff --git a/docs/doc_yamls/architecture_search_space.py b/docs/doc_yamls/architecture_search_space.py index 36f8bb38..cdac0da0 100644 --- a/docs/doc_yamls/architecture_search_space.py +++ b/docs/doc_yamls/architecture_search_space.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from torch import nn import neps from neps.search_spaces.architecture import primitives as ops diff --git a/neps/__init__.py b/neps/__init__.py index caca68e2..b2276ca3 100644 --- a/neps/__init__.py +++ b/neps/__init__.py @@ -1,5 +1,6 @@ from neps.api import run from neps.plot.plot import plot +from neps.plot.tensorboard_eval import tblogger from neps.search_spaces import ( ArchitectureParameter, CategoricalParameter, @@ -38,4 +39,5 @@ "GraphGrammar", "GraphGrammarCell", "GraphGrammarRepetitive", + "tblogger", ] diff --git a/neps/api.py b/neps/api.py index 6be520ad..4f81b0cf 100644 --- a/neps/api.py +++ b/neps/api.py @@ -1,6 +1,6 @@ """API for the neps package.""" -from __future__ import annotations + import inspect import logging @@ -80,7 +80,7 @@ def run( root_directory: The directory to save progress to. This is also used to synchronize multiple calls to run(.) for parallelization. run_args: An option for providing the optimization settings e.g. - max_evaluation_total in a YAML file. + max_evaluations_total in a YAML file. overwrite_working_directory: If true, delete the working directory at the start of the run. 
This is, e.g., useful when debugging a run_pipeline function. post_run_summary: If True, creates a csv file after each worker is done, diff --git a/neps/env.py b/neps/env.py index 155c3d32..256a5415 100644 --- a/neps/env.py +++ b/neps/env.py @@ -3,7 +3,8 @@ from __future__ import annotations import os -from typing import Any, Callable, TypeVar +from collections.abc import Callable +from typing import Any, TypeVar T = TypeVar("T") V = TypeVar("V") diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 31cb4c4a..1cff287a 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -1,14 +1,13 @@ -from __future__ import annotations + from functools import partial from typing import Callable, Mapping from .base_optimizer import BaseOptimizer from .bayesian_optimization.cost_cooling import CostCooling -from .bayesian_optimization.mf_tpe import MultiFidelityPriorWeightedTreeParzenEstimator from .bayesian_optimization.optimizer import BayesianOptimization from .grid_search.optimizer import GridSearch -from .multi_fidelity.dyhpo import MFEIBO +from .multi_fidelity.ifbo import IFBO from .multi_fidelity.hyperband import ( MOBSTER, AsynchronousHyperband, @@ -41,9 +40,11 @@ "asha": AsynchronousSuccessiveHalving, "hyperband": Hyperband, "asha_prior": AsynchronousSuccessiveHalvingWithPriors, - "multifidelity_tpe": MultiFidelityPriorWeightedTreeParzenEstimator, "hyperband_custom_default": HyperbandCustomDefault, "priorband": PriorBand, + "priorband_bo": partial(PriorBand, model_based=True), + "priorband_asha": PriorBandAsha, + "priorband_asha_hyperband": PriorBandAshaHB, "mobster": MOBSTER, - "mf_ei_bo": MFEIBO, + "ifbo": IFBO, } diff --git a/neps/optimizers/base_optimizer.py b/neps/optimizers/base_optimizer.py index 34804626..c5b5f83f 100644 --- a/neps/optimizers/base_optimizer.py +++ b/neps/optimizers/base_optimizer.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import logging from abc import abstractmethod diff --git 
a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index 89cfb4fb..add581b5 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from functools import partial from typing import Callable @@ -6,10 +6,9 @@ from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ( ComprehensiveExpectedImprovement, ) -from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFEI +from neps.optimizers.bayesian_optimization.acquisition_functions.mf_pi import MFPI_Random from neps.optimizers.bayesian_optimization.acquisition_functions.ucb import ( UpperConfidenceBound, - MF_UCB, ) from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import ( DecayingPriorWeightedAcquisition, @@ -28,33 +27,28 @@ augmented_ei=False, log_ei=True, ), - # # Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with - # # the highest *posterior mean*, which are preferred when the optimisation is noisy. + ## Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with + ## the highest *posterior mean*, which are preferred when the optimisation is noisy. 
"AEI": partial( ComprehensiveExpectedImprovement, in_fill="posterior", augmented_ei=True, ), - "MFEI": partial( - MFEI, - in_fill="best", - augmented_ei=False, + "MFPI-random": partial( + MFPI_Random, + threshold="random", + horizon="random", ), "UCB": partial( UpperConfidenceBound, maximize=False, ), - "MF-UCB": partial( - MF_UCB, - maximize=False, - ), } __all__ = [ "AcquisitionMapping", "ComprehensiveExpectedImprovement", - "MFEI", "UpperConfidenceBound", - "MF_UCB", "DecayingPriorWeightedAcquisition", + "MFPI_Random", ] diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py index ba5eb38b..90a99f26 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py @@ -10,6 +10,7 @@ if TYPE_CHECKING: from neps.search_spaces import SearchSpace + class ComprehensiveExpectedImprovement(BaseAcquisition): def __init__( self, diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py deleted file mode 100644 index 3d19040d..00000000 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ /dev/null @@ -1,205 +0,0 @@ -# type: ignore -from typing import Any, Iterable, Tuple, Union - -import numpy as np -import pandas as pd -import torch -from torch.distributions import Normal - -from ....optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ....search_spaces.search_space import SearchSpace -from ...multi_fidelity.utils import MFObservedData -from .ei import ComprehensiveExpectedImprovement - - -class MFEI(ComprehensiveExpectedImprovement): - def __init__( - self, - pipeline_space: SearchSpace, - surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - log_ei: bool = False, - ): - super().__init__(augmented_ei, xi, 
in_fill, log_ei) - self.pipeline_space = pipeline_space - self.surrogate_model_name = surrogate_model_name - self.surrogate_model = None - self.observations = None - self.b_step = None - - def get_budget_level(self, config) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.b_step) - - def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - budget_list = [] - - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - # expected output: IDs pertaining to current observations and set of HPs - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.lower - if i <= max(self.observations.seen_config_ids): - # IMPORTANT to set the fidelity at which EI will be calculated only for - # the partial configs that have been observed already - target_fidelity = config.fidelity.value + self.b_step - - if np.less_equal(target_fidelity, config.fidelity.upper): - # only consider the configs with fidelity lower than the max fidelity - config.fidelity.set_value(target_fidelity) - budget_list.append(self.get_budget_level(config)) - else: - # if the target_fidelity higher than the max drop the configuration - indices_to_drop.append(i) - else: - config.fidelity.set_value(target_fidelity) - budget_list.append(self.get_budget_level(config)) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - performances = self.observations.get_best_performance_for_each_budget() - inc_list = [] - for budget_level in budget_list: - if budget_level in performances.index: - inc = performances[budget_level] - else: - inc = 
self.observations.get_best_seen_performance() - inc_list.append(inc) - - return x, torch.Tensor(inc_list) - - def preprocess_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - return x.values.tolist(), inc_list - - def preprocess_deep_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - x_lcs = [] - for idx in x.index: - if idx in self.observations.df.index.levels[0]: - budget_level = self.get_budget_level(x[idx]) - lc = self.observations.extract_learning_curve(idx, budget_level) - else: - # initialize a learning curve with a place holder - # This is later padded accordingly for the Conv1D layer - lc = [0.0] - x_lcs.append(lc) - self.surrogate_model.set_prediction_learning_curves(x_lcs) - return x.values.tolist(), inc_list - - def preprocess_pfn(self, x: Iterable) -> Tuple[Iterable, Iterable, Iterable]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. 
- """ - _x, inc_list = self.preprocess(x.copy()) - _x_tok = self.observations.tokenize(_x, as_tensor=True) - len_partial = len(self.observations.seen_config_ids) - z_min = x[0].fidelity.lower - # converting fidelity to the discrete budget level - # STRICT ASSUMPTION: fidelity is the first dimension - _x_tok[:len_partial, 0] = ( - _x_tok[:len_partial, 0] + self.b_step - z_min - ) / self.b_step - return _x_tok, _x, inc_list - - def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: - # _x = x.copy() # preprocessing needs to change the reference x Series so we don't copy here - if self.surrogate_model_name == "pfn": - _x_tok, _x, inc_list = self.preprocess_pfn( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_pfn_ei(_x_tok, inc_list) - elif self.surrogate_model_name == "deep_gp": - _x, inc_list = self.preprocess_deep_gp( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) - else: - _x, inc_list = self.preprocess_gp( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) - - if ei.is_cuda: - ei = ei.cpu() - if len(x) > 1 and asscalar: - return ei.detach().numpy(), _x - else: - return ei.detach().numpy().item(), _x - - def eval_pfn_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """PFN-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from vanilla-EI - # _x = x.copy() - ei = self.surrogate_model.get_ei(x.to(self.surrogate_model.device), inc_list) - if len(ei.shape) == 2: - ei = ei.flatten() - return ei - - def eval_gp_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """Vanilla-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from 
vanilla-EI - _x = x.copy() - try: - mu, cov = self.surrogate_model.predict(_x) - except ValueError as e: - raise e - # return -1.0 # in case of error. return ei of -1 - std = torch.sqrt(torch.diag(cov)) - - mu_star = inc_list.to(mu.device) # IMPORTANT change from vanilla-EI - - gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) - # u = (mu - mu_star - self.xi) / std - # ei = std * updf + (mu - mu_star - self.xi) * ucdf - if self.log_ei: - # we expect that f_min is in log-space - f_min = mu_star - self.xi - v = (f_min - mu) / std - ei = torch.exp(f_min) * gauss.cdf(v) - torch.exp( - 0.5 * torch.diag(cov) + mu - ) * gauss.cdf(v - std) - else: - u = (mu_star - mu - self.xi) / std - ucdf = gauss.cdf(u) - updf = torch.exp(gauss.log_prob(u)) - ei = std * updf + (mu_star - mu - self.xi) * ucdf - if self.augmented_ei: - sigma_n = self.surrogate_model.likelihood - ei *= 1.0 - torch.sqrt(torch.tensor(sigma_n, device=mu.device)) / torch.sqrt( - sigma_n + torch.diag(cov) - ) - return ei - - def set_state( - self, - pipeline_space: SearchSpace, - surrogate_model: Any, - observations: MFObservedData, - b_step: Union[int, float], - **kwargs, - ): - # overload to select incumbent differently through observations - self.pipeline_space = pipeline_space - self.surrogate_model = surrogate_model - self.observations = observations - self.b_step = b_step - return diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py new file mode 100644 index 00000000..71955820 --- /dev/null +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -0,0 +1,199 @@ +# type: ignore +from typing import Any, Iterable, Tuple, Union + +import numpy as np +import pandas as pd +import torch + +from copy import deepcopy + +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.search_spaces.search_space import SearchSpace +from 
neps.optimizers.multi_fidelity.utils import ( + get_freeze_thaw_normalized_step, get_tokenized_data, MFObservedData +) +from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition + + +class MFPI(BaseAcquisition): + + def __init__( + self, + pipeline_space: SearchSpace, + surrogate_model_name: str = None, + ): + super().__init__() + self.pipeline_space = pipeline_space + self.surrogate_model_name = surrogate_model_name + self.surrogate_model = None + self.observations = None + self.b_step = None + + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + **kwargs, + ): + # overload to select incumbent differently through observations + self.pipeline_space = pipeline_space + self.surrogate_model = surrogate_model + self.observations = observations + self.b_step = b_step + return + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity Expected Improvement acquisition function. 
+ """ + raise NotImplementedError + + def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: + # deepcopy + # _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) + if self.surrogate_model_name == "ftpfn": + # preprocesses configs to have the appropriate fidelity values for acquisition + _x, inc_list = self.preprocess(x.copy()) + _x_tok = get_tokenized_data(_x.values, ignore_fidelity=True) + # padding IDs + _idx = torch.Tensor(_x.index.values + 1) + idx_mask = np.where(_idx > max(self.observations.seen_config_ids))[0] + _idx[idx_mask] = 0 + # normalizing steps + _steps = torch.Tensor([ + get_freeze_thaw_normalized_step( + _conf.fidelity.value, + self.pipeline_space.fidelity.lower, + self.pipeline_space.fidelity.upper, + self.b_step + ) + for _conf in _x + ]) + _x_tok = torch.hstack(( + (_idx).reshape(-1, 1), _steps.reshape(-1, 1), torch.Tensor(_x_tok) + )) + pi = self.eval_pfn_pi(_x_tok, inc_list) + else: + raise ValueError( + f"Unrecognized surrogate model name: {self.surrogate_model_name}" + ) + if pi.is_cuda: + pi = pi.cpu() + if len(_x) > 1 and asscalar: + return pi.detach().numpy(), _x + else: + return pi.detach().numpy().item(), _x + + def eval_pfn_pi( + self, x: Iterable, inc_list: Iterable + ) -> Union[np.ndarray, torch.Tensor, float]: + """PFN-PI modified to preprocess samples and accept list of incumbents.""" + pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list) + if len(pi.shape) == 2: + pi = pi.flatten() + return pi + + +class MFPI_Random(MFPI): + + BUDGET = 1000 + + def __init__( + self, + pipeline_space: SearchSpace, + horizon: str = "random", + threshold: str = "random", + surrogate_model_name: str = None, + ): + super().__init__(pipeline_space, surrogate_model_name) + self.horizon = horizon + self.threshold = threshold + + def set_state( + self, + pipeline_space: SearchSpace, + surrogate_model: Any, + observations: MFObservedData, + b_step: Union[int, float], + 
**kwargs, + ): + # set RNG + self.rng = np.random.RandomState(seed=42) + for i in range(len(observations.completed_runs)): + self.rng.uniform(-4,-1) + self.rng.randint(1,51) + + return super().set_state(pipeline_space, surrogate_model, observations, b_step) + + def sample_horizon(self, steps_passed): + if self.horizon == 'random': + shortest = self.pipeline_space.fidelity.lower + longest = min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + return self.rng.randint(shortest, longest+1) + elif self.horizon == 'max': + return min(self.pipeline_space.fidelity.upper, self.BUDGET - steps_passed) + else: + return int(self.horizon) + + def sample_performance_threshold(self, f_inc): + if self.threshold == 'random': + lu = 10**self.rng.uniform(-4,-1) # % of gap closed + else: + lu = float(self.threshold) + return f_inc * (1 - lu) + + def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: + """Prepares the configurations for appropriate EI calculation. + + Takes a set of points and computes the budget and incumbent for each point, as + required by the multi-fidelity acquisition function. 
+ """ + if self.pipeline_space.has_tabular: + # preprocess tabular space differently + # expected input: IDs pertaining to the tabular data + x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) + + indices_to_drop = [] + inc_list = [] + + steps_passed = len(self.observations.completed_runs) + + # Like EI-AtMax, use the global incumbent as a basis for the EI threshold + inc_value = min(self.observations.get_best_performance_for_each_budget()) + + # Extension: Add a random min improvement threshold to encourage high risk high gain + t_value = self.sample_performance_threshold(inc_value) + inc_value = t_value + + # Like MFEI: set fidelities to query using horizon as self.b_step + # Extension: Unlike DyHPO, we sample the horizon randomly over the full range + horizon = self.sample_horizon(steps_passed) + + for i, config in x.items(): + if i <= max(self.observations.seen_config_ids): + if np.equal(config.fidelity.value, config.fidelity.upper): + # this training run has ended, drop it from future selection + indices_to_drop.append(i) + else: + # a candidate partial training run to continue + config.update_hp_values({ + config.fidelity_name: min( + config.fidelity.value + horizon, config.fidelity.upper + ) # if horizon exceeds max, query at max + }) + inc_list.append(inc_value) + else: + # a candidate new training run that we would need to start + config.update_hp_values({config.fidelity_name: horizon}) + inc_list.append(inc_value) + + # Drop unused configs + x.drop(labels=indices_to_drop, inplace=True) + + assert len(inc_list) == len(x) + + return x, torch.Tensor(inc_list) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py index adf57266..11b592eb 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py @@ -45,16 +45,3 @@ def eval( ucb_scores = 
ucb_scores.detach().numpy() * sign return ucb_scores - - -class MF_UCB(UpperConfidenceBound): - - def preprocess(self, x: Iterable) -> Iterable: - performances = self.observations.get_best_performance_for_each_budget() - pass - - def eval( - self, x: Iterable, asscalar: bool = False - ) -> Union[np.ndarray, torch.Tensor, float]: - x = self.preprocess(x) - return self.eval(x, asscalar=asscalar) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py index 89b7d9d3..93c7370f 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py @@ -1,40 +1,51 @@ -# type: ignore from __future__ import annotations +from typing import Callable import warnings -from copy import deepcopy import numpy as np import pandas as pd +from copy import deepcopy -from ....search_spaces.search_space import SearchSpace -from ...multi_fidelity.utils import MFObservedData -from .base_acq_sampler import AcquisitionSampler +from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.multi_fidelity.utils import MFObservedData +from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( + AcquisitionSampler, +) -class FreezeThawSampler(AcquisitionSampler): +SAMPLES_TO_DRAW = ( + 100 # number of random samples to draw for optimizing acquisition function +) - SAMPLES_TO_DRAW = 100 # number of random samples to draw at lowest fidelity - def __init__(self, **kwargs): +class FreezeThawSampler(AcquisitionSampler): + def __init__(self, samples_to_draw: int | None = None, **kwargs): super().__init__(**kwargs) self.observations = None self.b_step = None self.n = None self.pipeline_space = None # args to manage tabular spaces/grid - self.is_tabular = False + self.is_tabular = False # flag is set by `set_state()` 
self.sample_full_table = None + self.samples_to_draw = ( + samples_to_draw if samples_to_draw is not None else SAMPLES_TO_DRAW + ) self.set_sample_full_tabular(True) # sets flag that samples full table - def set_sample_full_tabular(self, flag: bool=False): + def set_sample_full_tabular(self, flag: bool = False): if self.is_tabular: self.sample_full_table = flag def _sample_new( - self, index_from: int, n: int = None, ignore_fidelity: bool = False + self, + index_from: int, + n: int | None = None, + ignore_fidelity: bool = False, ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW + n = n if n is not None else self.samples_to_draw + assert self.pipeline_space is not None new_configs = [ self.pipeline_space.sample( patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity @@ -49,14 +60,17 @@ def _sample_new( def _sample_new_unique( self, index_from: int, - n: int = None, + n: int | None = None, patience: int = 10, ignore_fidelity: bool = False, ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW + n = n if n is not None else self.samples_to_draw assert ( patience > 0 and n > 0 - ), "Patience and SAMPLES_TO_DRAW must be larger than 0" + ), "Patience and `samples_to_draw` must be larger than 0" + + assert self.observations is not None + assert self.pipeline_space is not None existing_configs = self.observations.all_configs_list() new_configs = [] @@ -101,32 +115,23 @@ def _sample_new_unique( def sample( self, - acquisition_function=None, - n: int = None, - set_new_sample_fidelity: int | float = None, - ) -> list(): + acquisition_function: Callable | None = None, + n: int | None = None, + set_new_sample_fidelity: int | float | None = None, + ) -> pd.DataFrame: """Samples a new set and returns the total set of observed + new configs.""" - partial_configs = self.observations.get_partial_configs_at_max_seen() - new_configs = self._sample_new( - index_from=self.observations.next_config_id(), n=n, ignore_fidelity=False - ) + 
assert self.observations is not None + assert self.pipeline_space is not None - def __sample_single_new_tabular(index: int): - """ - A function to use in a list comprehension to slightly speed up - the sampling process when self.SAMPLE_TO_DRAW is large - """ - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False - ) - config["id"].set_value(_new_configs[index]) - config.fidelity.set_value(set_new_sample_fidelity) - return config + partial_configs = self.observations.get_partial_configs_at_max_seen() + _n = n if n is not None else self.samples_to_draw if self.is_tabular: - _n = n if n is not None else self.SAMPLES_TO_DRAW + assert self.pipeline_space.custom_grid_table is not None + # handles tabular data such that the entire unseen set of configs from the + # table is considered to be the new set of candidates _partial_ids = {conf["id"].value for conf in partial_configs} - _all_ids = set(self.pipeline_space.custom_grid_table.index.values) + _all_ids = set(list(self.pipeline_space.custom_grid_table.keys())) # accounting for unseen configs only, samples remaining table if flag is set max_n = len(_all_ids) + 1 if self.sample_full_table else _n @@ -135,48 +140,63 @@ def __sample_single_new_tabular(index: int): _new_configs = np.random.choice( list(_all_ids - _partial_ids), size=_n, replace=False ) - new_configs = [__sample_single_new_tabular(i) for i in range(_n)] + placeholder_config = self.pipeline_space.sample( + patience=self.patience, user_priors=False, ignore_fidelity=False + ) + _configs = [placeholder_config.clone() for _id in _new_configs] + for _i, val in enumerate(_new_configs): + _configs[_i]["id"].set_value(val) + new_configs = pd.Series( - new_configs, + _configs, index=np.arange( - len(partial_configs), len(partial_configs) + len(new_configs) + len(partial_configs), len(partial_configs) + len(_new_configs) ), ) + else: + # handles sampling new configurations for continuous spaces + new_configs = 
self._sample_new( + index_from=self.observations.next_config_id(), n=_n, ignore_fidelity=False + ) + # Continuous benchmarks need to deepcopy individual configs here, + # because in contrast to tabular benchmarks + # they are not reset in every sampling step + + # TODO: I do not know what the f p_config_ is meant to be so I don't know + # if we have a specific clone method or not... + partial_configs = pd.Series( + [deepcopy(p_config_) for idx, p_config_ in partial_configs.items()], + index=partial_configs.index, + ) - elif set_new_sample_fidelity is not None: - for config in new_configs: - config.fidelity.set_value(set_new_sample_fidelity) - - # Deep copy configs for fidelity updates - partial_configs_list = [] - index_list = [] - for idx, config in partial_configs.items(): - _config = config.clone() - partial_configs_list.append(_config) - index_list.append(idx) - - # We build a new series of partial configs to avoid - # incrementing fidelities multiple times due to pass-by-reference - partial_configs = pd.Series(partial_configs_list, index=index_list) + # Updating fidelity values + new_fid = ( + set_new_sample_fidelity + if set_new_sample_fidelity is not None + else self.pipeline_space.fidelity.lower + ) + for config in new_configs: + config.update_hp_values({config.fidelity_name: new_fid}) - configs = pd.concat([partial_configs, new_configs]) + configs = pd.concat([deepcopy(partial_configs), new_configs]) - return configs + return configs # type: ignore def set_state( self, pipeline_space: SearchSpace, observations: MFObservedData, b_step: int, - n: int = None, - ): + n: int | None = None, + ) -> None: # overload to select incumbent differently through observations self.pipeline_space = pipeline_space self.observations = observations self.b_step = b_step - self.n = n if n is not None else self.SAMPLES_TO_DRAW + self.n = n if n is not None else self.samples_to_draw if ( hasattr(self.pipeline_space, "custom_grid_table") and self.pipeline_space.custom_grid_table 
is not None ): self.is_tabular = True + self.set_sample_full_tabular(True) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py index 4c6b17df..227becf9 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py @@ -1,11 +1,11 @@ from __future__ import annotations from typing import TYPE_CHECKING, Callable, Sequence +from typing_extensions import override import numpy as np import torch from more_itertools import first -from typing_extensions import override from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( AcquisitionSampler, @@ -109,8 +109,9 @@ def create_pool( n_best = len(self.x) if len(self.x) < self.n_best else self.n_best best_configs = [ - x for (_, x) in sorted(zip(self.y, self.x), key=lambda pair: pair[0]) - ][:n_best] + x for (_, x) in + sorted(zip(self.y, self.x), key=lambda pair: pair[0]) + ][:n_best] seen: set[int] = set() diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py index e3b75515..5d783a3e 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from ....search_spaces.search_space import SearchSpace from .base_acq_sampler import AcquisitionSampler diff --git a/neps/optimizers/bayesian_optimization/cost_cooling.py b/neps/optimizers/bayesian_optimization/cost_cooling.py index f2878fe9..d5f9848a 100644 --- a/neps/optimizers/bayesian_optimization/cost_cooling.py +++ b/neps/optimizers/bayesian_optimization/cost_cooling.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from typing import 
Any from typing_extensions import override @@ -236,7 +234,7 @@ def load_optimization_state( self.acquisition.set_state( self.surrogate_model, alpha=1 - - (budget_info.used_cost_budget / budget_info.max_cost_budget), + - (budget_info.used_cost_budget / budget_info.max_cost_budget), cost_model=self.cost_model, ) self.acquisition_sampler.set_state(x=train_x, y=train_y) diff --git a/neps/optimizers/bayesian_optimization/kernels/__init__.py b/neps/optimizers/bayesian_optimization/kernels/__init__.py index 8d11ea81..7217957b 100644 --- a/neps/optimizers/bayesian_optimization/kernels/__init__.py +++ b/neps/optimizers/bayesian_optimization/kernels/__init__.py @@ -1,4 +1,4 @@ -from __future__ import annotations + from functools import partial from typing import Callable diff --git a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py b/neps/optimizers/bayesian_optimization/kernels/get_kernels.py index f606f442..927e23c2 100644 --- a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py +++ b/neps/optimizers/bayesian_optimization/kernels/get_kernels.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from neps.utils.common import instance_from_map from ....search_spaces.architecture.core_graph_grammar import CoreGraphGrammar from ....search_spaces.hyperparameters.categorical import CategoricalParameter diff --git a/neps/optimizers/bayesian_optimization/mf_tpe.py b/neps/optimizers/bayesian_optimization/mf_tpe.py index 45e4adc4..1c2a58df 100644 --- a/neps/optimizers/bayesian_optimization/mf_tpe.py +++ b/neps/optimizers/bayesian_optimization/mf_tpe.py @@ -1,13 +1,11 @@ -from __future__ import annotations - import random from copy import deepcopy -from typing import Any, Iterable +from typing import Any, Iterable, Literal +from typing_extensions import override import numpy as np import torch from scipy.stats import spearmanr -from typing_extensions import Literal, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types 
import ConfigResult, RawConfig @@ -225,7 +223,7 @@ def _enhance_priors(self): def _get_rung_maps(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" eta = round(1 / self.good_fraction) - new_min_budget = self.min_fidelity * (1 / eta**s) + new_min_budget = self.min_fidelity * (1 / eta ** s) nrungs = ( np.floor(np.log(self.max_fidelity / new_min_budget) / np.log(eta)).astype(int) + 1 diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index c76bedfd..fdc84df4 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -3,19 +3,11 @@ from .gp import ComprehensiveGP from .gp_hierarchy import ComprehensiveGPHierarchy -try: - from .deepGP import DeepGP -except ImportError as e: - DeepGP = MissingDependencyError("gpytorch", e) +from .ftpfn import FTPFNSurrogate -try: - from .pfn import PFN_SURROGATE # only if available locally -except Exception as e: - PFN_SURROGATE = MissingDependencyError("pfn", e) SurrogateModelMapping = { - "deep_gp": DeepGP, "gp": ComprehensiveGP, "gp_hierarchy": ComprehensiveGPHierarchy, - "pfn": PFN_SURROGATE, + "ftpfn": FTPFNSurrogate, } diff --git a/neps/optimizers/bayesian_optimization/models/deepGP.py b/neps/optimizers/bayesian_optimization/models/deepGP.py deleted file mode 100644 index d5145043..00000000 --- a/neps/optimizers/bayesian_optimization/models/deepGP.py +++ /dev/null @@ -1,634 +0,0 @@ -from __future__ import annotations - -import logging -import os -from copy import deepcopy -from pathlib import Path - -import gpytorch -import numpy as np -import torch -import torch.nn as nn - -from ....search_spaces.search_space import ( - CategoricalParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) - - -def count_non_improvement_steps(root_directory: Path | str) -> int: - root_directory = Path(root_directory) - - 
all_losses_file = root_directory / "all_losses_and_configs.txt" - best_loss_fiel = root_directory / "best_loss_trajectory.txt" - - # Read all losses from the file in the order they are explored - losses = [ - float(line[6:]) - for line in all_losses_file.read_text(encoding="utf-8").splitlines() - if "Loss: " in line - ] - # Get the best seen loss value - best_loss = float(best_loss_fiel.read_text(encoding="utf-8").splitlines()[-1].strip()) - - # Count the non-improvement - count = 0 - for loss in reversed(losses): - if np.greater(loss, best_loss): - count += 1 - else: - break - - return count - - -class NeuralFeatureExtractor(nn.Module): - """ - Neural network to be used in the DeepGP - """ - - def __init__(self, input_size: int, **kwargs): - super().__init__() - - # Set number of hyperparameters - self.input_size = input_size - - self.n_layers = kwargs.get("n_layers", 2) - self.activation = nn.LeakyReLU() - - layer1_units = kwargs.get("layer1_units", 128) - self.fc1 = nn.Linear(input_size, layer1_units) - self.bn1 = nn.BatchNorm1d(layer1_units) - - previous_layer_units = layer1_units - for i in range(2, self.n_layers): - next_layer_units = kwargs.get(f"layer{i}_units", 256) - setattr( - self, - f"fc{i}", - nn.Linear(previous_layer_units, next_layer_units), - ) - setattr( - self, - f"bn{i}", - nn.BatchNorm1d(next_layer_units), - ) - previous_layer_units = next_layer_units - - setattr( - self, - f"fc{self.n_layers}", - nn.Linear( - previous_layer_units + kwargs.get("cnn_nr_channels", 4), - # accounting for the learning curve features - kwargs.get(f"layer{self.n_layers}_units", 256), - ), - ) - self.cnn = nn.Sequential( - nn.Conv1d( - in_channels=1, - kernel_size=(kwargs.get("cnn_kernel_size", 3),), - out_channels=4, - ), - nn.AdaptiveMaxPool1d(1), - ) - - def forward(self, x, budgets, learning_curves): - # add an extra dimensionality for the budget - # making it nr_rows x 1. 
- budgets = torch.unsqueeze(budgets, dim=1) - # concatenate budgets with examples - x = torch.cat((x, budgets), dim=1) - x = self.fc1(x) - x = self.activation(self.bn1(x)) - - for i in range(2, self.n_layers): - x = self.activation(getattr(self, f"bn{i}")(getattr(self, f"fc{i}")(x))) - - # add an extra dimensionality for the learning curve - # making it nr_rows x 1 x lc_values. - learning_curves = torch.unsqueeze(learning_curves, 1) - lc_features = self.cnn(learning_curves) - # revert the output from the cnn into nr_rows x nr_kernels. - lc_features = torch.squeeze(lc_features, 2) - - # put learning curve features into the last layer along with the higher level features. - x = torch.cat((x, lc_features), dim=1) - x = self.activation(getattr(self, f"fc{self.n_layers}")(x)) - - return x - - -class GPRegressionModel(gpytorch.models.ExactGP): - """ - A simple GP model. - """ - - def __init__( - self, - train_x: torch.Tensor, - train_y: torch.Tensor, - likelihood: gpytorch.likelihoods.GaussianLikelihood, - ): - """ - Constructor of the GPRegressionModel. - - Args: - train_x: The initial train examples for the GP. - train_y: The initial train labels for the GP. - likelihood: The likelihood to be used. 
- """ - super().__init__(train_x, train_y, likelihood) - - self.mean_module = gpytorch.means.ConstantMean() - self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - - -class DeepGP: - """ - Gaussian process with a deep kernel - """ - - def __init__( - self, - pipeline_space: SearchSpace, - neural_network_args: dict | None = None, - logger=None, - surrogate_model_fit_args: dict | None = None, - # IMPORTANT: Checkpointing does not use file locking, - # IMPORTANT: hence, it is not suitable for multiprocessing settings - checkpointing: bool = False, - root_directory: Path | str | None = None, - checkpoint_file: Path | str = "surrogate_checkpoint.pth", - refine_epochs: int = 50, - **kwargs, - ): - self.surrogate_model_fit_args = ( - surrogate_model_fit_args if surrogate_model_fit_args is not None else {} - ) - - self.checkpointing = checkpointing - self.refine_epochs = refine_epochs - if checkpointing: - assert ( - root_directory is not None - ), "neps root_directory must be provided for the checkpointing" - self.root_dir = Path(os.getcwd(), root_directory) - self.checkpoint_path = Path(os.getcwd(), root_directory, checkpoint_file) - - super().__init__() - self.__preprocess_search_space(pipeline_space) - # set the categories array for the encoder - self.categories_array = np.array(self.categories) - - if neural_network_args is None: - neural_network_args = {} - self.nn_args = neural_network_args - - self.device = ( - torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - ) - # self.device = torch.device("cpu") - - # Save the NN args, necessary for preprocessing - self.cnn_kernel_size = neural_network_args.get("cnn_kernel_size", 3) - self.model, self.likelihood, self.mll = self.__initialize_gp_model( - neural_network_args.get("n_layers", 2) - ) - - # build the neural 
network - self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args) - - self.logger = logger or logging.getLogger("neps") - - def __initialize_gp_model( - self, - train_size: int, - ) -> tuple[ - GPRegressionModel, - gpytorch.likelihoods.GaussianLikelihood, - gpytorch.mlls.ExactMarginalLogLikelihood, - ]: - """ - Called when the surrogate is first initialized or restarted. - - Args: - train_size: The size of the current training set. - - Returns: - model, likelihood, mll - The GP model, the likelihood and - the marginal likelihood. - """ - train_x = torch.ones(train_size, train_size).to(self.device) - train_y = torch.ones(train_size).to(self.device) - - likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device) - model = GPRegressionModel( - train_x=train_x, train_y=train_y, likelihood=likelihood - ).to(self.device) - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device) - return model, likelihood, mll - - def __preprocess_search_space(self, pipeline_space: SearchSpace): - self.categories = [] - self.categorical_hps = [] - - parameter_count = 0 - for hp_name, hp in pipeline_space.items(): - # Collect all categories in a list for the encoder - if isinstance(hp, CategoricalParameter): - self.categorical_hps.append(hp_name) - self.categories.extend(hp.choices) - parameter_count += len(hp.choices) - else: - parameter_count += 1 - - # add 1 for budget - self.input_size = parameter_count - self.continuous_params_size = self.input_size - len(self.categories) - self.min_fidelity = pipeline_space.fidelity.lower - self.max_fidelity = pipeline_space.fidelity.upper - - def __encode_config(self, config: SearchSpace): - categorical_encoding = np.zeros_like(self.categories_array) - continuous_values = [] - - for hp_name, hp in config.items(): - if hp.is_fidelity: - continue # Ignore fidelity - if hp_name in self.categorical_hps: - label = hp.value - categorical_encoding[np.argwhere(self.categories_array == label)] = 1 - else: 
- continuous_values.append(hp.value_to_normalized(hp.value)) - - continuous_encoding = np.array(continuous_values) - - encoding = np.concatenate([categorical_encoding, continuous_encoding]) - return encoding - - def __extract_budgets( - self, x_train: list[SearchSpace], normalized: bool = True - ) -> np.ndarray: - budgets = np.array([config.fidelity.value for config in x_train], dtype=np.single) - if normalized: - normalized_budgets = (budgets - self.min_fidelity) / ( - self.max_fidelity - self.min_fidelity - ) - budgets = normalized_budgets - return budgets - - def __preprocess_learning_curves( - self, learning_curves: list[list[float]], padding_value: float = 0.0 - ) -> np.ndarray: - # Add padding to the learning curves to make them the same size - - # Get max learning_curve length - max_length = 0 - for lc in learning_curves: - length = len(lc) - if length > max_length: - max_length = length - - for lc in learning_curves: - # add padding to the learning curve to fit the cnn kernel or - # the max_length depending on which is the largest - padding_length = max([max_length - len(lc), self.cnn_kernel_size - len(lc)]) - lc.extend([padding_value] * padding_length) - - # TODO: check if the lc values are within bounds [0, 1] (karibbov) - # TODO: add normalize_lcs option in the future - - return np.array(learning_curves, dtype=np.single) - - def __reset_xy( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - ): - self.normalize_budget = normalize_budget - self.normalize_y = normalize_y - - x_train, train_budgets, learning_curves = self._preprocess_input( - x_train, learning_curves, normalize_budget - ) - - y_train = self._preprocess_y(y_train, normalize_y) - - self.x_train = x_train - self.train_budgets = train_budgets - self.learning_curves = learning_curves - self.y_train = y_train - - def _preprocess_input( - self, - x: list[SearchSpace], - learning_curves: 
list[list[float]], - normalize_budget: bool = True, - ): - budgets = self.__extract_budgets(x, normalize_budget) - learning_curves = self.__preprocess_learning_curves(learning_curves) - - x = np.array([self.__encode_config(config) for config in x], dtype=np.single) - - x = torch.tensor(x).to(device=self.device) - budgets = torch.tensor(budgets).to(device=self.device) - learning_curves = torch.tensor(learning_curves).to(device=self.device) - - return x, budgets, learning_curves - - def _preprocess_y(self, y_train: list[float], normalize_y: bool = False): - y_train_array = np.array(y_train, dtype=np.single) - self.min_y = y_train_array.min() - self.max_y = y_train_array.max() - if normalize_y: - y_train_array = (y_train_array - self.min_y) / (self.max_y - self.min_y) - y_train_array = torch.tensor(y_train_array).to(device=self.device) - return y_train_array - - def fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - ): - self._fit(x_train, y_train, learning_curves, **self.surrogate_model_fit_args) - - def _fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - perf_patience: int = 10, - ): - self.__reset_xy( - x_train, - y_train, - learning_curves, - normalize_y=normalize_y, - normalize_budget=normalize_budget, - ) - self.model, self.likelihood, self.mll = self.__initialize_gp_model(len(y_train)) - self.nn = NeuralFeatureExtractor(self.input_size, **self.nn_args) - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - if self.checkpointing and self.checkpoint_path.exists(): - non_improvement_steps = count_non_improvement_steps(self.root_dir) - # If checkpointing and patience is not exhausted load a partial model - if 
non_improvement_steps < perf_patience: - n_epochs = self.refine_epochs - self.load_checkpoint() - self.logger.debug(f"No improvement for: {non_improvement_steps} evaulations") - self.logger.debug(f"N Epochs for the full training: {n_epochs}") - - initial_state = self.get_state() - try: - self.__train_model( - self.x_train, - self.train_budgets, - self.learning_curves, - self.y_train, - n_epochs=n_epochs, - batch_size=batch_size, - optimizer_args=optimizer_args, - early_stopping=early_stopping, - patience=patience, - ) - if self.checkpointing: - self.save_checkpoint() - except gpytorch.utils.errors.NotPSDError: - self.logger.info("Model training failed loading the untrained model") - self.load_checkpoint(initial_state) - # Delete checkpoint to restart training - self.delete_checkpoint() - - def __train_model( - self, - x_train: torch.Tensor, - train_budgets: torch.Tensor, - learning_curves: torch.Tensor, - y_train: torch.Tensor, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - ): - if optimizer_args is None: - optimizer_args = {"lr": 0.001} - - self.model.train() - self.likelihood.train() - self.nn.train() - self.optimizer = torch.optim.Adam( - [ - dict({"params": self.model.parameters()}, **optimizer_args), - dict({"params": self.nn.parameters()}, **optimizer_args), - ] - ) - - count_down = patience - min_avg_loss_val = np.inf - average_loss: float = 0.0 - - for epoch_nr in range(0, n_epochs): - if early_stopping and count_down == 0: - self.logger.info( - f"Epoch: {epoch_nr - 1} surrogate training stops due to early " - f"stopping with the patience: {patience} and " - f"the minimum average loss of {min_avg_loss_val} and " - f"the final average loss of {average_loss}" - ) - break - - n_examples_batch = x_train.size(dim=0) - - # get a random permutation for mini-batches - permutation = torch.randperm(n_examples_batch) - - # optimize over mini-batches - total_scaled_loss = 0.0 
- for batch_idx, start_index in enumerate( - range(0, n_examples_batch, batch_size) - ): - end_index = start_index + batch_size - if end_index > n_examples_batch: - end_index = n_examples_batch - indices = permutation[start_index:end_index] - batch_x, batch_budget, batch_lc, batch_y = ( - x_train[indices], - train_budgets[indices], - learning_curves[indices], - y_train[indices], - ) - - minibatch_size = end_index - start_index - # if only one example in the batch, skip the batch. - # Otherwise, the code will fail because of batchnorm - if minibatch_size <= 1: - continue - - # Zero backprop gradients - self.optimizer.zero_grad() - - projected_x = self.nn(batch_x, batch_budget, batch_lc) - self.model.set_train_data(projected_x, batch_y, strict=False) - output = self.model(projected_x) - - # try: - # Calc loss and backprop derivatives - loss = -self.mll(output, self.model.train_targets) - episodic_loss_value: float = loss.detach().to("cpu").item() - # weighted sum over losses in the batch - total_scaled_loss = ( - total_scaled_loss + episodic_loss_value * minibatch_size - ) - - mse = gpytorch.metrics.mean_squared_error( - output, self.model.train_targets - ) - self.logger.debug( - f"Epoch {epoch_nr} Batch {batch_idx} - MSE {mse:.5f}, " - f"Loss: {episodic_loss_value:.3f}, " - f"lengthscale: {self.model.covar_module.base_kernel.lengthscale.item():.3f}, " - f"noise: {self.model.likelihood.noise.item():.3f}, " - ) - - loss.backward() - self.optimizer.step() - - # Get average weighted loss over every batch - average_loss = total_scaled_loss / n_examples_batch - if average_loss < min_avg_loss_val: - min_avg_loss_val = average_loss - count_down = patience - elif early_stopping: - self.logger.debug( - f"No improvement over the minimum loss value of {min_avg_loss_val} " - f"for the past {patience - count_down} epochs " - f"the training will stop in {count_down} epochs" - ) - count_down -= 1 - # except Exception as training_error: - # self.logger.error( - # f'The following 
error happened while training: {training_error}') - # # An error has happened, trigger the restart of the optimization and restart - # # the model with default hyperparameters. - # self.restart = True - # training_errored = True - # break - - def set_prediction_learning_curves(self, learning_curves: list[list[float]]): - self.prediction_learning_curves = learning_curves - - def predict( - self, x: list[SearchSpace], learning_curves: list[list[float]] | None = None - ): - # Preprocess input - if learning_curves is None: - learning_curves = self.prediction_learning_curves - x_test, test_budgets, learning_curves = self._preprocess_input( - x, learning_curves, self.normalize_budget - ) - - self.model.eval() - self.nn.eval() - self.likelihood.eval() - - with torch.no_grad(): - projected_train_x = self.nn( - self.x_train, self.train_budgets, self.learning_curves - ) - self.model.set_train_data( - inputs=projected_train_x, targets=self.y_train, strict=False - ) - - projected_test_x = self.nn(x_test, test_budgets, learning_curves) - - preds = self.likelihood(self.model(projected_test_x)) - - means = preds.mean.detach().cpu() - - if self.normalize_y: - means = (means + self.min_y) * (self.max_y - self.min_y) - - cov = torch.diag(torch.pow(preds.stddev.detach(), 2)).cpu() - - return means, cov - - def load_checkpoint(self, state: dict | None = None): - """ - Load the state from a previous checkpoint. - """ - if state is None: - checkpoint = torch.load(self.checkpoint_path) - else: - checkpoint = state - self.model.load_state_dict(checkpoint["gp_state_dict"]) - self.nn.load_state_dict(checkpoint["nn_state_dict"]) - self.likelihood.load_state_dict(checkpoint["likelihood_state_dict"]) - - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - def save_checkpoint(self, state: dict | None = None): - """ - Save the given state or the current state in a - checkpoint file. 
- - Args: - checkpoint_path: path to the checkpoint file - state: The state to save, if none, it will - save the current state. - """ - - if state is None: - torch.save( - self.get_state(), - self.checkpoint_path, - ) - else: - torch.save( - state, - self.checkpoint_path, - ) - - def get_state(self) -> dict[str, dict]: - """ - Get the current state of the surrogate. - - Returns: - current_state: A dictionary that represents - the current state of the surrogate model. - """ - current_state = { - "gp_state_dict": deepcopy(self.model.state_dict()), - "nn_state_dict": deepcopy(self.nn.state_dict()), - "likelihood_state_dict": deepcopy(self.likelihood.state_dict()), - } - - return current_state - - def delete_checkpoint(self): - self.checkpoint_path.unlink(missing_ok=True) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py new file mode 100644 index 00000000..3831ec61 --- /dev/null +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +from typing import Any +from pathlib import Path +import torch + +from ifbo import FTPFN + + +def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> Path: + # TODO: https://github.com/automl/ifBO/issues/10 + import requests + from ifbo.download import FILE_URL, FILENAME + + target_path = Path(path) if path is not None else Path.cwd().resolve() / ".model" + target_path.mkdir(parents=True, exist_ok=True) + + _target_zip_path = target_path / FILENAME(version) + + # Just a heuristic check to determine if the model already exists. + # Kind of hard to know what the name of the extracted file will be + # Basically we just check if the tar.gz file is there and unpacked. + # If there is a new version, then it wont exist and we will download it. 
+ if _target_zip_path.exists() and any( + p.name.endswith(".pt") for p in target_path.iterdir() + ): + return target_path + + _file_url = FILE_URL(version) + + # Download the tar.gz file and decompress it + response = requests.get(_file_url, allow_redirects=True) + if response.status_code != 200: + raise ValueError( + f"Failed to download the surrogate model from {_file_url}." + f" Got status code: {response.status_code}" + ) + + with open(_target_zip_path, "wb") as f: + try: + f.write(response.content) + except Exception as e: + raise ValueError( + f"Failed to write the surrogate model to {_target_zip_path}." + ) from e + + # Decompress the .tar.gz file using tarfile + import tarfile + + try: + with tarfile.open(_target_zip_path, "r:gz") as tar: + tar.extractall(path=target_path) + except Exception as e: + raise ValueError( + f"Failed to decompress the surrogate model at {_target_zip_path}." + ) from e + + return target_path + + +_CACHED_FTPFN_MODEL: dict[tuple[str, str], FTPFN] = {} + + +class FTPFNSurrogate: + """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" + + def __init__( + self, + target_path: Path | None = None, + version: str = "0.0.1", + **kwargs: Any, + ): + if target_path is None: + # TODO: We also probably want to link this to the actual root directory + # or some shared directory between runs as relying on the path of the initial + # python invocation is likely to lead to issues somewhere. 
+ # TODO: ifbo support for windows has issues with decompression + # We basically just do the same thing they do but manually + target_path = _download_workaround_for_ifbo_issue_10(target_path, version) + + key = (str(target_path), version) + ftpfn = _CACHED_FTPFN_MODEL.get(key) + if ftpfn is None: + ftpfn = FTPFN(target_path=target_path, version=version) + _CACHED_FTPFN_MODEL[key] = ftpfn + + self.ftpfn = ftpfn + self.target_path = self.ftpfn.target_path + self.version = self.ftpfn.version + self.train_x: torch.Tensor | None = None + self.train_y: torch.Tensor | None = None + + @property + def device(self): + return self.ftpfn.device + + def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor: + assert self.train_x is not None, "Train data is not set." + assert self.train_y is not None, "Train data is not set." + return self.ftpfn.model( + self._cast_tensor_shapes(self.train_x), + self._cast_tensor_shapes(self.train_y), + self._cast_tensor_shapes(test_x), + ) + + def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: + if len(x.shape) == 3 and x.shape[1] == 1: + return x + if len(x.shape) == 2: + return x.reshape(x.shape[0], 1, x.shape[1]) + if len(x.shape) == 1: + return x.reshape(x.shape[0], 1) + raise ValueError(f"Shape not recognized: {x.shape}") + + @torch.no_grad() + def get_mean_performance(self, test_x: torch.Tensor) -> torch.Tensor: + logits = self._get_logits(test_x).squeeze() + return self.ftpfn.model.criterion.mean(logits) + + @torch.no_grad() + def get_pi(self, test_x: torch.Tensor, y_best: torch.Tensor) -> torch.Tensor: + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.pi( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def get_ei(self, test_x: torch.Tensor, y_best: torch.Tensor) -> torch.Tensor: + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.ei( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def get_lcb( + self, test_x: 
torch.Tensor, beta: float = (1 - 0.682) / 2 + ) -> torch.Tensor: + logits = self._get_logits(test_x) + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=False, # IMPORTANT to be False, should calculate the LCB using the lower-bound ICDF as per beta + ) + return lcb + + @torch.no_grad() + def get_ucb( + self, test_x: torch.Tensor, beta: float = (1 - 0.682) / 2 + ) -> torch.Tensor: + logits = self._get_logits(test_x) + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=True, # IMPORTANT to be True, should calculate the UCB using the upper-bound ICDF as per beta + ) + return lcb diff --git a/neps/optimizers/bayesian_optimization/optimizer.py b/neps/optimizers/bayesian_optimization/optimizer.py index 9fc3aeae..9ff00f28 100644 --- a/neps/optimizers/bayesian_optimization/optimizer.py +++ b/neps/optimizers/bayesian_optimization/optimizer.py @@ -4,7 +4,7 @@ from typing import Any, TYPE_CHECKING, Literal from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.utils.common import instance_from_map from neps.search_spaces import ( diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml new file mode 100644 index 00000000..76522922 --- /dev/null +++ b/neps/optimizers/default_searchers/ifbo.yaml @@ -0,0 +1,9 @@ +strategy: ifbo +surrogate_model: ftpfn +surrogate_model_args: + version: "0.0.1" +acquisition: MFPI-random +acquisition_sampler: freeze-thaw +acquisition_sampler_args: + samples_to_draw: 250 +model_policy: PFNSurrogate \ No newline at end of file diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 4f5ff24e..e9f1d9a3 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -1,5 
+1,3 @@ -from __future__ import annotations - import random from typing import Any from typing_extensions import override @@ -20,6 +18,7 @@ def __init__( size_per_numerical_hp=grid_step_size, include_endpoints=True, ) + # TODO: handle this shuffling better and offer more control to the user random.shuffle(self.configs_list) @override diff --git a/neps/optimizers/info.py b/neps/optimizers/info.py index 7088f341..40b08174 100644 --- a/neps/optimizers/info.py +++ b/neps/optimizers/info.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import os import yaml diff --git a/neps/optimizers/multi_fidelity/_dyhpo.py b/neps/optimizers/multi_fidelity/_dyhpo.py deleted file mode 100644 index da3e36bf..00000000 --- a/neps/optimizers/multi_fidelity/_dyhpo.py +++ /dev/null @@ -1,409 +0,0 @@ -from __future__ import annotations - -from typing import Any, List, Union -from typing_extensions import override - -import numpy as np - -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) -from neps.optimizers.multi_fidelity.promotion_policy import PromotionPolicy -from neps.optimizers.multi_fidelity.sampling_policy import ( - BaseDynamicModelPolicy, - ModelPolicy, - RandomPromotionDynamicPolicy, - SamplingPolicy, -) -from neps.optimizers.multi_fidelity.utils import MFObservedData - - -class MFEIBO(BaseOptimizer): - """Base class for MF-BO algorithms that use DyHPO like acquisition and budgeting.""" - - acquisition: str = "EI" - - def __init__( - self, - pipeline_space: SearchSpace, - budget: int, - step_size: Union[int, float] = 1, - 
optimal_assignment: bool = False, - use_priors: bool = False, - sample_default_first: bool = False, - sample_default_at_target: bool = False, - sampling_policy: Any = None, - promotion_policy: Any = None, - sample_policy_args: Union[dict, None] = None, - promotion_policy_args: Union[dict, None] = None, - promotion_type: str = "model", - sample_type: str = "model", - sampling_args: Union[dict, None] = None, - loss_value_on_error: Union[None, float] = None, - cost_value_on_error: Union[None, float] = None, - patience: int = 100, - ignore_errors: bool = False, - logger=None, - # arguments for model - surrogate_model: Union[str, Any] = "gp", - surrogate_model_args: dict = None, - domain_se_kernel: str = None, - graph_kernels: list = None, - hp_kernels: list = None, - acquisition: Union[str, BaseAcquisition] = acquisition, - acquisition_sampler: Union[str, AcquisitionSampler] = "mutation", - model_policy: Any = RandomPromotionDynamicPolicy, - log_prior_weighted: bool = False, - initial_design_size: int = 10, - model_policy_args: Union[dict, None] = None, - ): - """Initialise - - Args: - pipeline_space: Space in which to search - budget: Maximum budget - use_priors: Allows random samples to be generated from a default - Samples generated from a Gaussian centered around the default value - sampling_policy: The type of sampling procedure to use - promotion_policy: The type of promotion procedure to use - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. 
default: None - logger: logger object, or None to use the neps logger - sample_default_first: Whether to sample the default configuration first - """ - super().__init__( - pipeline_space=pipeline_space, - budget=budget, - patience=patience, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - ) - self._budget_list: List[Union[int, float]] = [] - self.step_size: Union[int, float] = step_size - self._initial_design_size = initial_design_size - self._model_update_failed = False - self.sample_default_first = sample_default_first - self.sample_default_at_target = sample_default_at_target - - self.promotion_type = promotion_type - self.sample_type = sample_type - self.sampling_args = {} if sampling_args is None else sampling_args - self.use_priors = use_priors - self.total_fevals: int = 0 - - # TODO: Use initialized objects where possible instead of ..._args parameters. - # This will also make it easier to write new policies for users. 
- if model_policy_args is None: - model_policy_args = dict() - if sample_policy_args is None: - sample_policy_args = dict() - if promotion_policy_args is None: - promotion_policy_args = dict() - - self.observed_configs = MFObservedData( - columns=["config", "perf"], - index_names=["config_id", "budget_id"], - ) - - if model_policy is not None: - model_params = dict( - pipeline_space=pipeline_space, - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - graph_kernels=graph_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - use_priors=use_priors, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - logger=logger, - ) - model_params.update(model_policy_args) - if issubclass(model_policy, BaseDynamicModelPolicy): - self.model_policy = model_policy( - observed_configs=self.observed_configs, **model_params - ) - elif issubclass(model_policy, ModelPolicy): - self.model_policy = model_policy(**model_params) - elif issubclass(model_policy, SamplingPolicy): - self.model_policy = model_policy( - pipeline_space=pipeline_space, - patience=patience, - logger=logger, - **model_policy_args, - ) - else: - raise ValueError( - f"Model policy can't be {model_policy}. 
" - f"It must subclass one of the predefined base classes" - ) - - if sampling_policy is not None: - sampling_params = dict( - pipeline_space=pipeline_space, patience=patience, logger=logger - ) - if issubclass(sampling_policy, SamplingPolicy): - sampling_params.update(sample_policy_args) - self.sampling_policy = sampling_policy(**sampling_params) - else: - raise ValueError( - f"Sampling policy {sampling_policy} must inherit from " - f"SamplingPolicy base class" - ) - - if promotion_policy is not None: - if issubclass(promotion_policy, PromotionPolicy): - promotion_params = dict(eta=3) - promotion_params.update(promotion_policy_args) - self.promotion_policy = promotion_policy(**promotion_params) - else: - raise ValueError( - f"Promotion policy {promotion_policy} must inherit from " - f"PromotionPolicy base class" - ) - - def get_budget_level(self, config: SearchSpace) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.step_size) - - def get_budget_value(self, budget_level: Union[int, float]) -> Union[int, float]: - if isinstance(self.pipeline_space.fidelity, IntegerParameter): - budget_val = int( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - elif isinstance(self.pipeline_space.fidelity, FloatParameter): - budget_val = ( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - else: - raise NotImplementedError( - f"Fidelity parameter: {self.pipeline_space.fidelity}" - f"must be one of the types: " - f"[IntegerParameter, FloatParameter], but is type:" - f"{type(self.pipeline_space.fidelity)}" - ) - self._budget_list.append(budget_val) - return budget_val - - @property - def is_init_phase(self) -> bool: - if self.num_train_configs < self._initial_design_size: - return True - return False - - @property - def num_train_configs(self): - return len(self.observed_configs.completed_runs) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - 
pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - """This is basically the fit method. - - Args: - previous_results (dict[str, ConfigResult]): [description] - pending_evaluations (dict[str, ConfigResult]): [description] - """ - - # previous optimization run exists and needs to be loaded - self._load_previous_observations(previous_results) - self.total_fevals = len(previous_results) + len(pending_evaluations) - - # account for pending evaluations - self._handle_pending_evaluations(pending_evaluations) - - self.observed_configs.df.sort_index( - level=self.observed_configs.df.index.names, inplace=True - ) - self.model_policy.observed_configs = self.observed_configs - # fit any model/surrogates - - if not self.is_init_phase: - self._fit_models() - - def _load_previous_observations(self, previous_results): - for config_id, config_val in previous_results.items(): - _config, _budget_level = config_id.split("_") - perf = self.get_loss(config_val.result) - index = (int(_config), int(_budget_level)) - self.observed_configs.add_data([config_val.config, perf], index=index) - - if not np.isclose( - self.observed_configs.df.loc[index, self.observed_configs.perf_col], - perf, - ): - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: perf, - }, - index=index, - ) - - def _handle_pending_evaluations(self, pending_evaluations): - for config_id, config_val in pending_evaluations.items(): - _config, _budget_level = config_id.split("_") - index = (int(_config), int(_budget_level)) - - if index not in self.observed_configs.df.index: - self.observed_configs.add_data([config_val.config, np.nan], index=index) - else: - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: np.nan, - }, - index=index, - ) - - def _fit_models(self): - # TODO: Once done with 
development catch the model update exceptions - # and skip model based suggestions if failed (karibbov) - self.model_policy.update_model() - - def is_promotable(self, promotion_type: str = "model") -> Union[int, None]: - """ - Check if there are any configurations to promote, if yes then return the integer - ID of the promoted configuration, else return None. - """ - if promotion_type == "model": - config_id = self.model_policy.sample(is_promotion=True, **self.sampling_args) - elif promotion_type == "policy": - config_id = self.promotion_policy.retrieve_promotions() - elif promotion_type is None: - config_id = None - else: - raise ValueError( - f"'{promotion_type}' based promotion is not possible, please" - f"use either 'model', 'policy' or None as promotion_type" - ) - - return config_id - - def sample_new_config( - self, - sample_type: str = "model", - **kwargs, - ) -> SearchSpace: - """ - Sample completely new configuration that - hasn't been observed in any fidelity before. - Your model_policy and/or sampling_policy must satisfy this constraint - """ - if sample_type == "model": - config = self.model_policy.sample(**self.sampling_args) - elif sample_type == "policy": - config = self.sampling_policy.sample(**self.sampling_args) - elif sample_type is None: - config = self.pipeline_space.sample( - patience=self.patience, - user_priors=self.use_priors, - ignore_fidelity=True, - ) - else: - raise ValueError( - f"'{sample_type}' based sampling is not possible, please" - f"use either 'model', 'policy' or None as sampling_type" - ) - - return config - - def get_config_and_ids(self) -> tuple[RawConfig, str, Union[str, None]]: - """...and this is the method that decides which point to query. 
- - Returns: - [type]: [description] - """ - _config_id = None - fidelity_value_set = False - if ( - self.num_train_configs == 0 - and self.sample_default_first - and self.pipeline_space.has_prior - ): - config = self.pipeline_space.sample_default_configuration( - patience=self.patience, ignore_fidelity=False - ) - elif ( - (self.num_train_configs == 0 and self._initial_design_size >= 1) - or self.is_init_phase - or self._model_update_failed - ): - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - else: - for _ in range(self.patience): - promoted_config_id = self.is_promotable( - promotion_type=self.promotion_type - ) - if ( - promoted_config_id is not None - and promoted_config_id in self.observed_configs.df.index.levels[0] - ): - current_budget = self.observed_configs.df.loc[ - (promoted_config_id,) - ].index[-1] - next_budget = current_budget + 1 - config = self.observed_configs.df.loc[ - (promoted_config_id, current_budget), - self.observed_configs.config_col, - ] - if np.less_equal( - self.get_budget_value(next_budget), config.fidelity.upper - ): - config.fidelity.set_value(self.get_budget_value(next_budget)) - _config_id = promoted_config_id - fidelity_value_set = True - break - elif promoted_config_id is not None: - self.logger.warn( - f"Configuration ID: '{promoted_config_id}' is " - f"not promotable because it doesn't exist in " - f"the observed configuration IDs: " - f"{self.observed_configs.df.index.levels[0]}.\n\n" - f"Trying to sample again..." 
- ) - else: - # sample_new_config must return a completely new configuration that - # hasn't been observed in any fidelity before - config = self.sample_new_config(sample_type=self.sample_type) - break - - # if the returned config already observed, - # set the fidelity to the next budget level if not max already - # else set the fidelity to the minimum budget level - else: - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - - if not fidelity_value_set: - config.fidelity.set_value(self.get_budget_value(0)) - - if _config_id is None: - _config_id = ( - self.observed_configs.df.index.get_level_values(0).max() + 1 - if len(self.observed_configs.df.index.get_level_values(0)) > 0 - else 0 - ) - config_id = f"{_config_id}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, None diff --git a/neps/optimizers/multi_fidelity/hyperband.py b/neps/optimizers/multi_fidelity/hyperband.py index dde96c56..510fb582 100644 --- a/neps/optimizers/multi_fidelity/hyperband.py +++ b/neps/optimizers/multi_fidelity/hyperband.py @@ -1,11 +1,9 @@ -from __future__ import annotations - import typing from copy import deepcopy -from typing import Any +from typing import Any, Literal +from typing_extensions import override import numpy as np -from typing_extensions import Literal, override from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig @@ -537,5 +535,4 @@ def __init__( sh.model_policy = self.model_policy sh.sample_new_config = self.sample_new_config - # TODO: TrulyAsyncHyperband diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/ifbo.py similarity index 79% rename from neps/optimizers/multi_fidelity/dyhpo.py rename to neps/optimizers/multi_fidelity/ifbo.py index 59804637..dbdeb17a 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -1,12 +1,12 @@ -from __future__ import annotations - from 
typing import Any from typing_extensions import override import numpy as np +import pandas as pd +import warnings from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult, RawConfig +from neps.utils.types import ConfigResult from neps.utils.common import instance_from_map from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -25,17 +25,17 @@ from neps.optimizers.multi_fidelity.utils import MFObservedData -class MFEIBO(BaseOptimizer): +class IFBO(BaseOptimizer): """Base class for MF-BO algorithms that use DyHPO-like acquisition and budgeting.""" - acquisition: str = "MFEI" + acquisition: str = "MFPI-random" def __init__( self, pipeline_space: SearchSpace, - budget: int | None = None, + budget: int = None, step_size: int | float = 1, - optimal_assignment: bool = False, + optimal_assignment: bool = False, # pylint: disable=unused-argument use_priors: bool = False, sample_default_first: bool = False, sample_default_at_target: bool = False, @@ -45,19 +45,17 @@ def __init__( ignore_errors: bool = False, logger=None, # arguments for model - surrogate_model: str | Any = "deep_gp", - surrogate_model_args: dict | None = None, - domain_se_kernel: str | None = None, - graph_kernels: list | None = None, - hp_kernels: list | None = None, + surrogate_model: str | Any = "ftpfn", + surrogate_model_args: dict = None, + domain_se_kernel: str = None, + graph_kernels: list = None, + hp_kernels: list = None, acquisition: str | BaseAcquisition = acquisition, - acquisition_args: dict | None = None, + acquisition_args: dict = None, acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", - acquisition_sampler_args: dict | None = None, - model_policy: Any = FreezeThawModel, - initial_design_fraction: float = 0.75, - initial_design_size: int = 10, - initial_design_budget: int | None = None, + acquisition_sampler_args: dict = None, + model_policy: Any = 
PFNSurrogate, + initial_design_size: int = 1, ): """Initialise @@ -76,7 +74,13 @@ def __init__( value instead. default: None logger: logger object, or None to use the neps logger sample_default_first: Whether to sample the default configuration first + initial_design_size: Number of configurations to sample before starting optimization """ + # Adjust pipeline space fidelity steps to be equally spaced + pipeline_space = self._adjust_fidelity_for_freeze_thaw_steps( + pipeline_space, step_size + ) + # Super constructor call super().__init__( pipeline_space=pipeline_space, budget=budget, @@ -92,14 +96,8 @@ def __init__( self.min_budget = self.pipeline_space.fidelity.lower # TODO: generalize this to work with real data (not benchmarks) self.max_budget = self.pipeline_space.fidelity.upper + self._initial_design_size = initial_design_size - self._initial_design_fraction = initial_design_fraction - ( - self._initial_design_size, - self._initial_design_budget, - ) = self._set_initial_design( - initial_design_size, initial_design_budget, self._initial_design_fraction - ) # TODO: Write use cases for these parameters self._model_update_failed = False self.sample_default_first = sample_default_first @@ -129,9 +127,9 @@ def __init__( self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space) # TODO: Better solution than branching based on the surrogate name is needed - if surrogate_model in ["deep_gp", "gp"]: + if surrogate_model in ["gp", "gp_hierarchy"]: model_policy = FreezeThawModel - elif surrogate_model == "pfn": + elif surrogate_model == "ftpfn": model_policy = PFNSurrogate else: raise ValueError("Invalid model option selected!") @@ -141,6 +139,7 @@ def __init__( pipeline_space=pipeline_space, surrogate_model=surrogate_model, surrogate_model_args=self.surrogate_model_args, + step_size=self.step_size, ) self.acquisition_args = {} if acquisition_args is None else acquisition_args self.acquisition_args.update( @@ -169,6 +168,29 @@ def __init__( ) self.count = 
0 + def _adjust_fidelity_for_freeze_thaw_steps( + self, pipeline_space: SearchSpace, step_size: int + ) -> SearchSpace: + """Adjusts the fidelity range to be divisible by `step_size` for Freeze-Thaw.""" + if not pipeline_space.has_fidelity: + return pipeline_space + # Check if the fidelity range is divided into equal sized steps by `step_size` + remainder = ( + pipeline_space.fidelity.upper - pipeline_space.fidelity.lower + ) % step_size + if remainder == 0: + return pipeline_space + # Adjust the fidelity lower bound to be divisible by `step_size` into equal steps + offset = step_size - remainder + # Pushing the lower bound of the fidelity space by an offset to ensure equal-sized steps + pipeline_space.fidelity.lower += offset + warnings.warn( + f"Adjusted fidelity lower bound to {pipeline_space.fidelity.lower} " + f"for equal-sized steps of {step_size}." + ) + print("New fidelity: ", pipeline_space.fidelity) + return pipeline_space + def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: # setup for GP implemented in NePS @@ -188,46 +210,8 @@ def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): else pipeline_space.get_vectorial_dim() ) - def _set_initial_design( - self, - initial_design_size: int = None, - initial_design_budget: int = None, - initial_design_fraction: float = 0.75, - ) -> tuple[int | float, int | float]: - """Sets the initial design size and budget.""" - - # user specified initial_design_size takes precedence - if initial_design_budget is not None: - _initial_design_budget = initial_design_budget - else: - _initial_design_budget = self.max_budget - - # user specified initial_design_size takes precedence - _initial_design_size = np.inf - if initial_design_size is not None: - _initial_design_size = initial_design_size - if ( - initial_design_size is None - or _initial_design_size * self.min_budget > _initial_design_budget - ): - # if the initial design budget is 
less than the budget spent on sampling - # the initial design at the minimum budget (fidelity) - # 2 choices here: - # 1. Reduce initial_design_size - # 2. Increase initial_design_budget - # we choose to reduce initial_design_size - _init_budget = initial_design_fraction * self.max_budget - # number of min budget evaluations fitting within initial design budget - _initial_design_size = _init_budget // self.min_budget - - self.logger.info( - f"\n\ninitial_design_size: {_initial_design_size}\n" - f"initial_design_budget: {_initial_design_budget}\n" - f"min_budget: {self.min_budget}\n\n" - ) - return _initial_design_size, _initial_design_budget - def get_budget_level(self, config: SearchSpace) -> int: + """Calculates the discretized (int) budget level for a given configuration.""" return int( np.ceil((config.fidelity.value - config.fidelity.lower) / self.step_size) ) @@ -252,7 +236,7 @@ def get_budget_value(self, budget_level: int | float) -> int | float: return budget_val def total_budget_spent(self) -> int | float: - """Calculates the toal budget spent so far. + """Calculates the toal budget spent so far, in the unit of fidelity specified. This is calculated as a function of the fidelity range provided, that takes into account the minimum budget and the step size. 
@@ -269,15 +253,9 @@ def total_budget_spent(self) -> int | float: return total_budget_spent - def is_init_phase(self, budget_based: bool = True) -> bool: - if budget_based: - # Check if we are still in the initial design phase based on - # either the budget spent so far or the number of configurations evaluated - if self.total_budget_spent() < self._initial_design_budget: - return True - else: - if self.num_train_configs < self._initial_design_size: - return True + def is_init_phase(self) -> bool: + if self.num_train_configs < self._initial_design_size: + return True return False @property @@ -302,7 +280,6 @@ def load_optimization_state( columns=["config", "perf", "learning_curves"], index_names=["config_id", "budget_id"], ) - # previous optimization run exists and needs to be loaded self._load_previous_observations(previous_results) self.total_fevals = len(previous_results) + len(pending_evaluations) @@ -314,7 +291,6 @@ def load_optimization_state( self.observed_configs.df.sort_index( level=self.observed_configs.df.index.names, inplace=True ) - # TODO: can we do better than keeping a copy of the observed configs? 
# TODO: can we not hide this in load_results and have something that pops out # more, like a set_state or policy_args @@ -333,7 +309,7 @@ def _get_config_id_split(cls, config_id: str) -> tuple[str, str]: def _load_previous_observations(self, previous_results): def index_data_split(config_id: str, config_val): - _config_id, _budget_id = MFEIBO._get_config_id_split(config_id) + _config_id, _budget_id = IFBO._get_config_id_split(config_id) index = int(_config_id), int(_budget_id) _data = [ config_val.config, @@ -406,11 +382,13 @@ def _randomly_promote(self) -> tuple[SearchSpace, int]: budget = self.observed_configs.df.loc[_config_id].index.values[-1] # calculating fidelity value new_fidelity = self.get_budget_value(budget + 1) - # settingt the config fidelity - config.fidelity.set_value(new_fidelity) + # setting the config fidelity + config.update_hp_values({config.fidelity_name: new_fidelity}) return config, _config_id - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: + def get_config_and_ids( # pylint: disable=no-self-use + self, + ) -> tuple[SearchSpace, str, str | None]: """...and this is the method that decides which point to query. 
Returns: @@ -418,17 +396,17 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """ config_id = None previous_config_id = None - if self.is_init_phase(budget_based=False): + if self.is_init_phase(): # sample a new config till initial design size is satisfied self.logger.info("sampling...") config = self.pipeline_space.sample( patience=self.patience, user_priors=True, ignore_fidelity=False ) - assert config.fidelity is not None - config.fidelity.set_value(self.min_budget) - + _config_dict = config.hp_values() + _config_dict.update({config.fidelity_name: self.min_budget}) + config.set_hyperparameters_from_dict(_config_dict) _config_id = self.observed_configs.next_config_id() - elif self.is_init_phase(budget_based=True) or self._model_update_failed: + elif self.is_init_phase() or self._model_update_failed: # promote a config randomly if initial design size is satisfied but the # initial design budget has not been exhausted self.logger.info("promoting...") @@ -441,28 +419,43 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: # main acquisition call here after initial design is turned off self.logger.info("acquiring...") # generates candidate samples for acquisition calculation - assert self.pipeline_space.fidelity is not None samples = self.acquisition_sampler.sample( set_new_sample_fidelity=self.pipeline_space.fidelity.lower ) # fidelity values here should be the observations or min. 
fidelity + # calculating acquisition function values for the candidate samples acq, _samples = self.acquisition.eval( # type: ignore[attr-defined] x=samples, asscalar=True ) + acq = pd.Series(acq, index=_samples.index) + # maximizing acquisition function - _idx = np.argsort(acq)[-1] + best_idx = acq.sort_values().index[-1] # extracting the config ID for the selected maximizer - _config_id = samples.index[_samples.index.values[_idx]] + _config_id = best_idx # samples.index[_samples.index.values[_idx]] # `_samples` should have new configs with fidelities set to as required # NOTE: len(samples) need not be equal to len(_samples) as `samples` contain # all (partials + new) configurations obtained from the sampler, but # in `_samples`, configs are removed that have reached maximum epochs allowed # NOTE: `samples` and `_samples` should share the same index values, hence, - # avoid using `.iloc` and work with `.loc` on pandas DataFrame/Series + # avoid using `.iloc` and work with `.loc` on these pandas DataFrame/Series - # Is this "config = _samples.loc[_config_id]"? 
+ # assigning config hyperparameters config = samples.loc[_config_id] - config.fidelity.set_value(_samples.loc[_config_id].fidelity.value) + # IMPORTANT: setting the fidelity value appropriately + _fid_value = ( + config.fidelity.lower + if best_idx > max(self.observed_configs.seen_config_ids) + else ( + self.get_budget_value( + self.observed_configs.get_max_observed_fidelity_level_per_config().loc[ + best_idx + ] + ) + + self.step_size # ONE-STEP FIDELITY QUERY for freeze-thaw + ) + ) + config.update_hp_values({config.fidelity_name: _fid_value}) # generating correct IDs if _config_id in self.observed_configs.seen_config_ids: config_id = f"{_config_id}_{self.get_budget_level(config)}" @@ -470,4 +463,4 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: else: config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, previous_config_id + return config.hp_values(), config_id, previous_config_id # type: ignore diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index a24c9d1b..ef31f9cc 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -1,17 +1,15 @@ # type: ignore -from __future__ import annotations -from copy import deepcopy -import numpy as np -import pandas as pd +from copy import deepcopy import torch from neps.utils.common import instance_from_map -from ..bayesian_optimization.models import SurrogateModelMapping -from ..multi_fidelity.utils import normalize_vectorize_config -from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity -from ..utils import map_real_hyperparameters_from_tabular_ids +from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping +from neps.optimizers.multi_fidelity.utils import ( + get_tokenized_data, get_training_data_for_freeze_thaw +) +from neps.optimizers.multi_fidelity_prior.utils import 
calc_total_resources_spent, update_fidelity class MFBOBase: @@ -142,7 +140,7 @@ def is_init_phase(self) -> bool: def sample_new_config( self, rung: int = None, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): """Samples configuration from policies or random.""" if self.model_based and not self.is_init_phase(): @@ -187,8 +185,9 @@ class FreezeThawModel: def __init__( self, pipeline_space, - surrogate_model: str = "deep_gp", + surrogate_model: str = "ftpfn", surrogate_model_args: dict = None, + step_size: int = 1, ): self.observed_configs = None self.pipeline_space = pipeline_space @@ -196,103 +195,40 @@ def __init__( self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - if self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model_args.update({"pipeline_space": pipeline_space}) - - # instantiate the surrogate model self.surrogate_model = instance_from_map( SurrogateModelMapping, self.surrogate_model_name, name="surrogate model", kwargs=self.surrogate_model_args, ) + self.step_size = step_size def _fantasize_pending(self, train_x, train_y, pending_x): - # Select configs that are neither pending nor resulted in error - completed_configs = self.observed_configs.completed_runs.copy(deep=True) - # IMPORTANT: preprocess observations to get appropriate training data - train_x, train_lcs, train_y = self.observed_configs.get_training_data_4DyHPO( - completed_configs, self.pipeline_space - ) - pending_condition = self.observed_configs.pending_condition - if pending_condition.any(): - pending_configs = self.observed_configs.df.loc[pending_condition] - pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4DyHPO( - pending_configs - ) - self._fit(train_x, train_y, train_lcs) - _y, _ = self._predict(pending_x, pending_lcs) - _y = _y.tolist() - - train_x.extend(pending_x) - train_y.extend(_y) - train_lcs.extend(pending_lcs) - - return train_x, train_y, train_lcs + raise 
NotImplementedError("Fantasization not implemented yet!") def _fit(self, train_x, train_y, train_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model.fit(train_x, train_y, train_lcs) - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) + raise NotImplementedError("Predict not implemented yet!") - def _predict(self, test_x, test_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - return self.surrogate_model.predict(test_x) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: - return self.surrogate_model.predict(test_x, test_lcs) - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) + def _predict(self, test_x) -> torch.Tensor: + raise NotImplementedError("Predict not implemented yet!") def set_state( self, pipeline_space, surrogate_model_args, - **kwargs, + **kwargs, # pylint: disable=unused-argument ): self.pipeline_space = pipeline_space self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - # only to handle tabular spaces - if self.pipeline_space.has_tabular: - if self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space.raw_tabular_space} - ) - # instantiate the surrogate model, again, with the new pipeline space - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) - - def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): - if train_x is None: - train_x = [] - if train_y is None: - train_y = [] - if pending_x is None: - 
pending_x = [] - - if decay_t is None: - decay_t = len(train_x) - train_x, train_y, train_lcs = self._fantasize_pending( - train_x, train_y, pending_x + self.surrogate_model = instance_from_map( + SurrogateModelMapping, + self.surrogate_model_name, + name="surrogate model", + kwargs=self.surrogate_model_args, ) - self._fit(train_x, train_y, train_lcs) - - return self.surrogate_model, decay_t - + class PFNSurrogate(FreezeThawModel): """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" @@ -302,51 +238,70 @@ def __init__(self, *args, **kwargs): self.train_x = None self.train_y = None - def _fit(self, *args): - assert self.surrogate_model_name == "pfn" - self.preprocess_training_set() - self.surrogate_model.fit(self.train_x, self.train_y) - - def preprocess_training_set(self): - _configs = self.observed_configs.df.config.values.copy() - - # onlf if tabular space is present - if self.pipeline_space.has_tabular: - # placeholder index, will be driooed - _idxs = np.arange(len(_configs)) - # mapping the (id, epoch) space of tabular configs to the actual HPs - _configs = map_real_hyperparameters_from_tabular_ids( - pd.Series(_configs, index=_idxs), self.pipeline_space - ).values - - device = self.surrogate_model.device - # TODO: fix or make consistent with `tokenize`` - configs, idxs, performances = self.observed_configs.get_tokenized_data( - self.observed_configs.df.copy().assign(config=_configs) + def update_model(self): + # tokenize the observations + idxs, steps, configs, performance = get_training_data_for_freeze_thaw( + self.observed_configs.df.loc[self.observed_configs.completed_runs_index], + self.observed_configs.config_col, + self.observed_configs.perf_col, + self.pipeline_space, + step_size=self.step_size, + maximize=True # inverts performance since NePS minimizes ) - # TODO: account for fantasization - self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) - self.train_y = torch.Tensor(performances).to(device) - - def 
preprocess_test_set(self, test_x): - _len = len(self.observed_configs.all_configs_list()) - device = self.surrogate_model.device - - new_idxs = np.arange(_len, len(test_x)) - base_fidelity = np.array([1] * len(new_idxs)) - new_token_ids = np.hstack( - (new_idxs.T.reshape(-1, 1), base_fidelity.T.reshape(-1, 1)) - ) - # the following operation takes each element in the array and stacks it vertically - # in this case, should convert a (n,) array to (n, 2) by flattening the elements - existing_token_ids = np.vstack(self.observed_configs.token_ids).astype(int) - token_ids = np.vstack((existing_token_ids, new_token_ids)) - - configs = np.array([normalize_vectorize_config(c) for c in test_x]) - test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device) - return test_x - - def _predict(self, test_x, test_lcs): - assert self.surrogate_model_name == "pfn" - test_x = self.preprocess_test_set(test_x) - return self.surrogate_model.predict(self.train_x, self.train_y, test_x) + df_idxs = torch.Tensor(idxs) + df_x = torch.Tensor(get_tokenized_data(configs)) + df_steps = torch.Tensor(steps) + train_x = torch.hstack([ + df_idxs.reshape(df_steps.shape[0], 1), + df_steps.reshape(df_steps.shape[0], 1), + df_x + ]) + train_y = torch.Tensor(performance) + + # fit the model, on only completed runs + self._fit(train_x, train_y) + + # fantasize pending evaluations + if self.observed_configs.pending_condition.any(): + # tokenize the pending observations + _idxs, _steps, _configs, _ = get_training_data_for_freeze_thaw( + self.observed_configs.df.loc[self.observed_configs.pending_runs_index], + self.observed_configs.config_col, + self.observed_configs.perf_col, + self.pipeline_space, + step_size=self.step_size, + maximize=True # inverts performance since NePS minimizes + ) + _df_x = torch.Tensor(get_tokenized_data(_configs)) + _df_idxs = torch.Tensor(_idxs) + _df_steps = torch.Tensor(_steps) + _test_x = torch.hstack([ + _df_idxs.reshape(_df_idxs.shape[0], 1), + 
_df_steps.reshape(_df_steps.shape[0], 1), + _df_x + ]) + _performances = self._predict(_test_x) # returns maximizing metric + # update the training data + train_x = torch.vstack([train_x, _test_x]) + train_y = torch.hstack([train_y, _performances]) + # refit the model, on completed runs + fantasized pending runs + self._fit(train_x, train_y) + + def _fit(self, train_x: torch.Tensor, train_y: torch.Tensor): # pylint: disable=unused-argument + # no training required, only preprocessing the training data as context during inference + assert self.surrogate_model is not None, "Surrogate model not set!" + self.surrogate_model.train_x = train_x + self.surrogate_model.train_y = train_y + + def _predict(self, test_x: torch.Tensor) -> torch.Tensor: + assert self.surrogate_model.train_x is not None and self.surrogate_model.train_y is not None, "Model not trained yet!" + if self.surrogate_model_name == "ftpfn": + mean = self.surrogate_model.get_mean_performance(test_x) + if mean.is_cuda: + mean = mean.cpu() + return mean + else: + # check neps/optimizers/bayesian_optimization/models/__init__.py for options + raise ValueError( + f"Surrogate model {self.surrogate_model_name} not supported!"
+ ) diff --git a/neps/optimizers/multi_fidelity/promotion_policy.py b/neps/optimizers/multi_fidelity/promotion_policy.py index 41b25176..102b7f82 100644 --- a/neps/optimizers/multi_fidelity/promotion_policy.py +++ b/neps/optimizers/multi_fidelity/promotion_policy.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from abc import ABC, abstractmethod import numpy as np @@ -104,6 +102,6 @@ def retrieve_promotions(self) -> dict: top_k = len(self.rung_members_performance[rung]) // self.eta _ordered_idx = np.argsort(self.rung_members_performance[rung]) self.rung_promotions[rung] = np.array(self.rung_members[rung])[_ordered_idx][ - :top_k - ].tolist() + :top_k + ].tolist() return self.rung_promotions diff --git a/neps/optimizers/multi_fidelity/sampling_policy.py b/neps/optimizers/multi_fidelity/sampling_policy.py index 9321633c..4ae6118d 100644 --- a/neps/optimizers/multi_fidelity/sampling_policy.py +++ b/neps/optimizers/multi_fidelity/sampling_policy.py @@ -1,6 +1,4 @@ # mypy: disable-error-code = assignment -from __future__ import annotations - import logging from abc import ABC, abstractmethod from typing import Any diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index 6d2ed8ef..6df62333 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -1,5 +1,4 @@ # type: ignore - from __future__ import annotations import random @@ -8,7 +7,8 @@ import numpy as np import pandas as pd -from typing_extensions import Literal, override +from typing import Literal +from typing_extensions import override from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces import ( @@ -178,7 +178,7 @@ def get_incumbent_score(self): def _get_rung_map(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" assert s <= self.stopping_rate_limit - new_min_budget = self.min_budget * 
(self.eta**s) + new_min_budget = self.min_budget * (self.eta ** s) nrungs = ( np.floor(np.log(self.max_budget / new_min_budget) / np.log(self.eta)).astype( int @@ -199,7 +199,7 @@ def _get_rung_map(self, s: int = 0) -> dict: def _get_config_map(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to the number of configs for each fidelity""" assert s <= self.stopping_rate_limit - new_min_budget = self.min_budget * (self.eta**s) + new_min_budget = self.min_budget * (self.eta ** s) nrungs = ( np.floor(np.log(self.max_budget / new_min_budget) / np.log(self.eta)).astype( int @@ -209,7 +209,7 @@ def _get_config_map(self, s: int = 0) -> dict: s_max = self.stopping_rate_limit + 1 _s = self.stopping_rate_limit - s # L2 from Alg 1 in https://arxiv.org/pdf/1603.06560.pdf - _n_config = np.floor(s_max / (_s + 1)) * self.eta**_s + _n_config = np.floor(s_max / (_s + 1)) * self.eta ** _s config_map = dict() for i in range(nrungs): config_map[i + s] = int(_n_config) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index dd36e489..f551e73f 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -1,14 +1,13 @@ # type: ignore -from __future__ import annotations - from typing import Any, Sequence +from copy import deepcopy import numpy as np import pandas as pd import torch -from ...optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ...search_spaces.search_space import SearchSpace +from neps.search_spaces.search_space import SearchSpace +from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids def continuous_to_tabular( @@ -34,11 +33,70 @@ def normalize_vectorize_config( config: SearchSpace, ignore_fidelity: bool = True ) -> np.ndarray: _new_vector = [] - for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity).items(): + for _, hp_list in config.get_normalized_hp_categories( + ignore_fidelity=ignore_fidelity + ).items(): 
_new_vector.extend(hp_list) return np.array(_new_vector) +def get_tokenized_data( + configs: list[SearchSpace], + ignore_fidelity: bool = True, +) -> np.ndarray: # pd.Series: # tuple[np.ndarray, np.ndarray, np.ndarray]: + """Extracts configurations, indices and performances given a DataFrame + + Tokenizes the given set of observations as required by a PFN surrogate model. + """ + configs = np.array( + [normalize_vectorize_config(c, ignore_fidelity=ignore_fidelity) for c in configs] + ) + return configs + + +def get_freeze_thaw_normalized_step( + fid_step: int, lower: int, upper: int, step: int +) -> float: + max_fid_step = int(np.ceil((upper - lower) / step)) + 1 + return fid_step / max_fid_step + + +def get_training_data_for_freeze_thaw( + df: pd.DataFrame, + config_key: str, + perf_key: str, + pipeline_space: SearchSpace, + step_size: int, + maximize: bool = False, +) -> tuple[list[int], list[int], list[SearchSpace], list[float]]: + configs = [] + performance = [] + idxs = [] + steps = [] + for idx, row in df.iterrows(): + config_id = idx[0] + budget_id = idx[1] + if pipeline_space.has_tabular: + _row = pd.Series([row[config_key]], index=[config_id]) + _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) + configs.append(_row.values[0]) + else: + configs.append(row[config_key]) + performance.append(row[perf_key]) + steps.append( + get_freeze_thaw_normalized_step( + budget_id + 1, # NePS fidelity IDs begin with 0 + pipeline_space.fidelity.lower, + pipeline_space.fidelity.upper, + step_size, + ) + ) + idxs.append(idx[0] + 1) # NePS config IDs begin with 0 + if maximize: + performance = (1 - np.array(performance)).tolist() + return idxs, steps, configs, performance + + class MFObservedData: """ (Under development) @@ -55,6 +113,7 @@ class MFObservedData: default_config_col = "config" default_perf_col = "perf" default_lc_col = "learning_curves" + # TODO: deepcopy all the mutable outputs from the dataframe def __init__( self, @@ -79,6 +138,7 @@ def 
__init__( self.config_idx = index_names[0] self.budget_idx = index_names[1] + self.index_names = index_names index = pd.MultiIndex.from_tuples([], names=index_names) @@ -101,10 +161,18 @@ def seen_budget_levels(self) -> list: # Considers pending and error budgets as seen return self.df.index.levels[1].to_list() + @property + def pending_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.df.loc[self.pending_condition].index + @property def completed_runs(self): return self.df[~(self.pending_condition | self.error_condition)] + @property + def completed_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.completed_runs.index + def next_config_id(self) -> int: if len(self.seen_config_ids): return max(self.seen_config_ids) + 1 @@ -129,8 +197,9 @@ def add_data( data_list = data if not self.df.index.isin(index_list).any(): - _df = pd.DataFrame(data_list, columns=self.df.columns, index=index_list) - self.df = pd.concat((self.df, _df)) + index = pd.MultiIndex.from_tuples(index_list, names=self.index_names) + _df = pd.DataFrame(data_list, columns=self.df.columns, index=index) + self.df = _df.copy() if self.df.empty else pd.concat((self.df, _df)) elif error: raise ValueError( f"Data with at least one of the given indices already " @@ -171,14 +240,14 @@ def get_learning_curves(self): ) def all_configs_list(self) -> list[Any]: - return self.df.loc[:, self.config_col].values.tolist() + return self.df.loc[:, self.config_col].sort_index().values.tolist() def get_incumbents_for_budgets(self, maximize: bool = False): """ Returns a series object with the best partial configuration for each budget id Note: this will always map the best lowest ID if two configurations - has the same performance at the same fidelity + have the same performance at the same fidelity """ learning_curves = self.get_learning_curves() if maximize: @@ -205,6 +274,15 @@ def get_best_performance_for_each_budget(self, maximize: bool = False): return performance + def 
get_budget_level_for_best_performance(self, maximize: bool = False) -> int: + """Returns the lowest budget level at which the highest performance was recorded.""" + perf_per_z = self.get_best_performance_for_each_budget(maximize=maximize) + y_star = self.get_best_seen_performance(maximize=maximize) + # uses the minimum of the budgets that see the maximum observed score + op = max if maximize else min + z_inc = int(op([_z for _z, _y in perf_per_z.items() if _y == y_star])) + return z_inc + def get_best_learning_curve_id(self, maximize: bool = False): """ Returns a single configuration id of the best observed performance @@ -240,7 +318,19 @@ def reduce_to_max_seen_budgets(self): def get_partial_configs_at_max_seen(self): return self.reduce_to_max_seen_budgets()[self.config_col] - def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]: + def extract_learning_curve( + self, config_id: int, budget_id: int | None = None + ) -> list[float]: + if budget_id is None: + # budget_id only None when predicting + # extract full observed learning curve for prediction pipeline + budget_id = ( + max(self.df.loc[config_id].index.get_level_values("budget_id").values) + 1 + ) + + # For the first epoch we have no learning curve available + if budget_id == 0: + return [] # reduce budget_id to discount the current validation loss # both during training and prediction phase budget_id = max(0, budget_id - 1) @@ -249,49 +339,30 @@ def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]: else: lcs = self.get_learning_curves() lc = lcs.loc[config_id, :budget_id].values.flatten().tolist() - return lc + return deepcopy(lc) + + def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series: + """Returns the best score recorded per config across fidelities seen.""" + op = np.max if maximize else np.min + perf = ( + self.df.sort_values( + "budget_id", ascending=False + ) # sorts with largest budget first + .groupby("config_id") #
retains only config_id + .first() # retrieves the largest budget seen for each config_id + .learning_curves.apply( # extracts all values seen till largest budget for a config + op + ) # finds the minimum over per-config learning curve + ) + return perf - def get_training_data_4DyHPO( - self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None - ): - configs = [] - learning_curves = [] - performance = [] - for idx, row in df.iterrows(): - config_id = idx[0] - budget_id = idx[1] - if pipeline_space.has_tabular: - _row = pd.Series([row[self.config_col]], index=[config_id]) - _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space) - configs.append(_row.values[0]) - else: - configs.append(row[self.config_col]) - performance.append(row[self.perf_col]) - learning_curves.append(self.extract_learning_curve(config_id, budget_id)) - return configs, learning_curves, performance - - def get_tokenized_data(self, df: pd.DataFrame): - idxs = df.index.values - idxs = np.array([list(idx) for idx in idxs]) - idxs[:, 1] += 1 # all fidelity IDs begin with 0 in NePS - performances = df.perf.values - configs = df.config.values - configs = np.array([normalize_vectorize_config(c) for c in configs]) - - return configs, idxs, performances - - def tokenize(self, df: pd.DataFrame, as_tensor: bool = False): - """Function to format data for PFN.""" - configs = np.array([normalize_vectorize_config(c) for c in df]) - fidelity = np.array([c.fidelity.value for c in df]).reshape(-1, 1) - idx = df.index.values.reshape(-1, 1) - - data = np.hstack([idx, fidelity, configs]) - - if as_tensor: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - data = torch.Tensor(data).to(device) - return data + def get_max_observed_fidelity_level_per_config(self) -> pd.Series: + """Returns the highest fidelity level recorded per config seen.""" + max_z_observed = { + _id: self.df.loc[_id, :].index.sort_values()[-1] + for _id in 
self.df.index.get_level_values("config_id").sort_values() + } + return pd.Series(max_z_observed) @property def token_ids(self) -> np.ndarray: @@ -315,33 +386,12 @@ def token_ids(self) -> np.ndarray: index=[(0, 2), (1, 2), (0, 1)], ) - print(data.df) - print(data.get_learning_curves()) - print( - "Mapping of budget IDs into best performing configurations at each fidelity:\n", - data.get_incumbents_for_budgets(), - ) - print( - "Best Performance at each budget level:\n", - data.get_best_performance_for_each_budget(), - ) - print( - "Configuration ID of the best observed performance so far: ", - data.get_best_learning_curve_id(), - ) - print(data.extract_learning_curve(0, 2)) - # data.df.sort_index(inplace=True) - print(data.get_partial_configs_at_max_seen()) - # When updating multiple indices at a time both the values in the data dictionary and the indices should be lists data.update_data({"perf": [1.8, 1.5]}, index=[(1, 1), (0, 0)]) - print(data.df) data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"]) # when adding a single row second level list is not necessary data.add_data(["conf1", 0.5], index=(0, 0)) - print(data.df) data.update_data({"perf": [1.8], "budget_col": [5]}, index=(0, 0)) - print(data.df) diff --git a/neps/optimizers/multi_fidelity_prior/async_priorband.py b/neps/optimizers/multi_fidelity_prior/async_priorband.py index 40f6cb29..ce2352cf 100644 --- a/neps/optimizers/multi_fidelity_prior/async_priorband.py +++ b/neps/optimizers/multi_fidelity_prior/async_priorband.py @@ -1,11 +1,10 @@ -from __future__ import annotations - import typing import numpy as np -from typing_extensions import Literal, override +from typing import Literal +from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from 
neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( diff --git a/neps/optimizers/multi_fidelity_prior/priorband.py b/neps/optimizers/multi_fidelity_prior/priorband.py index 614ad4b0..be7b3151 100644 --- a/neps/optimizers/multi_fidelity_prior/priorband.py +++ b/neps/optimizers/multi_fidelity_prior/priorband.py @@ -1,9 +1,7 @@ -from __future__ import annotations - import typing +from typing import Literal import numpy as np -from typing_extensions import Literal from neps.utils.types import RawConfig from neps.search_spaces.search_space import SearchSpace @@ -146,7 +144,7 @@ def calc_sampling_args(self, rung) -> dict: # scales weight of prior by eta raised to the current rung level # at the base rung thus w_prior = w_random # at the max rung r, w_prior = eta^r * w_random - _w_prior = (self.eta**rung) * _w_random + _w_prior = (self.eta ** rung) * _w_random elif self.prior_weight_type == "linear": _w_random = 1 w_prior_min_rung = 1 * _w_random diff --git a/neps/optimizers/multi_fidelity_prior/utils.py b/neps/optimizers/multi_fidelity_prior/utils.py index edbbadc7..9f4c1a47 100644 --- a/neps/optimizers/multi_fidelity_prior/utils.py +++ b/neps/optimizers/multi_fidelity_prior/utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import numpy as np import pandas as pd import scipy diff --git a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py index 845552ea..d14657bf 100644 --- a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py +++ b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import logging -from typing import Any -from typing_extensions import override +from typing import Any, override from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig diff --git a/neps/optimizers/random_search/optimizer.py 
b/neps/optimizers/random_search/optimizer.py index 5aeaff33..abe16866 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -1,8 +1,7 @@ -from __future__ import annotations from typing import Any from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer diff --git a/neps/optimizers/regularized_evolution/optimizer.py b/neps/optimizers/regularized_evolution/optimizer.py index 0860ba1c..d112be31 100644 --- a/neps/optimizers/regularized_evolution/optimizer.py +++ b/neps/optimizers/regularized_evolution/optimizer.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import math import os import random @@ -10,7 +8,7 @@ import numpy as np import yaml -from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace @@ -68,7 +66,7 @@ def load_optimization_state( self.population = [ (x, y) for x, y in zip( - train_x[-self.population_size :], train_y[-self.population_size :] + train_x[-self.population_size:], train_y[-self.population_size:] ) ] self.pending_evaluations = [el for el in pending_evaluations.values()] @@ -85,12 +83,13 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: for _ in range(cur_population_size * 2) ] if self.assisted_zero_cost_proxy is not None: - zero_cost_proxy_values = self.assisted_zero_cost_proxy(x=configs) # type: ignore[misc] + zero_cost_proxy_values = self.assisted_zero_cost_proxy( + x=configs) # type: ignore[misc] else: raise Exception("Zero cost proxy function is not defined!") indices = np.argsort(zero_cost_proxy_values)[-cur_population_size:][ - ::-1 - ] + ::-1 + ] 
for idx, config_idx in enumerate(indices): filename = str(idx).zfill( int(math.log10(cur_population_size)) + 1 diff --git a/neps/optimizers/utils.py b/neps/optimizers/utils.py index c203f4db..e9d29222 100644 --- a/neps/optimizers/utils.py +++ b/neps/optimizers/utils.py @@ -1,13 +1,7 @@ import pandas as pd -from ..search_spaces.search_space import SearchSpace - - -# def map_real_hyperparameters_from_tabular_ids( -# ids: pd.Series, pipeline_space: SearchSpace -# ) -> pd.Series: -# return x - +from neps.search_spaces.search_space import SearchSpace + def map_real_hyperparameters_from_tabular_ids( x: pd.Series, pipeline_space: SearchSpace @@ -25,23 +19,12 @@ def map_real_hyperparameters_from_tabular_ids( """ if len(x) == 0: return x - # extract fid name - _x = x.iloc[0].hp_values() - _x.pop("id") - fid_name = list(_x.keys())[0] - for i in x.index.values: - # extracting actual HPs from the tabular space - _config = pipeline_space.custom_grid_table.loc[x.loc[i]["id"].value].to_dict() - # updating fidelities as per the candidate set passed - _config.update({fid_name: x.loc[i][fid_name].value}) - # placeholder config from the raw tabular space - config = pipeline_space.raw_tabular_space.sample( - patience=100, - user_priors=True, - ignore_fidelity=True # True allows fidelity to appear in the sample - ) - # copying values from table to placeholder config of type SearchSpace - config.load_from(_config) - # replacing the ID in the candidate set with the actual HPs of the config - x.loc[i] = config - return x + # copying hyperparameter configs based on IDs + _x = pd.Series( + [pipeline_space.custom_grid_table[x.loc[idx]["id"].value] for idx in x.index.values], + index=x.index + ) + # setting the passed fidelities for the corresponding IDs + for idx in _x.index.values: + _x.loc[idx].fidelity.value = x.loc[idx].fidelity.value + return _x diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py new file mode 100644 index 00000000..d543ef82 --- /dev/null +++ b/neps/plot/plot3D.py @@ 
-0,0 +1,255 @@ +"""Plot a 3D landscape of learning curves for a given run.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import pandas as pd +from matplotlib import ( + cm, + pyplot as plt, +) +from matplotlib.collections import LineCollection +from matplotlib.colors import Normalize +from mpl_toolkits.mplot3d.art3d import Line3DCollection + +# Copied from plot.py +HERE = Path(__file__).parent.absolute() +DEFAULT_RESULTS_PATH = HERE.parent / "results" + + +@dataclass +class Plotter3D: + """Plot a 3d landscape of learning curves for a given run.""" + + loss_key: str = "Loss" + fidelity_key: str = "epochs" + run_path: str | Path | None = None + scatter: bool = True + footnote: bool = True + alpha: float = 0.9 + scatter_size: float | int = 3 + bck_color_2d: tuple[float, float, float] = (0.8, 0.82, 0.8) + view_angle: tuple[float, float] = (15, -70) + + def __post_init__(self) -> None: + if self.run_path is not None: + assert ( + Path(self.run_path).absolute().is_dir() + ), f"Path {self.run_path} is not a directory" + self.data_path = ( + Path(self.run_path).absolute() / "summary_csv" / "config_data.csv" + ) + assert self.data_path.exists(), f"File {self.data_path} does not exist" + self.df = pd.read_csv( + self.data_path, + index_col=0, + float_precision="round_trip", # type: ignore + ) + + # Assigned at prep_df stage + self.loss_range: tuple[float, float] | None = None + self.epochs_range: tuple[float, float] | None = None + + @staticmethod + def get_x(df: pd.DataFrame) -> np.ndarray: + """Get the x-axis values for the plot.""" + return df["epochID"].to_numpy() + + @staticmethod + def get_y(df: pd.DataFrame) -> np.ndarray: + """Get the y-axis values for the plot.""" + y_ = df["configID"].to_numpy() + return np.ones_like(y_) * y_[0] + + @staticmethod + def get_z(df: pd.DataFrame) -> np.ndarray: + """Get the z-axis values for the plot.""" + return df["result.loss"].to_numpy() + + 
@staticmethod + def get_color(df: pd.DataFrame) -> np.ndarray: + """Get the color values for the plot.""" + return df.index.to_numpy() + + def prep_df(self, df: pd.DataFrame | None = None) -> pd.DataFrame: + """Prepare the dataframe for plotting.""" + df = self.df if df is None else df + + _fid_key = f"config.{self.fidelity_key}" + self.loss_range = (df["result.loss"].min(), df["result.loss"].max()) # type: ignore + self.epochs_range = (df[_fid_key].min(), df[_fid_key].max()) # type: ignore + + split_values = np.array([[*index.split("_")] for index in df.index]) + df[["configID", "epochID"]] = split_values + df.configID = df.configID.astype(int) + df.epochID = df.epochID.astype(int) + if df.epochID.min() == 0: + df.epochID += 1 + + # indices become sampling order + time_cols = ["metadata.time_started", "metadata.time_end"] + return df.sort_values(by=time_cols).reset_index(drop=True) + + def plot3D( # noqa: N802, PLR0915 + self, + data: pd.DataFrame | None = None, + save_path: str | Path | None = None, + filename: str = "freeze_thaw", + ) -> None: + """Plot the 3D landscape of learning curves.""" + data = self.prep_df(data) + + # Create the figure and the axes for the plot + fig, (ax3D, ax, cax) = plt.subplots( + 1, 3, figsize=(12, 5), width_ratios=(20, 20, 1) + ) + + # remove a 2D axis and replace with a 3D projection one + ax3D.remove() + ax3D = fig.add_subplot(131, projection="3d") + + # Create the normalizer to normalize the color values + norm = Normalize(self.get_color(data).min(), self.get_color(data).max()) + + # Counters to keep track of the configurations run for only a single fidelity + n_lines = 0 + n_points = 0 + + data_groups = data.groupby("configID", sort=False) + + for idx, (_configID, data_) in enumerate(data_groups): + x = self.get_x(data_) + y = self.get_y(data_) + z = self.get_z(data_) + + y = np.ones_like(y) * idx + color = self.get_color(data_) + + if len(x) < 2: + n_points += 1 + if self.scatter: + # 3D points + ax3D.scatter( + y, + z, + 
s=self.scatter_size, + zs=0, + zdir="x", + c=color, + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha * 0.8, + ) + # 2D points + ax.scatter( + x, + z, + s=self.scatter_size, + c=color, + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha * 0.8, + ) + else: + n_lines += 1 + + # Plot 3D + # Get segments for all lines + points3D = np.array([x, y, z]).T.reshape(-1, 1, 3) + segments3D = np.concatenate([points3D[:-1], points3D[1:]], axis=1) + + # Construct lines from segments + lc3D = Line3DCollection( + segments3D, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, + ) + lc3D.set_array(color) + + # Draw lines + ax3D.add_collection3d(lc3D) # type: ignore + + # Plot 2D + # Get segments for all lines + points = np.array([x, z]).T.reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + + # Construct lines from segments + lc = LineCollection( + segments, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, # type: ignore + ) + lc.set_array(color) + + # Draw lines + ax.add_collection(lc) + + assert self.loss_range is not None + assert self.epochs_range is not None + + ax3D.axes.set_xlim3d(left=self.epochs_range[0], right=self.epochs_range[1]) # type: ignore + ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) # type: ignore + ax3D.axes.set_zlim3d(bottom=self.loss_range[0], top=self.loss_range[1]) # type: ignore + + ax3D.set_xlabel("Epochs") + ax3D.set_ylabel("Iteration sampled") + ax3D.set_zlabel(f"{self.loss_key}") # type: ignore + + # set view angle + ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) # type: ignore + + ax.autoscale_view() + ax.set_xlabel(self.fidelity_key) + ax.set_ylabel(f"{self.loss_key}") + ax.set_facecolor(self.bck_color_2d) + fig.suptitle("ifBO run") + + if self.footnote: + fig.text( + 0.01, + 0.02, + f"Total {n_lines + n_points} configs evaluated; for multiple budgets: " + f"{n_lines}, for single budget: {n_points}", + ha="left", + va="bottom", + fontsize=10, + ) + + 
plt.colorbar( + cm.ScalarMappable(norm=norm, cmap="RdYlBu_r"), + cax=cax, + label="Iteration", + use_gridspec=True, + alpha=self.alpha, + ) + fig.tight_layout() + + self.save(save_path, filename) + plt.close(fig) + + def save( + self, + save_path: str | Path | None = None, + filename: str = "freeze_thaw", + ) -> None: + """Save the plot to a file.""" + path = save_path if save_path is not None else self.run_path + assert path is not None + + run_path = Path(path) + run_path.mkdir(parents=True, exist_ok=True) + assert run_path.is_dir() + plot_path = run_path / f"Plot3D_{filename}.png" + + plt.savefig(plot_path, bbox_inches="tight") + + +if __name__ == "__main__": + plotter = Plotter3D(run_path="./results", fidelity_key="epochs") + plotter.plot3D() diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py index e77329b4..2211537d 100644 --- a/neps/plot/tensorboard_eval.py +++ b/neps/plot/tensorboard_eval.py @@ -3,8 +3,9 @@ from __future__ import annotations import math +from collections.abc import Mapping from pathlib import Path -from typing import Any, ClassVar, Mapping +from typing import TYPE_CHECKING, Any, ClassVar from typing_extensions import override import numpy as np @@ -12,10 +13,17 @@ from torch.utils.tensorboard.summary import hparams from torch.utils.tensorboard.writer import SummaryWriter -from neps.runtime import get_in_progress_trial, get_workers_neps_state +from neps.runtime import ( + get_in_progress_trial, + get_workers_neps_state, + register_notify_trial_end, +) from neps.status.status import get_summary_dict from neps.utils.common import get_initial_directory +if TYPE_CHECKING: + from neps.state.trial import Trial + class SummaryWriter_(SummaryWriter): # noqa: N801 """This class inherits from the base SummaryWriter class and provides @@ -87,6 +95,8 @@ def _initiate_internal_configurations() -> None: trial = get_in_progress_trial() neps_state = get_workers_neps_state() + register_notify_trial_end("NEPS_TBLOGGER", 
tblogger.end_of_config) + # We are assuming that neps state is all filebased here root_dir = Path(neps_state.location) assert root_dir.exists() @@ -97,12 +107,12 @@ def _initiate_internal_configurations() -> None: if trial.metadata.previous_trial_location is not None else None ) + tblogger.config_id = trial.metadata.id tblogger.optimizer_dir = root_dir tblogger.config = trial.config @staticmethod def _is_initialized() -> bool: - # Returns 'True' if config_writer is already initialized. 'False' otherwise return tblogger.config_writer is not None @staticmethod @@ -110,7 +120,7 @@ def _initialize_writers() -> None: # This code runs only once per config, to assign that config a config_writer. if ( tblogger.config_previous_directory is None - and tblogger.config_working_directory + and tblogger.config_working_directory is not None ): # If no fidelities are there yet, define the writer via the config_id tblogger.config_id = str(tblogger.config_working_directory).rsplit( @@ -120,8 +130,9 @@ def _initialize_writers() -> None: tblogger.config_working_directory / "tbevents" ) return + # Searching for the initial directory where tensorboard events are stored. - if tblogger.config_working_directory: + if tblogger.config_working_directory is not None: init_dir = get_initial_directory( pipeline_directory=tblogger.config_working_directory ) @@ -135,7 +146,7 @@ def _initialize_writers() -> None: ) @staticmethod - def end_of_config() -> None: + def end_of_config(trial: Trial) -> None: # noqa: ARG004 """Closes the writer.""" if tblogger.config_writer: # Close and reset previous config writers for consistent logging. 
diff --git a/neps/runtime.py b/neps/runtime.py index 5cf0f29f..f1c5fcfe 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -7,18 +7,15 @@ import os import shutil import time +from collections.abc import Callable, Iterable, Iterator, Mapping from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path from typing import ( TYPE_CHECKING, Any, - Callable, Generic, - Iterable, - Iterator, Literal, - Mapping, TypeVar, ) @@ -51,7 +48,6 @@ def _default_worker_name() -> str: Loc = TypeVar("Loc") - # NOTE: As each NEPS process is only ever evaluating a single trial, this global can # be retrieved in NePS and refers to what this process is currently evaluating. # Note that before `_set_in_progress_trial` is called, this should be cleared @@ -93,6 +89,14 @@ def get_in_progress_trial() -> Trial: return _CURRENTLY_RUNNING_TRIAL_IN_PROCESS +_TRIAL_END_CALLBACKS: dict[str, Callable[[Trial], None]] = {} + + +def register_notify_trial_end(key: str, callback: Callable[[Trial], None]) -> None: + """Register a callback to be called when a trial ends.""" + _TRIAL_END_CALLBACKS[key] = callback + + @contextmanager def _set_global_trial(trial: Trial) -> Iterator[None]: global _CURRENTLY_RUNNING_TRIAL_IN_PROCESS # noqa: PLW0603 @@ -107,6 +111,8 @@ def _set_global_trial(trial: Trial) -> Iterator[None]: ) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = trial yield + for _key, callback in _TRIAL_END_CALLBACKS.items(): + callback(trial) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = None diff --git a/neps/search_spaces/architecture/api.py b/neps/search_spaces/architecture/api.py index 98af2f14..a3af1510 100644 --- a/neps/search_spaces/architecture/api.py +++ b/neps/search_spaces/architecture/api.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import inspect from typing import Callable diff --git a/neps/search_spaces/architecture/cfg.py b/neps/search_spaces/architecture/cfg.py index f7815f6d..7e4aa453 100644 --- a/neps/search_spaces/architecture/cfg.py +++ 
b/neps/search_spaces/architecture/cfg.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import itertools import math import sys diff --git a/neps/search_spaces/architecture/core_graph_grammar.py b/neps/search_spaces/architecture/core_graph_grammar.py index 277ae9fc..17323b48 100644 --- a/neps/search_spaces/architecture/core_graph_grammar.py +++ b/neps/search_spaces/architecture/core_graph_grammar.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import collections import inspect diff --git a/neps/search_spaces/architecture/graph_grammar.py b/neps/search_spaces/architecture/graph_grammar.py index e21e94d8..1c9fa159 100644 --- a/neps/search_spaces/architecture/graph_grammar.py +++ b/neps/search_spaces/architecture/graph_grammar.py @@ -6,7 +6,7 @@ from functools import partial from typing import Any, ClassVar, Mapping from typing_extensions import override, Self -from neps.utils.types import NotSet, _NotSet +from neps.utils.types import NotSet import networkx as nx import numpy as np @@ -46,22 +46,26 @@ class GraphParameter(ParameterWithPrior[nx.DiGraph, str], MutatableParameter): @property @abstractmethod - def id(self) -> str: ... + def id(self) -> str: + ... # NOTE(eddiebergman): Unlike traditional parameters, it seems @property @abstractmethod - def value(self) -> nx.DiGraph: ... + def value(self) -> nx.DiGraph: + ... # NOTE(eddiebergman): This is a function common to the three graph # parameters that is used for `load_from` @abstractmethod - def create_from_id(self, value: str) -> None: ... + def create_from_id(self, value: str) -> None: + ... # NOTE(eddiebergman): Function shared between graph parameters. # Used to `set_value()` @abstractmethod - def reset(self) -> None: ... + def reset(self) -> None: + ... @override def __eq__(self, other: Any) -> bool: @@ -71,7 +75,8 @@ def __eq__(self, other: Any) -> bool: return self.id == other.id @abstractmethod - def compute_prior(self, normalized_value: float) -> float: ... 
+ def compute_prior(self, normalized_value: float) -> float: + ... @override def set_value(self, value: str | None) -> None: @@ -137,7 +142,9 @@ def load_from(self, value: str | Self) -> None: self.create_from_id(value) @abstractmethod - def mutate(self, parent: Self | None = None, *, mutation_strategy: str = "bananas") -> Self: ... + def mutate(self, parent: Self | None = None, *, + mutation_strategy: str = "bananas") -> Self: + ... @abstractmethod def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: @@ -154,7 +161,7 @@ def normalized_to_value(self, normalized_value: float) -> nx.DiGraph: @override def clone(self) -> Self: - new_self = self.__class__(**self.input_kwargs) + new_self = self.__class__(**self.input_kwargs) # HACK(eddiebergman): It seems the subclasses all have these and # so we just copy over those attributes, deepcloning anything that is mutable @@ -178,6 +185,7 @@ def clone(self) -> Self: return new_self + class GraphGrammar(GraphParameter, CoreGraphGrammar): hp_name = "graph_grammar" @@ -227,7 +235,8 @@ def __init__( def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.reset() - copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[0] + copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[ + 0] _ = copy_self.value # required for checking if graph is valid! 
return copy_self @@ -507,7 +516,8 @@ def crossover( def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.reset() - copy_self.string_tree_list = [grammar.sampler(1)[0] for grammar in copy_self.grammars] + copy_self.string_tree_list = [grammar.sampler(1)[0] for grammar in + copy_self.grammars] copy_self.string_tree = copy_self.assemble_trees( copy_self.string_tree_list[0], copy_self.string_tree_list[1:], @@ -1029,8 +1039,8 @@ def recursive_worker( [ grammar.compute_space_size for grammar, n_grammar in zip( - self.grammars, self.number_of_repetitive_motifs_per_grammar - ) + self.grammars, self.number_of_repetitive_motifs_per_grammar + ) for _ in range(n_grammar) ] ) diff --git a/neps/search_spaces/hyperparameters/categorical.py b/neps/search_spaces/hyperparameters/categorical.py index 39694a19..bc1f9423 100644 --- a/neps/search_spaces/hyperparameters/categorical.py +++ b/neps/search_spaces/hyperparameters/categorical.py @@ -2,16 +2,9 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Iterable, - Literal, - Mapping, - Union, -) -from typing_extensions import Self, TypeAlias, override +from collections.abc import Iterable, Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeAlias +from typing_extensions import Self, override import numpy as np import numpy.typing as npt @@ -22,7 +15,7 @@ if TYPE_CHECKING: from neps.utils.types import f64 -CategoricalTypes: TypeAlias = Union[float, int, str] +CategoricalTypes: TypeAlias = float | int | str class CategoricalParameter( @@ -81,7 +74,7 @@ def __init__( super().__init__(value=None, is_fidelity=False, default=default) for choice in choices: - if not isinstance(choice, (float, int, str)): + if not isinstance(choice, float | int | str): raise TypeError( f'Choice "{choice}" is not of a valid type (float, int, str)' ) diff --git a/neps/search_spaces/hyperparameters/float.py b/neps/search_spaces/hyperparameters/float.py index 
b780f3ff..4e4016bd 100644 --- a/neps/search_spaces/hyperparameters/float.py +++ b/neps/search_spaces/hyperparameters/float.py @@ -3,7 +3,8 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, Literal from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/hyperparameters/integer.py b/neps/search_spaces/hyperparameters/integer.py index 6462cc63..7f816588 100644 --- a/neps/search_spaces/hyperparameters/integer.py +++ b/neps/search_spaces/hyperparameters/integer.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, Literal from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/hyperparameters/numerical.py b/neps/search_spaces/hyperparameters/numerical.py index 9aaaf6d1..a97473e1 100644 --- a/neps/search_spaces/hyperparameters/numerical.py +++ b/neps/search_spaces/hyperparameters/numerical.py @@ -22,8 +22,9 @@ from __future__ import annotations +from collections.abc import Mapping from functools import lru_cache -from typing import TYPE_CHECKING, Any, ClassVar, Literal, Mapping, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar from typing_extensions import Self, override import numpy as np diff --git a/neps/search_spaces/parameter.py b/neps/search_spaces/parameter.py index a2f6c09c..277b8ca7 100644 --- a/neps/search_spaces/parameter.py +++ b/neps/search_spaces/parameter.py @@ -23,8 +23,9 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, ClassVar, Generic, Mapping, TypeVar, runtime_checkable -from typing_extensions import Protocol, Self +from collections.abc import Mapping +from typing import Any, ClassVar, Generic, Protocol, TypeVar, 
runtime_checkable +from typing_extensions import Self ValueT = TypeVar("ValueT") SerializedT = TypeVar("SerializedT") diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 3f0d6703..8621a928 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -7,9 +7,10 @@ import logging import operator import pprint +from collections.abc import Hashable, Iterator, Mapping from itertools import product from pathlib import Path -from typing import TYPE_CHECKING, Any, Hashable, Iterator, Literal, Mapping +from typing import TYPE_CHECKING, Any, Literal from typing_extensions import Self import ConfigSpace as CS @@ -111,7 +112,7 @@ def pipeline_space_from_yaml( # noqa: C901 format, contents, or if the dictionary is invalid. """ try: - if isinstance(config, (str, Path)): + if isinstance(config, str | Path): # try to load the YAML file try: yaml_file_path = Path(config) @@ -329,18 +330,22 @@ def sample( sampled_hps[name] = hp.clone() continue - for _ in range(patience): + for attempt in range(patience): try: if user_priors and isinstance(hp, ParameterWithPrior): sampled_hps[name] = hp.sample(user_priors=user_priors) else: sampled_hps[name] = hp.sample() break - except ValueError: + except Exception as e: # noqa: BLE001 logger.warning( - f"Could not sample valid value for hyperparameter {name}!" + f"Attempt {attempt + 1}/{patience} failed for" + f" sampling {name}: {e!s}" ) else: + logger.error( + f"Failed to sample valid value for {name} after {patience} attempts" + ) raise ValueError( f"Could not sample valid value for hyperparameter {name}" f" in {patience} tries!" 
@@ -559,7 +564,7 @@ def get_vectorial_dim(self) -> dict[Literal["continuous", "categorical"], int] | The vectorial dimension """ if not any( - isinstance(hp, (NumericalParameter, CategoricalParameter, ConstantParameter)) + isinstance(hp, NumericalParameter | CategoricalParameter | ConstantParameter) for hp in self.values() ): return None @@ -614,6 +619,13 @@ def get_search_space_grid( Does not support graph parameters currently. + !!! note "TODO" + + Include default hyperparameters in the grid. + If all HPs have a `default` then add a single configuration. + If only partial HPs have defaults then add all combinations of defaults, but + only to the end of the list of configs. + Args: size_per_numerical_hp: The size of the grid for each numerical hyperparameter. include_endpoints: Whether to include the endpoints of the grid. @@ -657,7 +669,9 @@ def get_search_space_grid( SearchSpace( **{ name: ConstantParameter(value=value) # type: ignore - for name, value in zip(self.hyperparameters.keys(), config_values) + for name, value in zip( + self.hyperparameters.keys(), config_values, strict=False + ) } ) for config_values in full_grid @@ -880,3 +894,18 @@ def is_equal_value( return False return True + + def update_hp_values(self, new_values: dict[str, Any]) -> None: + """Update the hyperparameter values with new values. + + Args: + new_values: The new values to set for the hyperparameters. + """ + _hp_dict = self.hp_values() + _intersect = set(_hp_dict.keys()) & set(new_values.keys()) + assert len(_intersect) == len(new_values), ( + "All hyperparameters must be present! 
" + f"{set(_hp_dict.keys()) - set(new_values.keys())} are missing" + ) + _hp_dict.update(new_values) + self.set_hyperparameters_from_dict(_hp_dict) diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index 8b25b1b0..ff6d72ad 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import re from typing import Literal, overload diff --git a/neps/state/_eval.py b/neps/state/_eval.py index 0d08dfdd..48c7e960 100644 --- a/neps/state/_eval.py +++ b/neps/state/_eval.py @@ -4,8 +4,9 @@ import logging import time import traceback +from collections.abc import Callable, Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, TypeVar +from typing import TYPE_CHECKING, Any, Literal, TypeVar from neps.exceptions import NePSError diff --git a/neps/state/filebased.py b/neps/state/filebased.py index 360364cf..3e68b165 100644 --- a/neps/state/filebased.py +++ b/neps/state/filebased.py @@ -21,10 +21,11 @@ import json import logging import pprint +from collections.abc import Iterable, Iterator from contextlib import contextmanager from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import ClassVar, Iterable, Iterator, TypeVar +from typing import ClassVar, TypeVar from typing_extensions import override from uuid import uuid4 diff --git a/neps/state/neps_state.py b/neps/state/neps_state.py index 8afaee62..dd7d9279 100644 --- a/neps/state/neps_state.py +++ b/neps/state/neps_state.py @@ -12,8 +12,9 @@ import logging import time +from collections.abc import Callable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Callable, Generic, TypeVar, overload +from typing import TYPE_CHECKING, Generic, TypeVar, overload from more_itertools import take @@ -88,10 +89,13 @@ def sample_trial( Returns: The new trial. 
""" - with self._optimizer_state.acquire() as ( - opt_state, - put_opt, - ), self._seed_state.acquire() as (seed_state, put_seed_state): + with ( + self._optimizer_state.acquire() as ( + opt_state, + put_opt, + ), + self._seed_state.acquire() as (seed_state, put_seed_state), + ): trials: dict[Trial.ID, Trial] = {} for trial_id, shared_trial in self._trials.all().items(): trial = shared_trial.synced() @@ -195,6 +199,7 @@ def get_errors(self) -> ErrDump: @overload def get_next_pending_trial(self) -> Trial | None: ... + @overload def get_next_pending_trial(self, n: int | None = None) -> list[Trial]: ... diff --git a/neps/state/optimizer.py b/neps/state/optimizer.py index f4000b07..3b3019f0 100644 --- a/neps/state/optimizer.py +++ b/neps/state/optimizer.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping +from typing import Any @dataclass diff --git a/neps/state/protocols.py b/neps/state/protocols.py index 78fcee0d..6ee54be4 100644 --- a/neps/state/protocols.py +++ b/neps/state/protocols.py @@ -6,11 +6,12 @@ from __future__ import annotations import logging +from collections.abc import Callable, Iterable, Iterator from contextlib import contextmanager from copy import deepcopy from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, ClassVar, Generic, Iterable, Iterator, TypeVar -from typing_extensions import Protocol, Self +from typing import TYPE_CHECKING, ClassVar, Generic, Protocol, TypeVar +from typing_extensions import Self from neps.exceptions import ( LockFailedError, diff --git a/neps/state/seed_snapshot.py b/neps/state/seed_snapshot.py index 0f9fad87..4a26370b 100644 --- a/neps/state/seed_snapshot.py +++ b/neps/state/seed_snapshot.py @@ -5,18 +5,17 @@ import contextlib import random from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, List, Tuple, Union -from typing_extensions import TypeAlias +from typing import 
TYPE_CHECKING, Any, TypeAlias import numpy as np if TYPE_CHECKING: import torch - NP_RNG_STATE: TypeAlias = Tuple[str, np.ndarray, int, int, float] - PY_RNG_STATE: TypeAlias = Tuple[int, Tuple[int, ...], Union[int, None]] + NP_RNG_STATE: TypeAlias = tuple[str, np.ndarray, int, int, float] + PY_RNG_STATE: TypeAlias = tuple[int, tuple[int, ...], int | None] TORCH_RNG_STATE: TypeAlias = torch.Tensor - TORCH_CUDA_RNG_STATE: TypeAlias = List[torch.Tensor] + TORCH_CUDA_RNG_STATE: TypeAlias = list[torch.Tensor] @dataclass @@ -105,7 +104,7 @@ def __eq__(self, other: Any, /) -> bool: # noqa: PLR0911 if not all( torch.equal(a, b) - for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng) + for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng, strict=False) ): return False diff --git a/neps/state/trial.py b/neps/state/trial.py index 862e2bbb..7efd2d1e 100644 --- a/neps/state/trial.py +++ b/neps/state/trial.py @@ -3,20 +3,19 @@ from __future__ import annotations import logging +from collections.abc import Callable, Mapping from dataclasses import asdict, dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Literal from typing_extensions import Self import numpy as np from neps.exceptions import NePSError -from neps.utils.types import ConfigResult +from neps.utils.types import ERROR, ConfigResult, RawConfig if TYPE_CHECKING: from neps.search_spaces import SearchSpace - from neps.utils.types import ERROR, RawConfig - logger = logging.getLogger(__name__) diff --git a/neps/utils/_locker.py b/neps/utils/_locker.py index 0b9a92d5..f2d430f8 100644 --- a/neps/utils/_locker.py +++ b/neps/utils/_locker.py @@ -1,8 +1,9 @@ from __future__ import annotations +from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path -from typing import IO, Iterator +from typing import IO import portalocker as pl diff --git a/neps/utils/common.py 
b/neps/utils/common.py index 2a9ca586..2c6f9d35 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -3,10 +3,16 @@ from __future__ import annotations import inspect +import random +from collections.abc import Iterable, Mapping, Sequence +from functools import partial +from pathlib import Path +from typing import Any from functools import partial from pathlib import Path from typing import Any, Iterable, Mapping, Sequence +import numpy as np import torch import yaml @@ -139,6 +145,9 @@ def load_lightning_checkpoint( return checkpoint_path, checkpoint +_INTIAL_DIRECTORY_CACHE: dict[str, Path] = {} + + # TODO: We should have a better way to have a shared folder between trials. # Right now, the fidelity lineage is linear, however this will be a difficulty # when/if we have a tree structure. @@ -154,13 +163,15 @@ def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: """ neps_state = get_workers_neps_state() if pipeline_directory is not None: - pipeline_directory = Path(pipeline_directory) # TODO: Hard coded assumption - config_id = pipeline_directory.name.split("_", maxsplit=1)[-1] + config_id = Path(pipeline_directory).name.split("_", maxsplit=1)[-1] trial = neps_state.get_trial_by_id(config_id) else: trial = get_in_progress_trial() + if trial.metadata.id in _INTIAL_DIRECTORY_CACHE: + return _INTIAL_DIRECTORY_CACHE[trial.metadata.id] + # Recursively find the initial directory while (prev_trial_id := trial.metadata.previous_trial_id) is not None: trial = neps_state.get_trial_by_id(prev_trial_id) @@ -169,7 +180,10 @@ def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: # TODO: Hard coded assumption that we are operating in a filebased neps assert isinstance(initial_dir, str) - return Path(initial_dir) + path = Path(initial_dir) + + _INTIAL_DIRECTORY_CACHE[trial.metadata.id] = path + return path def get_searcher_data( @@ -236,7 +250,7 @@ def get_value(obj: Any) -> Any: """Honestly, don't know why you would 
use this. Please try not to.""" if obj is None: return None - if isinstance(obj, (str, int, float, bool)): + if isinstance(obj, str | int | float | bool): return obj if isinstance(obj, dict): return {key: get_value(value) for key, value in obj.items()} diff --git a/neps/utils/data_loading.py b/neps/utils/data_loading.py index a0f86210..d3b08c15 100644 --- a/neps/utils/data_loading.py +++ b/neps/utils/data_loading.py @@ -5,10 +5,11 @@ import json import os import re +from collections.abc import Mapping from dataclasses import asdict from itertools import chain from pathlib import Path -from typing import Any, Mapping, TypedDict +from typing import Any, TypedDict import numpy as np import yaml diff --git a/neps/utils/files.py b/neps/utils/files.py index ddb0627c..cfd49963 100644 --- a/neps/utils/files.py +++ b/neps/utils/files.py @@ -3,9 +3,10 @@ from __future__ import annotations import dataclasses +from collections.abc import Iterable, Mapping from enum import Enum from pathlib import Path -from typing import Any, Iterable, Mapping +from typing import Any import yaml diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 9d7f6445..bd2664e1 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -7,8 +7,9 @@ import importlib.util import logging import sys +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any import yaml @@ -273,7 +274,7 @@ def process_searcher(key: str, special_configs: dict, settings: dict) -> None: settings[SEARCHER_KWARGS] = searcher searcher = load_and_return_object(path, name, key) - elif isinstance(searcher, (str, Path)): + elif isinstance(searcher, str | Path): pass else: raise TypeError( @@ -428,7 +429,7 @@ def check_run_args(settings: dict) -> None: if not all(callable(item) for item in value): raise TypeError("All items in 'pre_load_hooks' must be callable.") elif param == SEARCHER: - if not (isinstance(value, (str, dict)) or issubclass(value, 
BaseOptimizer)): + if not (isinstance(value, str | dict) or issubclass(value, BaseOptimizer)): raise TypeError( "Parameter 'searcher' must be a string or a class that is a subclass " "of BaseOptimizer." @@ -450,13 +451,13 @@ def check_essential_arguments( root_directory: str | None, pipeline_space: dict | None, max_cost_total: int | None, - max_evaluation_total: int | None, + max_evaluations_total: int | None, searcher: BaseOptimizer | dict | str | None, ) -> None: """Validates essential NePS configuration arguments. Ensures 'run_pipeline', 'root_directory', 'pipeline_space', and either - 'max_cost_total' or 'max_evaluation_total' are provided for NePS execution. + 'max_cost_total' or 'max_evaluations_total' are provided for NePS execution. Raises ValueError with missing argument details. Additionally, checks 'searcher' is a BaseOptimizer if 'pipeline_space' is absent. @@ -465,7 +466,7 @@ def check_essential_arguments( root_directory (str): Directory path for data storage. pipeline_space: search space for this run. max_cost_total: Max allowed total cost for experiments. - max_evaluation_total: Max allowed evaluations. + max_evaluations_total: Max allowed evaluations. searcher: Optimizer for the configuration space. Raises: @@ -480,9 +481,9 @@ def check_essential_arguments( # provide the search_space because it's the argument of the searcher. raise ValueError("'pipeline_space' is required but was not provided.") - if not max_evaluation_total and not max_cost_total: + if not max_evaluations_total and not max_cost_total: raise ValueError( - "'max_evaluation_total' or 'max_cost_total' is required but " + "'max_evaluations_total' or 'max_cost_total' is required but " "both were not provided." 
) diff --git a/neps/utils/types.py b/neps/utils/types.py index a6b6c540..867ff8f7 100644 --- a/neps/utils/types.py +++ b/neps/utils/types.py @@ -2,9 +2,9 @@ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, Literal, Mapping, Union -from typing_extensions import TypeAlias +from typing import TYPE_CHECKING, Any, Literal, TypeAlias import numpy as np @@ -15,10 +15,10 @@ # TODO(eddiebergman): We can turn this to an enum at some # point to prevent having to isinstance and str match ERROR: TypeAlias = Literal["error"] -Number: TypeAlias = Union[int, float, np.number] +Number: TypeAlias = int | float | np.number ConfigID: TypeAlias = str RawConfig: TypeAlias = Mapping[str, Any] -Metadata: TypeAlias = Dict[str, Any] +Metadata: TypeAlias = dict[str, Any] ResultDict: TypeAlias = Mapping[str, Any] # NOTE(eddiebergman): Getting types for scipy distributions sucks @@ -33,7 +33,6 @@ def __repr__(self) -> str: NotSet = _NotSet() - f64 = np.float64 i64 = np.int64 diff --git a/neps/utils/validation.py b/neps/utils/validation.py index 884df0c5..0f8281cd 100644 --- a/neps/utils/validation.py +++ b/neps/utils/validation.py @@ -3,7 +3,8 @@ from __future__ import annotations import inspect -from typing import Any, Callable +from collections.abc import Callable +from typing import Any from neps.exceptions import NePSError diff --git a/neps_examples/basic_usage/architecture.py b/neps_examples/basic_usage/architecture.py index 57ac74cc..5d43efe7 100644 --- a/neps_examples/basic_usage/architecture.py +++ b/neps_examples/basic_usage/architecture.py @@ -1,4 +1,4 @@ -from __future__ import annotations + import logging diff --git a/neps_examples/efficiency/freeze_thaw.py b/neps_examples/efficiency/freeze_thaw.py new file mode 100644 index 00000000..32943ec2 --- /dev/null +++ b/neps_examples/efficiency/freeze_thaw.py @@ -0,0 +1,180 @@ +import logging +from pathlib import Path +import torch 
CHECKPOINT_NAME = "checkpoint.pt"


class SimpleNN(nn.Module):
    """Fully connected classifier that flattens its input and emits 10 logits.

    Args:
        input_size (int): Number of features per sample after flattening.
        num_layers (int): Number of hidden ``Linear`` + ``ReLU`` layers.
        num_neurons (int): Width of every hidden layer.
    """

    def __init__(self, input_size, num_layers, num_neurons):
        super().__init__()
        layers = [nn.Flatten()]

        for _ in range(num_layers):
            layers.append(nn.Linear(input_size, num_neurons))
            layers.append(nn.ReLU())
            input_size = num_neurons  # Set input size for the next layer

        # `input_size` equals `num_neurons` once a hidden layer exists, and is
        # still the raw input width when `num_layers` is 0.
        layers.append(nn.Linear(input_size, 10))  # Output layer for 10 classes
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


def save_checkpoint(pipeline_directory, model, optimizer, epochs):
    """Write model/optimizer state dicts and the last completed epoch to disk."""
    # State dicts (not whole pickled objects) stay loadable when `torch.load`
    # runs in its restricted `weights_only` mode.
    states = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epochs": epochs,
    }
    torch.save(states, Path(pipeline_directory) / CHECKPOINT_NAME)


def load_checkpoint(previous_pipeline_directory, model, optimizer):
    """Restore `model` and `optimizer` in place and return the first epoch to train.

    Returns 1 when there is no previous directory or it holds no checkpoint.
    """
    if previous_pipeline_directory is None:
        return 1

    checkpoint_path = Path(previous_pipeline_directory) / CHECKPOINT_NAME
    if not checkpoint_path.exists():
        return 1

    states = torch.load(checkpoint_path)
    model.load_state_dict(states["model"])
    optimizer.load_state_dict(states["optimizer"])
    # "epochs" is the last epoch that was completed, so resume with the next one.
    return states["epochs"] + 1


def training_pipeline(
    pipeline_directory,
    previous_pipeline_directory,
    num_layers,
    num_neurons,
    epochs,
    learning_rate,
    weight_decay,
):
    """
    Trains and validates a simple neural network on the MNIST dataset.

    Training continues from the checkpoint in `previous_pipeline_directory`
    when one exists, and a new checkpoint is written to `pipeline_directory`.

    Args:
        pipeline_directory (str | Path): Directory the checkpoint is saved to.
        previous_pipeline_directory (str | Path | None): Directory searched for
            a checkpoint to resume from.
        num_layers (int): Number of hidden layers in the network.
        num_neurons (int): Number of neurons in each hidden layer.
        epochs (int): Epoch to train up to (inclusive).
        learning_rate (float): Learning rate for the AdamW optimizer.
        weight_decay (float): Weight decay for the AdamW optimizer.

    Returns:
        float: The validation error (1 - accuracy) after training.
    """
    # Transformations applied on each image
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(
                (0.1307,), (0.3081,)
            ),  # Mean and Std Deviation for MNIST
        ]
    )

    # Loading MNIST dataset
    dataset = datasets.MNIST(
        root="./.data", train=True, download=True, transform=transform
    )
    # A fixed seed keeps the train/validation split identical across calls, so a
    # resumed config is never validated on samples it was trained on earlier.
    train_set, val_set = torch.utils.data.random_split(
        dataset, [50000, 10000], generator=torch.Generator().manual_seed(0)
    )
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=1000, shuffle=False)

    model = SimpleNN(28 * 28, num_layers, num_neurons)
    criterion = nn.CrossEntropyLoss()

    # Select optimizer
    optimizer = optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    # Loading potential checkpoint
    start_epoch = load_checkpoint(previous_pipeline_directory, model, optimizer)

    # Training loop
    loss = None  # stays None when there is nothing left to train
    for _ in range(start_epoch, epochs + 1):
        model.train()
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    # Validation loop
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target).item()

            # Get the predicted class
            predicted = output.argmax(dim=1)

            # Count correct predictions
            val_total += target.size(0)
            val_correct += (predicted == target).sum().item()

    # The criterion already averages within a batch, so average over batches.
    val_loss /= len(val_loader)
    val_err = 1 - val_correct / val_total

    # Saving checkpoint
    save_checkpoint(pipeline_directory, model, optimizer, epochs)

    # Logging
    train_loss = loss.item() if loss is not None else float("nan")
    tblogger.log(
        loss=val_loss,
        current_epoch=epochs,
        # Set to `True` for a live incumbent trajectory.
        write_summary_incumbent=True,
        # Set to `True` for a live loss trajectory for each config.
        writer_config_scalar=True,
        # Set to `True` for live parallel coordinate, scatter plot matrix, and table view.
        writer_config_hparam=True,
        # Appending extra data
        extra_data={
            "train_loss": tblogger.scalar_logging(train_loss),
            "val_err": tblogger.scalar_logging(val_err),
        },
    )

    return val_err


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    pipeline_space = {
        "learning_rate": neps.Float(1e-5, 1e-1, log=True),
        "num_layers": neps.Integer(1, 5),
        "num_neurons": neps.Integer(64, 128),
        "weight_decay": neps.Float(1e-5, 0.1, log=True),
        "epochs": neps.Integer(1, 10, is_fidelity=True),
    }

    neps.run(
        pipeline_space=pipeline_space,
        run_pipeline=training_pipeline,
        searcher="ifbo",
        max_evaluations_total=50,
        root_directory="./debug/ifbo-mnist/",
        overwrite_working_directory=False,  # set to False for a multi-worker run
        # (optional) ifbo hyperparameters
        step_size=1,
        # (optional) ifbo surrogate model hyperparameters (for FT-PFN)
        surrogate_model_args=dict(
            version="0.0.1",
            target_path=None,
        ),
    )

    # NOTE: this is `experimental` and may not work as expected
    ## plotting a 3D plot for learning curves explored by ifbo
    plotter = Plotter3D(
        run_path="./debug/ifbo-mnist/",  # same as `root_directory` above
        fidelity_key="epochs",  # same as `pipeline_space`
    )
    plotter.plot3D(filename="ifbo")
a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py b/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py index 3db93bde..c79a7a01 100644 --- a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py +++ b/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import time diff --git a/neps_examples/template/ifbo_template.py b/neps_examples/template/ifbo_template.py new file mode 100644 index 00000000..9e99c820 --- /dev/null +++ b/neps_examples/template/ifbo_template.py @@ -0,0 +1,37 @@ +import numpy as np + +from neps.plot.plot3D import Plotter3D + +from .priorband_template import pipeline_space, run_pipeline + + +ASSUMED_MAX_LOSS = 10 + + +def ifbo_run_pipeline( + pipeline_directory, # The directory where the config is saved + previous_pipeline_directory, # The directory of the config's immediate lower fidelity + **config, # The hyperparameters to be used in the pipeline +) -> dict | float: + result_dict = run_pipeline( + pipeline_directory=pipeline_directory, # NOTE: can only support <=10 HPs and no categoricals + previous_pipeline_directory=previous_pipeline_directory, + **config, + ) + # NOTE: Normalize the loss to be between 0 and 1 + ## crucial for ifBO's FT-PFN surrogate to work as expected + result_dict["loss"] = np.clip(result_dict["loss"], 0, ASSUMED_MAX_LOSS) / ASSUMED_MAX_LOSS + return result_dict + + +if __name__ == "__main__": + import neps + + neps.run( + run_pipeline=run_pipeline, + pipeline_space=pipeline_space(), + root_directory="results", + max_evaluations_total=50, + searcher="ifbo", + ) +# end of ifbo_run_pipeline \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 06b4baa4..a08a9e7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,6 @@ classifiers = [ "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS", - 
"Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", @@ -48,14 +46,14 @@ packages = [{ include = "neps" }, { include = "neps_examples" }] [tool.poetry.dependencies] -python = ">=3.8,<3.12" +python = ">=3.10,<3.12" ConfigSpace = "^0.7" grakel = "^0.1" numpy = "^1" pandas = "^2" networkx = "^2.6.3" nltk = "^3.6.4" -scipy = "^1" +scipy = ">=1.13.1" torch = ">1.7.0,!=2.0.1, !=2.1.0" matplotlib = "^3" more-itertools = "*" @@ -65,6 +63,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" +ifbo = ">=0.3.10" [tool.poetry.group.dev.dependencies] ruff = "^0.4" @@ -96,7 +95,7 @@ build-backend = "poetry.core.masonry.api" # "tests", # "neps_examples", [tool.ruff] -target-version = "py38" +target-version = "py310" output-format = "full" line-length = 90 src = ["neps"] @@ -107,6 +106,7 @@ exclude = [ "neps/search_spaces/architecture/**/*.py", "neps/search_spaces/yaml_search_space_utils.py", "neps/utils/run_args_from_yaml.py", + "neps/utils/common.py", "neps/api.py", "tests", "neps_examples", @@ -210,6 +210,7 @@ ignore = [ "COM812", # Require trailing commas, recommended to ignore due to ruff formatter "PLR2004", # No magic numbers inline "N817", # CamelCase import as (ignore for ConfigSpace) + "N999", # Invalid name for module "NPY002", # Replace legacy `np.random.choice` call with `np.random.Generator` ] @@ -249,8 +250,8 @@ ignore = [ [tool.ruff.lint.isort] known-first-party = ["neps"] known-third-party = [] -no-lines-before = ["future"] required-imports = ["from __future__ import annotations"] +no-lines-before = ["future"] combine-as-imports = true extra-standard-library = ["typing_extensions"] force-wrap-aliases = true @@ -274,7 +275,7 @@ markers = [ filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] -python_version = "3.8" # Match 
minimum supported version +python_version = "3.10" # Match minimum supported version packages = ["neps"] show_error_codes = true diff --git a/tests/regression_objectives.py b/tests/regression_objectives.py index 6655fcbb..8265c704 100644 --- a/tests/regression_objectives.py +++ b/tests/regression_objectives.py @@ -1,11 +1,8 @@ -from __future__ import annotations - import warnings from pathlib import Path -from typing import Any, Callable +from typing import Any, Callable, Literal import numpy as np -from typing_extensions import Literal import neps from neps.search_spaces.search_space import SearchSpace, pipeline_space_from_configspace diff --git a/tests/regression_runner.py b/tests/regression_runner.py index 0a5a8898..d102a1df 100644 --- a/tests/regression_runner.py +++ b/tests/regression_runner.py @@ -1,6 +1,4 @@ # mypy: disable-error-code = union-attr -from __future__ import annotations - import json import logging from pathlib import Path diff --git a/tests/test_neps_api/test_api.py b/tests/test_neps_api/test_api.py index 32408007..b4a54940 100644 --- a/tests/test_neps_api/test_api.py +++ b/tests/test_neps_api/test_api.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import os import runpy diff --git a/tests/test_runtime/test_default_report_values.py b/tests/test_runtime/test_default_report_values.py index 652db9de..2ebec1c0 100644 --- a/tests/test_runtime/test_default_report_values.py +++ b/tests/test_runtime/test_default_report_values.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from pathlib import Path from pytest_cases import fixture diff --git a/tests/test_runtime/test_error_handling_strategies.py b/tests/test_runtime/test_error_handling_strategies.py index 890717c5..d3180ae9 100644 --- a/tests/test_runtime/test_error_handling_strategies.py +++ b/tests/test_runtime/test_error_handling_strategies.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from neps.exceptions import WorkerRaiseError import pytest from 
dataclasses import dataclass diff --git a/tests/test_runtime/test_stopping_criterion.py b/tests/test_runtime/test_stopping_criterion.py index 28426a1f..5b83985d 100644 --- a/tests/test_runtime/test_stopping_criterion.py +++ b/tests/test_runtime/test_stopping_criterion.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import time from pathlib import Path from pytest_cases import fixture diff --git a/tests/test_state/test_filebased_neps_state.py b/tests/test_state/test_filebased_neps_state.py index a3385141..cf4e77b8 100644 --- a/tests/test_state/test_filebased_neps_state.py +++ b/tests/test_state/test_filebased_neps_state.py @@ -2,8 +2,6 @@ This could be generalized if we end up with a server based implementation but for now we're just testing the filebased implementation.""" -from __future__ import annotations - from pathlib import Path from typing import Any from neps.exceptions import NePSError, TrialNotFoundError diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py index 0d0891ce..ab3a6b6a 100644 --- a/tests/test_state/test_neps_state.py +++ b/tests/test_state/test_neps_state.py @@ -2,8 +2,6 @@ This could be generalized if we end up with a server based implementation but for now we're just testing the filebased implementation.""" -from __future__ import annotations - import time from pathlib import Path from typing import Any @@ -93,20 +91,30 @@ def case_search_space_fid_with_prior() -> SearchSpace: "hyperband", "hyperband_custom_default", "priorband", + "priorband_bo", "mobster", "mf_ei_bo", + "priorband_asha", + "ifbo", + "priorband_asha_hyperband", ] OPTIMIZER_REQUIRES_BUDGET = [ "successive_halving_prior", "hyperband_custom_default", "asha", "priorband", + "priorband_bo", + "priorband_asha", + "priorband_asha_hyperband", "hyperband", "asha_prior", "mobster", ] REQUIRES_PRIOR = { "priorband", + "priorband_bo", + "priorband_asha", + "priorband_asha_hyperband", } REQUIRES_COST = ["cost_cooling_bayesian_optimization", 
"cost_cooling"] diff --git a/tests/test_state/test_rng.py b/tests/test_state/test_rng.py index 1f1318d3..d122c8e8 100644 --- a/tests/test_state/test_rng.py +++ b/tests/test_state/test_rng.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from pathlib import Path import random from typing import Callable diff --git a/tests/test_state/test_synced.py b/tests/test_state/test_synced.py index 3a28b724..fb39b148 100644 --- a/tests/test_state/test_synced.py +++ b/tests/test_state/test_synced.py @@ -1,9 +1,8 @@ -from __future__ import annotations +import copy +import random from pytest_cases import parametrize, parametrize_with_cases, case -import copy import numpy as np -import random from neps.state.err_dump import ErrDump, SerializableTrialError from neps.state.filebased import ( ReaderWriterErrDump, @@ -15,8 +14,6 @@ FileLocker, ) from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo -from neps.state.protocols import Synced -from neps.state.trial import Trial import pytest from typing import Any, Callable from pathlib import Path diff --git a/tests/test_state/test_trial.py b/tests/test_state/test_trial.py index 0ddc9e34..a433a917 100644 --- a/tests/test_state/test_trial.py +++ b/tests/test_state/test_trial.py @@ -1,8 +1,9 @@ -from __future__ import annotations -from neps.state import Trial import os + import numpy as np +from neps.state import Trial + def test_trial_creation() -> None: trial_id = "1"