From a78bef8297d9de60311e623736f85f7c9de0e37e Mon Sep 17 00:00:00 2001 From: Jonathan Wurth Date: Fri, 4 Oct 2024 19:10:56 +0200 Subject: [PATCH] Prepare for irace v4 --- examples/dual_annealing.py | 4 +- examples/parameter_space.py | 45 ++++--- irace/_rpy2.py | 109 ++++++++++++----- irace/base.py | 17 ++- irace/experiment.py | 19 +-- irace/params.py | 234 +++++++++++++++++++++++++++++------- irace/scenario.py | 1 + 7 files changed, 311 insertions(+), 118 deletions(-) diff --git a/examples/dual_annealing.py b/examples/dual_annealing.py index 22f7d3b..66b2d74 100644 --- a/examples/dual_annealing.py +++ b/examples/dual_annealing.py @@ -49,10 +49,10 @@ def target_runner(experiment: Experiment, scenario: Scenario) -> float: scenario = Scenario( max_experiments=180, instances=[Rastrigin(dim) for dim in (2, 3, 5, 10, 20, 40)], - verbose=1, + verbose=100, seed=42, ) if __name__ == '__main__': - result = irace(target_runner, scenario, parameter_space, return_df=True) + result = irace(target_runner, parameter_space, scenario, return_df=True) print(result) diff --git a/examples/parameter_space.py b/examples/parameter_space.py index bb63126..3b42ee1 100644 --- a/examples/parameter_space.py +++ b/examples/parameter_space.py @@ -1,18 +1,33 @@ -from irace import ParameterSpace, Categorical, Real, Integer, Bool +import irace.params as p +from irace import ParameterSpace, Categorical, Real, Integer, Bool, Scenario, Experiment, irace + +parameter_space = ParameterSpace([ + Categorical('algorithm', ['as', 'mmas', 'eas', 'ras', 'acs']), + Categorical('localsearch', [0, 1, 2, 3]), + Real('alpha', 0, 5), + Real('beta', 0, 10), + Real('rho', 0.01, 1), + Integer('ants', 5, 100), + Integer('nnls', 5, 50), + Real('q0', 0, 1, condition=p.ValueOf('algorithm').eq('acs')), + Integer('rasrank', 1, p.ValueOf('ants').min(10), condition=p.ValueOf('algorithm').eq('ras')), + Integer('elistants', 1, p.ValueOf('ants')), + Integer('nnls', 5, 50, condition=p.ValueOf('localsearch').isin([1, 2, 3])), + Bool('dlb', condition=p.ValueOf('localsearch').isin([1, 2, 3])), +], forbidden=[p.all(p.ValueOf('alpha').eq(0), p.ValueOf('beta').eq(0))]) + +scenario = Scenario( + max_experiments=300, + verbose=100, + seed=42, +) -if __name__ == '__main__': - parameter_space = ParameterSpace([ - Categorical('algorithm', ['as', 'mmas', 'eas', 'ras', 'acs']), - Categorical('localsearch', [0, 1, 2, 3]), - Real('alpha', 0, 5), - Real('beta', 0, 10), - Real('rho', 0.01, 1), - Integer('ants', 5, 100), - Integer('nnls', 5, 50), - Real('q0', 0, 1), - Bool('dlb'), - Integer('rasrank', 1, "ants"), - Integer('elistants', 1, 750), - ]) +def target_runner(experiment: Experiment, _) -> float: + return experiment.configuration['alpha'] * experiment.configuration['beta'] + + +if __name__ == '__main__': print(parameter_space) + result = irace(target_runner, parameter_space, scenario, return_df=True) + print(result) diff --git a/irace/_rpy2.py b/irace/_rpy2.py index a8aef59..af78acb 100644 --- a/irace/_rpy2.py +++ b/irace/_rpy2.py @@ -1,18 +1,20 @@ import logging import math from collections import OrderedDict -from typing import Any +from collections.abc import Mapping, Collection +from typing import Any, Optional import numpy as np import pandas as pd from rpy2 import rinterface, robjects, rinterface_lib from rpy2.rinterface import SexpClosure, ListSexpVector, rternalize -from rpy2.robjects import ListVector +from rpy2.robjects import ListVector, IntVector, BoolVector, RObject from rpy2.robjects import numpy2ri, pandas2ri from rpy2.robjects.packages import importr, PackageNotInstalledError +from . import params as p from .experiment import Experiment -from .params import ParameterSpace, Real, Integer, Bool, Categorical, Ordinal +from .params import ParameterSpace from .runner import TargetRunner from .scenario import Scenario @@ -36,9 +38,9 @@ def rpy2py_recursive(data: Any) -> Any: Leaves will be converted to e.g. numpy arrays or lists as appropriate and the whole tree to a dictionary. """ - if data == rinterface.NULL: - return None - elif data == rinterface.na_values.NA_Character: + if data in ( + rinterface.NULL, rinterface.NA_Character, rinterface.NA_Real, rinterface.NA_Integer, + rinterface.NA_Logical, rinterface.NA): return None elif type(data) in [robjects.DataFrame, robjects.ListVector]: return OrderedDict(zip(data.names, [rpy2py_recursive(elt) for elt in data])) @@ -47,7 +49,7 @@ def rpy2py_recursive(data: Any) -> Any: return rpy2py_recursive(data[0]) else: return [rpy2py_recursive(elt) for elt in data] - elif type(data) in [robjects.FloatVector, robjects.IntVector]: + elif type(data) in [robjects.FloatVector, robjects.IntVector, robjects.BoolVector]: if len(data) == 1: return rpy2py_recursive(data[0]) else: @@ -71,14 +73,18 @@ def convert_configuration(raw_configuration: dict[str, Any], parameter_space: Pa if subspace is None: continue - if isinstance(subspace, Real): + if raw_param is None or (isinstance(raw_param, float) and math.isnan(raw_param)): + configuration[name] = None + continue + + if isinstance(subspace, p.Real): param = float(raw_param) - elif isinstance(subspace, Integer): + elif isinstance(subspace, p.Integer): param = int(raw_param) - elif isinstance(subspace, Bool): - # `bool` is represented as discrete with `["0", "1"]` variants. - param = bool(int(raw_param)) - elif isinstance(subspace, Categorical) or isinstance(subspace, Ordinal): + elif isinstance(subspace, p.Bool): + # `bool` is represented as discrete with `["TRUE", "FALSE"]` variants. + param = bool(raw_param) + elif isinstance(subspace, p.Categorical) or isinstance(subspace, p.Ordinal): # categorical and ordinal are represented as integers, so we need to convert to the real variant. param = subspace.values[int(raw_param)] else: @@ -106,11 +112,11 @@ def convert_result(result: pd.DataFrame, parameter_space: ParameterSpace, return def rpy2py_experiment(obj: ListVector, scenario: Scenario, parameter_space: ParameterSpace) -> Experiment: experiment = rpy2py_recursive(obj) - configuration_id = str(experiment['id.configuration']) + configuration_id = str(experiment['id_configuration']) seed = int(experiment['seed']) if scenario.instances is not None: - instance_id = str(experiment['id.instance']) + instance_id = str(experiment['id_instance']) instance = scenario.instances[int(experiment['instance'])] else: instance_id = None @@ -129,8 +135,46 @@ def rpy2py_experiment(obj: ListVector, scenario: Scenario, parameter_space: Para return experiment +def py2rpy_expression(value: Any) -> RObject: + return robjects.r(f'expression({p.check_expression(value)})') + + +def py2rpy_quote(value: Any) -> RObject: + return robjects.r(f'quote({p.check_expression(value)})') + + def py2rpy_parameter_space(parameter_space: ParameterSpace) -> ListVector: - return _irace.readParameters(text=str(parameter_space)) + r_parameter_space = [] + for subspace in parameter_space.params.values(): + if subspace.condition is not None: + condition = py2rpy_expression(subspace.condition) + else: + condition = True + if isinstance(subspace, p.Real) or isinstance(subspace, p.Integer): + constructor = _irace.param_real if isinstance(subspace, p.Real) else _irace.param_int + lower = py2rpy_expression(subspace.lower) + upper = py2rpy_expression(subspace.upper) + transf = "log" if subspace.log else "" + r_subspace = constructor(name=subspace.name, lower=lower, upper=upper, condition=condition, transf=transf) + elif isinstance(subspace, p.Bool): + # `bool` is represented as discrete with `["0", "1"]` variants. + values = BoolVector([False, True]) + r_subspace = _irace.param_cat(subspace.name, values=values, condition=condition) + elif isinstance(subspace, p.Categorical) or isinstance(subspace, p.Ordinal): + # categorical and ordinal are represented as integers. + values = IntVector(list(range(len(subspace.values)))) + r_subspace = _irace.param_cat(subspace.name, values=values, condition=condition) + else: + raise ValueError("unknown parameter type") + + r_parameter_space.append(r_subspace) + + if parameter_space.forbidden is not None: + forbidden = py2rpy_expression(p.any(*parameter_space.forbidden)) + else: + forbidden = '' + + return _irace.parametersNew(*r_parameter_space, forbidden=forbidden) def py2rpy_target_runner(target_runner: TargetRunner, scenario: Scenario, @@ -142,24 +186,26 @@ def inner(experiment: ListSexpVector, _: ListSexpVector) -> ListVector: experiment = rpy2py_experiment(ListVector(experiment), scenario, parameter_space) try: - result = target_runner(experiment=experiment, scenario=scenario) + result = target_runner(experiment, scenario) + + if isinstance(result, Collection) and len(result) == 2: + cost, time = result + r_result = ListVector(dict(cost=float(cost), time=float(time))) + elif isinstance(result, Mapping): + r_result = ListVector({key: float(value) for key, value in result.items()}) + else: + r_result = ListVector(dict(cost=float(result))) + except Exception as e: - return ListVector(dict(cost=math.inf, error=str(e))) - - if isinstance(result, float): - return ListVector(dict(cost=float(result))) - elif isinstance(result, tuple): - cost, time = result - return ListVector(dict(cost=float(cost), time=float(time))) - elif isinstance(result, dict): - return ListVector({key: float(value) for key, value in result.items()}) - else: - raise NotImplementedError("`target_runner` returned an invalid result") + r_result = ListVector(dict(cost=math.inf, error=str(e))) + + return r_result return inner -def py2rpy_scenario(scenario: Scenario, r_target_runner: SexpClosure) -> ListVector: +def py2rpy_scenario(scenario: Scenario, r_target_runner: SexpClosure, + r_parameter_space: Optional[ListVector] = None) -> ListVector: r_scenario = { 'targetRunner': r_target_runner, 'elitist': int(scenario.elitist), @@ -169,6 +215,9 @@ def py2rpy_scenario(scenario: Scenario, r_target_runner: SexpClosure) -> ListVec 'parallel': scenario.n_jobs, } + if r_parameter_space is not None: + r_scenario['parameters'] = r_parameter_space + if scenario.max_experiments is not None: r_scenario['maxExperiments'] = scenario.max_experiments @@ -192,4 +241,4 @@ def py2rpy_scenario(scenario: Scenario, r_target_runner: SexpClosure) -> ListVec if scenario.seed is not None: r_scenario['seed'] = scenario.seed - return _irace.checkScenario(ListVector(r_scenario)) + return ListVector(r_scenario) diff --git a/irace/base.py b/irace/base.py index 5301cf6..46d22ac 100644 --- a/irace/base.py +++ b/irace/base.py @@ -7,31 +7,30 @@ from .scenario import Scenario -def irace(target_runner: TargetRunner, scenario: Scenario, parameter_space: ParameterSpace, return_df: bool = False, +def irace(target_runner: TargetRunner, parameter_space: ParameterSpace, scenario: Scenario, return_df: bool = False, remove_metadata: bool = True) -> pd.DataFrame | list[dict[str, Any]]: """irace: Iterated Racing for Automatic Algorithm Configuration.""" - from ._rpy2 import _irace, py2rpy_scenario, py2rpy_target_runner, \ - py2rpy_parameter_space, converter, convert_result + from ._rpy2 import _irace, py2rpy_scenario, py2rpy_target_runner, py2rpy_parameter_space, converter, convert_result r_target_runner = py2rpy_target_runner(target_runner, scenario, parameter_space) - r_scenario = py2rpy_scenario(scenario, r_target_runner) r_parameter_space = py2rpy_parameter_space(parameter_space) + r_scenario = py2rpy_scenario(scenario, r_target_runner, r_parameter_space) - result = _irace.irace(r_scenario, r_parameter_space) + result = _irace.irace(r_scenario) result = converter.rpy2py(result) return convert_result(result, parameter_space, return_df=return_df, remove_metadata=remove_metadata) class Run: - """A single run of irace with a given target runner, scenario and parameter space.""" + """A single run of irace with a given target runner and scenario.""" - def __init__(self, target_runner: TargetRunner, scenario: Scenario, parameter_space: ParameterSpace, + def __init__(self, target_runner: TargetRunner, parameter_space: ParameterSpace, scenario: Scenario, name: Optional[str] = None) -> None: self.target_runner = target_runner - self.scenario = scenario self.parameter_space = parameter_space + self.scenario = scenario self.name = name @@ -45,7 +44,7 @@ def multi_irace(runs: Iterable[Run], n_jobs: int = 1, return_df: bool = False, r @delayed def inner(run: Run) -> pd.DataFrame | list[dict[str, Any]]: - return irace(target_runner=run.target_runner, scenario=run.scenario, parameter_space=run.parameter_space, + return irace(target_runner=run.target_runner, scenario=run.scenario, return_df=return_df, remove_metadata=remove_metadata) results = Parallel(n_jobs=n_jobs)(inner(run) for run in runs) diff --git a/irace/experiment.py b/irace/experiment.py index c5870f3..3491cb1 100644 --- a/irace/experiment.py +++ b/irace/experiment.py @@ -1,26 +1,13 @@ +from dataclasses import dataclass from typing import Any, Optional +@dataclass class Experiment: - """Metadata about the current experiment i.e. target runner execution.""" + """Metadata about the current experiment, i.e. target runner execution.""" configuration_id: str instance_id: Optional[str] instance: Optional[Any] seed: int - bound: int configuration: dict[str, Any] - - def __init__( - self, - configuration_id: str, - instance_id: Optional[str], - instance: Optional[Any], - seed: int, - configuration: dict[str, Any], - ) -> None: - self.configuration_id = configuration_id - self.instance_id = instance_id - self.seed = seed - self.instance = instance - self.configuration = configuration diff --git a/irace/params.py b/irace/params.py index cf8ba4e..ca8364a 100644 --- a/irace/params.py +++ b/irace/params.py @@ -1,88 +1,232 @@ -from abc import ABC +from abc import ABCMeta, abstractmethod from collections import OrderedDict -from typing import Optional, Iterable, Union, Sequence +from functools import reduce +from typing import Optional, Iterable, Union, Sequence, Any, Self -import numpy as np +class RExpression(metaclass=ABCMeta): + """An R expression that can be quoted.""" -class ParameterSubspace(ABC): + @abstractmethod + def to_r_expression(self) -> str: + pass - def __init__(self, name: str, condition: Optional[str]): + +def check_expression(value: Any) -> str: + if isinstance(value, RExpression): + return value.to_r_expression() + else: + return value + + +class CompositeRExpression(RExpression): + + def __init__(self, left: RExpression, right: RExpression, symbol: str, print_symbol: Optional[str] = None) -> None: + self.left = left + self.right = right + self.symbol = symbol + self.print_symbol = print_symbol + + def to_r_expression(self) -> str: + return f"({self.left.to_r_expression()} {self.symbol} {self.right.to_r_expression()})" + + def __str__(self) -> str: + symbol = self.print_symbol if self.print_symbol is not None else self.symbol + return f"({self.left} {symbol} {self.right})" + + +class RCondition(RExpression, metaclass=ABCMeta): + + def both(self, other: Self) -> Self: + return CompositeRExpression(self, other, symbol='&', print_symbol='and') + + def one(self, other: Self) -> Self: + return CompositeRExpression(self, other, symbol='|', print_symbol='or') + + def negate(self) -> Self: + return NegateRCondition(self) + + +def all(*conditions: RCondition) -> RCondition: + return reduce(RCondition.both, conditions) + + +def any(*conditions: RCondition) -> RCondition: + return reduce(RCondition.one, conditions) + + +class NegateRCondition(RCondition): + + def __init__(self, condition: RCondition) -> None: + self.condition = condition + + def to_r_expression(self) -> str: + return f"(!{self.condition.to_r_expression()})" + + def __str__(self) -> str: + return f"(!{self.condition})" + + +class OneOfRCondition(RCondition): + def __init__(self, name: str, variants: Sequence) -> None: + self.name = name + self.variants = variants + + def to_r_expression(self) -> str: + return f"({self.name} %in% c({', '.join(map(str, self.variants))}))" + + def __str__(self) -> str: + return f"({self.name} in [{', '.join(map(str, self.variants))}])" + + +class ComparisonRCondition(RCondition, CompositeRExpression): + pass + + +class RFuncCall(RExpression): + + def __init__(self, symbol: str, *args: RExpression, **kwargs: RExpression) -> None: + self.symbol = symbol + self.args = args + self.kwargs = kwargs + + def to_r_expression(self) -> str: + str_args = [arg.to_r_expression() for arg in self.args] + str_kwargs = [f"{name}={value.to_r_expression()}" for name, value in self.kwargs] + return f"{self.symbol}({', '.join([*str_args, *str_kwargs])})" + + +class RLiteral(RExpression): + + def __init__(self, value: Any) -> None: + assert type(value) in (int, float, str, bool) + self.value = value + + def to_r_expression(self) -> str: + if isinstance(self.value, str): + return f"\"{self.value}\"" + elif isinstance(self.value, bool): + return 'TRUE' if self.value else 'FALSE' + else: + return str(self.value) + + def __str__(self) -> str: + return str(self.value) if not isinstance(self.value, str) else f"\"{self.value}\"" + + +def check_literal(value: Any) -> RExpression: + if isinstance(value, RExpression): + return value + else: + return RLiteral(value) + + +class ValueOf(RExpression): + """Represents the value of a parameter at runtime.""" + + def __init__(self, name: str) -> None: + self.name = name + + def min(self, other: Any) -> RExpression: + return RFuncCall('min', self, check_literal(other)) + + def max(self, other: Any) -> RExpression: + return RFuncCall('max', self, check_literal(other)) + + def eq(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='==') + + def neq(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='!=') + + def leq(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='<=') + + def geq(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='>=') + + def le(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='<') + + def ge(self, value: Any) -> RCondition: + return ComparisonRCondition(self, check_literal(value), symbol='>') + + def inrange(self, lower: Any, upper: Any) -> RCondition: + return self.ge(lower).both(self.le(upper)) + + def isin(self, variants: Sequence) -> RCondition: + return OneOfRCondition(self.name, variants) + + def notin(self, variants: Sequence) -> RCondition: + return self.isin(variants).negate() + + def to_r_expression(self) -> str: + return self.name + + def __str__(self) -> str: + return self.name + + +class ParameterSubspace(metaclass=ABCMeta): + + def __init__(self, name: str, condition: Optional[str | RCondition]): self.name = name self.condition = condition - def format_condition(self) -> str: - return f"| {self.condition}" if self.condition is not None else "" + def _fmt_condition(self): + return "" if self.condition is None else f"if {self.condition}" -class NumericalParameterSubspace(ParameterSubspace, ABC): +class NumericalParameterSubspace(ParameterSubspace, metaclass=ABCMeta): def __init__( self, name: str, - lower: Union[float, str, "NumericalParameterSubspace"], - upper: Union[float, str, "NumericalParameterSubspace"], + lower: Union[float, str | RExpression], + upper: Union[float, str | RExpression], log: bool = False, - condition: Optional[str] = None + condition: Optional[str | RCondition] = None ) -> None: super().__init__(name=name, condition=condition) self.lower = lower self.upper = upper self.log = log - def format_bound(self, bound: Union[float, str, "NumericalParameterSubspace"]) -> str: - if isinstance(bound, float): - return np.format_float_positional(bound, trim='-') - elif isinstance(bound, str): - return f'"{bound}"' - elif isinstance(bound, type(self)): - return f'"{bound.name}"' - else: - return bound - - def _format_line(self, ty: str) -> str: - lower = self.format_bound(self.lower) - upper = self.format_bound(self.upper) - log = ",log" if self.log else "" - return f'{self.name} "" {ty}{log} ({lower}, {upper}) {self.format_condition()}' + def __str__(self) -> str: + log = " (log)" if self.log else "" + return f"{self.name}: ({check_expression(self.lower)}, {check_expression(self.upper)}){log}; {self._fmt_condition()}" class Real(NumericalParameterSubspace): """Real parameters are numerical parameters that can take floating-point values within a given range.""" - def __str__(self) -> str: - return self._format_line('r') - class Integer(NumericalParameterSubspace): """Integer parameters are numerical parameters that can take only integer values within the given range""" - def __str__(self) -> str: - return self._format_line('i') - -class DiscreteParameterSubspace(ParameterSubspace, ABC): - def __init__(self, name: str, variants: Sequence, condition: Optional[str] = None): +class DiscreteParameterSubspace(ParameterSubspace, metaclass=ABCMeta): + def __init__(self, name: str, variants: Sequence, condition: Optional[str | RCondition] = None): super().__init__(name=name, condition=condition) self.values = variants - def _format_line(self, ty: str) -> str: - return f'{self.name} "" {ty} ({",".join(map(str, range(len(self.values))))}) {self.format_condition()}' + def __str__(self) -> str: + return f"{self.name}: [{', '.join(map(str, self.values))}]; {self._fmt_condition()}" class Categorical(DiscreteParameterSubspace): """Categorical parameters are defined by a set of possible values specified as list.""" - def __str__(self) -> str: - return self._format_line('c') - class Bool(Categorical): """Boolean parameters are expressed as categorical parameters with values `True` and `False`.""" - def __init__(self, name: str, condition: Optional[str] = None) -> None: + def __init__(self, name: str, condition: Optional[str | RCondition] = None) -> None: super().__init__(name=name, variants=[False, True], condition=condition) + def __str__(self) -> str: + return f"{self.name}: bool; {self._fmt_condition()}" + class Ordinal(DiscreteParameterSubspace): """ @@ -90,9 +234,6 @@ class Ordinal(DiscreteParameterSubspace): possible values in the same format as for categorical parameters. """ - def __str__(self) -> str: - return self._format_line('o') - class ParameterSpace: """A parameter space.""" @@ -100,13 +241,14 @@ class ParameterSpace: params: dict[str, ParameterSubspace] forbidden: Optional[Iterable[str]] - def __init__(self, params: Iterable[ParameterSubspace], forbidden: Optional[Iterable[str]] = None) -> None: + def __init__(self, params: Iterable[ParameterSubspace], + forbidden: Optional[Iterable[str | RCondition]] = None) -> None: self.params = OrderedDict([(param.name, param) for param in params]) self.forbidden = forbidden def __str__(self): - forbidden = ["[forbidden]", *self.forbidden] if self.forbidden is not None else [] - return '\n'.join([str(param) for param in self.params.values()] + forbidden) + forbidden = ["[forbidden]", *map(str, self.forbidden)] if self.forbidden is not None else [] + return '\n'.join([*map(str, self.params.values()), *forbidden]) def get_subspace(self, name: str) -> Optional[ParameterSubspace]: return self.params.get(name) diff --git a/irace/scenario.py b/irace/scenario.py index d2a94d2..e325eb5 100644 --- a/irace/scenario.py +++ b/irace/scenario.py @@ -1,6 +1,7 @@ import os from pathlib import Path from typing import Optional, Sequence +from .params import ParameterSpace class Scenario: