-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexpe.py
120 lines (112 loc) · 3.78 KB
/
expe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
""" Packages import """
from env.synthetic import SyntheticNonlinModel
from env.nlp_dataset import HateSpeechEnv
from utils import (
plotRegret,
storeRegret,
)
import numpy as np
def MAB_expe(
    n_expe,
    n_features,
    n_arms,
    T,
    methods,
    param_dic,
    labels,
    colors,
    path,
    problem="Quadratic",
    doplot=False,
    freq_task=True,
    seed=2022,
    **kwargs,
):
    """
    Run regret experiments on the synthetic nonlinear bandit environment.

    Builds n_expe independent HyperMAB agents, each wrapping its own
    SyntheticNonlinModel, passes them to storeRegret and, if requested,
    plots the outcome with plotRegret.

    :param n_expe: int, number of independent experiments (one model is built per experiment)
    :param n_features: int, dimension of feature vectors
    :param n_arms: int, number of arms
    :param T: int, time horizon
    :param methods: list, algorithms to use
    :param param_dic: dict, parameters associated to each algorithm (see main for formatting)
    :param labels: list, labels for the curves
    :param colors: list, colors for the curves
    :param path: str, forwarded unchanged to storeRegret and plotRegret
    :param problem: str, "Quadratic" (reward version "v1") or "Neural" (reward version "v2")
    :param doplot: boolean, plot the curves or not
    :param freq_task: boolean, forwarded to SyntheticNonlinModel as freq_task
    :param seed: int, forwarded to storeRegret
    :param kwargs: "all_arms" is required as soon as a model is built (passed to the
        environment as all_actions); "eta" is optional and defaults to 0.1
    :return: the value returned by storeRegret (per the original documentation: dict with
        regrets, quantiles, means, stds of final regrets for each method)
    :raises NotImplementedError: if problem is not one of the supported names
    :raises KeyError: if "all_arms" is missing from kwargs while n_expe > 0
    """
    # Map each supported problem to the reward version understood by the environment.
    reward_versions = {"Quadratic": "v1", "Neural": "v2"}
    if problem not in reward_versions:
        # Without this guard an unknown problem left reward_version unbound and
        # crashed further down with an unrelated-looking NameError.
        raise NotImplementedError(
            f"Unsupported problem {problem!r}; expected one of {sorted(reward_versions)}."
        )
    reward_version = reward_versions[problem]

    # Kept as a function-local import, as in the original code.
    # NOTE(review): presumably avoids a circular or heavy import at module load - confirm.
    from agent.hyper import HyperMAB

    # One fresh environment/agent pair per experiment so the runs stay independent.
    models = [
        HyperMAB(
            SyntheticNonlinModel(
                n_features,
                n_arms,
                all_actions=kwargs["all_arms"],
                reward_version=reward_version,
                freq_task=freq_task,
                eta=kwargs.get("eta", 0.1),
            )
        )
        for _ in range(n_expe)
    ]

    title = f"Nonlinear Bandit Model - n_arms: {n_arms} - n_features: {n_features} - reward: {reward_version}"
    print(f"Begin experiments on '{title}'")
    results = storeRegret(
        models, methods, param_dic, n_expe, T, path, seed, use_torch=True
    )
    if doplot:
        plotRegret(labels, results, colors, title, path, log=False)
    return results
def Textual_expe(
    n_expe,
    n_features,
    n_arms,
    T,
    methods,
    param_dic,
    labels,
    colors,
    path,
    problem="hatespeech",
    llm_name="gpt2",
    doplot=False,
    seed=2022,
    **kwargs,
):
    """
    Run regret experiments on a text-based bandit environment.

    Builds n_expe independent HyperMAB agents, each wrapping its own
    HateSpeechEnv, passes them to storeRegret and, if requested, plots the
    outcome with plotRegret.

    :param n_expe: int, number of independent experiments (one model is built per experiment)
    :param n_features: int, dimension of feature vectors
    :param n_arms: int, number of arms
    :param T: int, time horizon
    :param methods: list, algorithms to use
    :param param_dic: dict, parameters associated to each algorithm (see main for formatting)
    :param labels: list, labels for the curves
    :param colors: list, colors for the curves
    :param path: str, forwarded unchanged to storeRegret and plotRegret
    :param problem: str, only "hatespeech" is implemented
    :param llm_name: str, forwarded to HateSpeechEnv as llm_name
    :param doplot: boolean, plot the curves or not
    :param seed: int, forwarded to storeRegret
    :param kwargs: accepted so callers can share one call signature; not used here
    :return: the value returned by storeRegret (per the original documentation: dict with
        regrets, quantiles, means, stds of final regrets for each method)
    :raises NotImplementedError: if problem is anything other than "hatespeech"
    """
    # Fail fast, before importing the agent or building any environment.
    if problem != "hatespeech":
        raise NotImplementedError(
            f"Unsupported problem {problem!r}; only 'hatespeech' is implemented."
        )

    # Kept as a function-local import, as in the original code.
    # NOTE(review): presumably avoids a circular or heavy import at module load - confirm.
    from agent.hyper import HyperMAB

    # One fresh environment/agent pair per experiment so the runs stay independent.
    models = [
        HyperMAB(HateSpeechEnv(n_features, n_arms, llm_name=llm_name))
        for _ in range(n_expe)
    ]
    title = f"HateSpeech - n_arms: {n_arms} - n_features: {n_features}"

    print(f"Begin experiments on '{title}'")
    results = storeRegret(
        models, methods, param_dic, n_expe, T, path, seed, use_torch=True
    )
    if doplot:
        plotRegret(labels, results, colors, title, path, log=False)
    return results