Skip to content

Commit

Permalink
Merge pull request #95 from valory-xyz/fix/strategy
Browse files Browse the repository at this point in the history
Fix strategy
  • Loading branch information
Adamantios authored Sep 22, 2023
2 parents d373f85 + d831052 commit 2e53978
Show file tree
Hide file tree
Showing 20 changed files with 170 additions and 92 deletions.
12 changes: 6 additions & 6 deletions packages/packages.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"dev": {
"skill/valory/market_manager_abci/0.1.0": "bafybeibwe3tmsonkej2v5cnclsyfi4bfd5ievxzkrpc4lg6fqqk6s7hcny",
"skill/valory/decision_maker_abci/0.1.0": "bafybeifssrmowov77evtvevc7qophahlcskuxik573y2y4nmf64xz7oinm",
"skill/valory/trader_abci/0.1.0": "bafybeidm3awh7qul7ojgb2mthlhskymddd27fw2ojicvqkgsrpr2vmlhki",
"skill/valory/market_manager_abci/0.1.0": "bafybeig223xrxbtfx5adwhc7m6rpdbevsqm5osfq66oijucs2gzn4hehsm",
"skill/valory/decision_maker_abci/0.1.0": "bafybeieqyd5jek55q57lg77hey3oapppoosaphhyxxlulx52gg2ahkqdba",
"skill/valory/trader_abci/0.1.0": "bafybeighllbhqgeryjaew2heequ4iyaghevr3vnvvpx2pl3foqopk4k4zi",
"contract/valory/market_maker/0.1.0": "bafybeiftimqgvrbval2lxp7au6y72amioo4gtcdth2dflrbwa47i6opyb4",
"agent/valory/trader/0.1.0": "bafybeiep2ccmexjgeqiijonjwggjrymdtss3otvx5jyf555yo2d43w346e",
"service/valory/trader/0.1.0": "bafybeia5hy7yuxm7hl5xtx57vc4x3ri6o4mfhaoeqrnwpjhlaucswcnrki",
"agent/valory/trader/0.1.0": "bafybeibabgxao6ljhmwiztsquug3v6i6jo4s3izlk4zs4so4jfnvfqbxnm",
"service/valory/trader/0.1.0": "bafybeiaq5frvxzehiqyifdvvs35tspv7a32wp7ve7u7o6nflrkvbza6yhu",
"contract/valory/erc20/0.1.0": "bafybeifjwr6rwklgg2uk2zkfysn55qqy7dfi4jx7sek6lzdup37fynhpxe",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeib4up7sqeptbuyawjkn6zwucxdknk3c4ah7o3q5iwdkebz4y5fxmq",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeidel3rxm55y2qqewhmwroybs5j7t6ma6awmxbo7xjv3jvhmb6dv7m",
"contract/valory/mech/0.1.0": "bafybeiehjnwd63xp6ttfsskhxsphnvpjpwrfqzfltjvqn6mjqkemoo2qge",
"contract/valory/realitio/0.1.0": "bafybeicdgm2a7evjw6szcpo3uaam5mzd6axtevtzwvejr6uaeymbg437da",
"contract/valory/realitio_proxy/0.1.0": "bafybeibvndq6756qck7forgeavhdbn6ykgqs2ufyg7n5g6qdfpveatxuwy",
Expand Down
10 changes: 5 additions & 5 deletions packages/valory/agents/trader/aea-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ skills:
- valory/reset_pause_abci:0.1.0:bafybeifoihgilpfr76hc5skzspm6qehkwivx7ld2cy3veipcsi4gr2c7na
- valory/termination_abci:0.1.0:bafybeigcsls72uosoui2y5ppmnvsljjhnxakkeh3fdohklcg66aqq4g7xu
- valory/transaction_settlement_abci:0.1.0:bafybeiglsnh2hvfau5gab7requh34k4sbqwbjvrhhqjpes4hakcwq46cpi
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeib4up7sqeptbuyawjkn6zwucxdknk3c4ah7o3q5iwdkebz4y5fxmq
- valory/market_manager_abci:0.1.0:bafybeibwe3tmsonkej2v5cnclsyfi4bfd5ievxzkrpc4lg6fqqk6s7hcny
- valory/decision_maker_abci:0.1.0:bafybeifssrmowov77evtvevc7qophahlcskuxik573y2y4nmf64xz7oinm
- valory/trader_abci:0.1.0:bafybeidm3awh7qul7ojgb2mthlhskymddd27fw2ojicvqkgsrpr2vmlhki
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeidel3rxm55y2qqewhmwroybs5j7t6ma6awmxbo7xjv3jvhmb6dv7m
- valory/market_manager_abci:0.1.0:bafybeig223xrxbtfx5adwhc7m6rpdbevsqm5osfq66oijucs2gzn4hehsm
- valory/decision_maker_abci:0.1.0:bafybeieqyd5jek55q57lg77hey3oapppoosaphhyxxlulx52gg2ahkqdba
- valory/trader_abci:0.1.0:bafybeighllbhqgeryjaew2heequ4iyaghevr3vnvvpx2pl3foqopk4k4zi
default_ledger: ethereum
required_ledgers:
- ethereum
Expand Down Expand Up @@ -187,7 +187,7 @@ models:
redeeming_batch_size: ${int:5}
slippage: ${float:0.01}
redeem_margin_days: ${int:15}
epsilon: ${float:0.1}
policy_epsilon: ${float:0.1}
irrelevant_tools: ${list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
Expand Down
10 changes: 5 additions & 5 deletions packages/valory/services/trader/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license: Apache-2.0
fingerprint:
README.md: bafybeigtuothskwyvrhfosps2bu6suauycolj67dpuxqvnicdrdu7yhtvq
fingerprint_ignore_patterns: []
agent: valory/trader:0.1.0:bafybeiep2ccmexjgeqiijonjwggjrymdtss3otvx5jyf555yo2d43w346e
agent: valory/trader:0.1.0:bafybeibabgxao6ljhmwiztsquug3v6i6jo4s3izlk4zs4so4jfnvfqbxnm
number_of_agents: 4
deployment: {}
---
Expand Down Expand Up @@ -104,7 +104,7 @@ type: skill
redeeming_batch_size: ${REDEEMING_BATCH_SIZE:int:1}
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
policy_epsilon: ${POLICY_EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
Expand Down Expand Up @@ -173,7 +173,7 @@ type: skill
redeeming_batch_size: ${REDEEMING_BATCH_SIZE:int:1}
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
policy_epsilon: ${POLICY_EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
Expand Down Expand Up @@ -240,7 +240,7 @@ type: skill
redeeming_batch_size: ${REDEEMING_BATCH_SIZE:int:1}
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
policy_epsilon: ${POLICY_EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
Expand Down Expand Up @@ -307,7 +307,7 @@ type: skill
redeeming_batch_size: ${REDEEMING_BATCH_SIZE:int:1}
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
policy_epsilon: ${POLICY_EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
Expand Down
5 changes: 5 additions & 0 deletions packages/valory/skills/decision_maker_abci/behaviours/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ def policy(self) -> EGreedyPolicy:
)
return self._policy

@property
def is_first_period(self) -> bool:
    """Tell whether the service is currently in its first period.

    :return: `True` when the synchronized data report a period count of 0, `False` otherwise.
    """
    period = self.synchronized_data.period_count
    return period == 0

@staticmethod
def wei_to_native(wei: int) -> float:
"""Convert WEI to native token."""
Expand Down
29 changes: 21 additions & 8 deletions packages/valory/skills/decision_maker_abci/behaviours/reedem.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

"""This module contains the redeeming state of the decision-making abci app."""

import json
from abc import ABC
from collections import defaultdict
from sys import maxsize
Expand Down Expand Up @@ -66,6 +67,7 @@ class RedeemInfoBehaviour(DecisionMakerBaseBehaviour, QueryingBehaviour, ABC):
def __init__(self, **kwargs: Any) -> None:
"""Initialize a `RedeemInfo` object."""
super().__init__(**kwargs)
self.utilized_tools: Dict[str, int] = {}
self.trades: Set[Trade] = set()

# blocks in which the markets were created mapped to the corresponding condition ids
Expand All @@ -89,6 +91,7 @@ def synced_timestamp(self) -> int:
def setup(self) -> None:
"""Setup the behaviour"""
self._policy = self.synchronized_data.policy
self.utilized_tools = self.synchronized_data.utilized_tools

def _set_block_number(self, trade: Trade) -> Generator:
"""Set the block number of the given trade's market."""
Expand All @@ -112,19 +115,25 @@ def _set_block_number(self, trade: Trade) -> Generator:
def _update_policy(self, update: Trade) -> None:
"""Update the policy."""
claimable_xdai = self.wei_to_native(update.claimable_amount)
tool_index = self.synchronized_data.utilized_tools[update.transactionHash]
self.policy.add_reward(tool_index, claimable_xdai)
# the mapping might not contain a tool for a bet placement because it might have happened on a previous run
tool_index = self.utilized_tools.get(update.transactionHash, None)
if tool_index is not None:
# we try to avoid an ever-increasing dictionary of utilized tools by removing a tool when not needed anymore
del self.utilized_tools[update.transactionHash]
self.policy.add_reward(tool_index, claimable_xdai)

def _stats_report(self) -> None:
"""Report policy statistics."""
stats_report = "Policy statistics so far:\n"
for i, tool in enumerate(self.synchronized_data.available_mech_tools):
stats_report = "Policy statistics so far (only for resolved markets):\n"
available_tools = self.synchronized_data.available_mech_tools
for i, tool in enumerate(available_tools):
stats_report += (
f"{tool} tool:\n"
f"\tTimes used: {self.policy.counts[i]}\n"
f"\tReward rate: {self.policy.reward_rates[i]}\n"
)
stats_report += f"Best tool so far is {self.policy.select_tool()}."
best_tool = available_tools[self.policy.best_tool]
stats_report += f"Best tool so far is {best_tool!r}."
self.context.logger.info(stats_report)

def update_redeem_info(self, chunk: list) -> Generator:
Expand Down Expand Up @@ -156,7 +165,8 @@ def update_redeem_info(self, chunk: list) -> Generator:
if update == unique_obj:
self.claimable_amounts[condition_id] += update.claimable_amount

self._stats_report()
if self.policy.has_updated:
self._stats_report()


class RedeemBehaviour(RedeemInfoBehaviour):
Expand Down Expand Up @@ -580,11 +590,14 @@ def async_act(self) -> Generator:
yield from self._clean_redeem_info()
agent = self.context.agent_address
redeem_tx_hex = yield from self._prepare_safe_tx()
tx_submitter = policy = None
tx_submitter = policy = utilized_tools = None
if redeem_tx_hex is not None:
tx_submitter = self.matching_round.auto_round_id()
policy = self.policy.serialize()
utilized_tools = json.dumps(self.utilized_tools)

payload = RedeemPayload(agent, tx_submitter, redeem_tx_hex, policy)
payload = RedeemPayload(
agent, tx_submitter, redeem_tx_hex, policy, utilized_tools
)

yield from self.finish_behaviour(payload)
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"""This module contains the behaviour of the skill which is responsible for selecting a mech tool."""

import json
from typing import Any, Generator, List, Optional
from typing import Any, Dict, Generator, List, Optional

from packages.valory.contracts.agent_registry.contract import AgentRegistryContract
from packages.valory.protocols.contract_api import ContractApiMessage
Expand Down Expand Up @@ -48,7 +48,7 @@ def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._mech_id: int = 0
self._mech_hash: str = ""
self.mech_tools: Optional[List[str]] = None
self._mech_tools: Optional[List[str]] = None

@property
def mech_id(self) -> int:
Expand All @@ -70,6 +70,25 @@ def mech_hash(self, mech_hash: str) -> None:
"""Set the hash of the mech agent."""
self._mech_hash = mech_hash

@property
def mech_tools(self) -> List[str]:
    """Return the mech agent's tools.

    :return: the list of tools stored on this behaviour.
    :raises ValueError: if the tools have not been set yet, i.e., they are still `None`.
    """
    tools = self._mech_tools
    if tools is not None:
        return tools
    raise ValueError("The mech's tools have not been set.")

@mech_tools.setter
def mech_tools(self, mech_tools: List[str]) -> None:
    """Store the given list as the mech agent's tools.

    :param mech_tools: the tools to keep on this behaviour.
    """
    self._mech_tools = mech_tools

@property
def utilized_tools(self) -> Dict[str, int]:
    """Return the utilized tools.

    :return: an empty mapping during the first period,
        otherwise the mapping held by the synchronized data.
    """
    # the synchronized data are only consulted after the first period
    return {} if self.is_first_period else self.synchronized_data.utilized_tools

@property
def mech_tools_api(self) -> AgentToolsSpecs:
"""Get the mech agent api specs."""
Expand Down Expand Up @@ -109,6 +128,7 @@ def _get_mech_hash(self) -> WaitableConditionType:

def _get_mech_tools(self) -> WaitableConditionType:
"""Get the mech agent's tools from IPFS."""
self.set_mech_agent_specs()
specs = self.mech_tools_api.get_spec()
res_raw = yield from self.get_http_response(**specs)
res = self.mech_tools_api.process_response(res_raw)
Expand All @@ -125,9 +145,15 @@ def _get_mech_tools(self) -> WaitableConditionType:
return False

self.context.logger.info(f"Retrieved the mech agent's tools: {res}.")
# keep only the relevant mech tools, sorted
# we sort the tools to avoid using dictionaries in the policy implementation,
# so that we can easily assess which index corresponds to which tool
res = sorted(set(res) - self.params.irrelevant_tools)
self.context.logger.info(f"Relevant tools to the prediction task: {res}.")

if len(res) == 0:
res = None
self.context.logger.error("The mech agent's tools are empty!")
self.context.logger.error("The relevant mech agent's tools are empty!")
return False
self.mech_tools = res
self.mech_tools_api.reset_retries()
return True
Expand All @@ -143,55 +169,55 @@ def _get_tools(
):
yield from self.wait_for_condition_with_sleep(step)

def _adjust_policy_tools(self, tools: List[str]) -> None:
def _adjust_policy_tools(self) -> None:
"""Add or remove tools from the policy to match the remote tools."""
local = self.synchronized_data.available_mech_tools

# remove tools if they are not available anymore
local = set(self.synchronized_data.available_mech_tools)
remote = set(tools)
relevant_remote = remote - self.params.irrelevant_tools
removed_tools_idx = [
idx for idx, tool in enumerate(local) if tool not in relevant_remote
]
if len(removed_tools_idx) > 0:
self.policy.remove_tools(removed_tools_idx)
# process the indices in reverse order to avoid index shifting when removing the unavailable tools later
reversed_idx = range(len(local) - 1, -1, -1)
removed_idx = [idx for idx in reversed_idx if local[idx] not in self.mech_tools]
self.policy.remove_tools(removed_idx)

# add tools if there are new ones available
new_tools = remote - local
n_new_tools = len(new_tools)
if n_new_tools > 0:
self.policy.add_new_tools(n_new_tools)
# process the indices in reverse order to avoid index shifting when adding the new tools later
reversed_idx = range(len(self.mech_tools) - 1, -1, -1)
new_idx = [idx for idx in reversed_idx if self.mech_tools[idx] not in local]
self.policy.add_new_tools(new_idx)

def _set_policy(self, tools: List[str]) -> None:
def _set_policy(self) -> None:
"""Set the E Greedy Policy."""
if self.synchronized_data.period_count == 0:
self._policy = EGreedyPolicy.initial_state(self.params.epsilon, len(tools))
if self.is_first_period:
n_relevant = len(self.mech_tools)
self._policy = EGreedyPolicy.initial_state(self.params.epsilon, n_relevant)
else:
self._policy = self.synchronized_data.policy
self._adjust_policy_tools(tools)
self._adjust_policy_tools()

def _select_tool(self) -> Generator[None, None, Optional[int]]:
"""Select a Mech tool based on an e-greedy policy and return its index."""
yield from self._get_tools()
if self.mech_tools is None:
return None

self._set_policy(self.mech_tools)
return self.policy.select_tool()
self._set_policy()
selected = self.policy.select_tool()
self.context.logger.info(f"Selected the mech tool {selected!r}.")
return selected

def async_act(self) -> Generator:
"""Do the action."""

with self.context.benchmark_tool.measure(self.behaviour_id).local():
mech_tools = policy = None
mech_tools = policy = utilized_tools = None
selected_tool = yield from self._select_tool()
if selected_tool is not None:
mech_tools = json.dumps(self.mech_tools)
policy = self.policy.serialize()
utilized_tools = json.dumps(self.utilized_tools, sort_keys=True)

payload = ToolSelectionPayload(
self.context.agent_address,
mech_tools,
policy,
utilized_tools,
selected_tool,
)

Expand Down
2 changes: 1 addition & 1 deletion packages/valory/skills/decision_maker_abci/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
# a slippage in the range of [0, 1] to apply to the `minOutcomeTokensToBuy` when buying shares on a fpmm
self._slippage = 0.0
self.slippage: float = self._ensure("slippage", kwargs, float)
self.epsilon: float = self._ensure("epsilon", kwargs, float)
self.epsilon: float = self._ensure("policy_epsilon", kwargs, float)
self.agent_registry_address: str = self._ensure(
"agent_registry_address", kwargs, str
)
Expand Down
2 changes: 2 additions & 0 deletions packages/valory/skills/decision_maker_abci/payloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class RedeemPayload(MultisigTxPayload):
"""Represents a transaction payload for preparing an on-chain transaction for redeeming."""

policy: Optional[str]
utilized_tools: Optional[str]


@dataclass(frozen=True)
Expand All @@ -77,4 +78,5 @@ class ToolSelectionPayload(BaseTxPayload):

mech_tools: Optional[str]
policy: Optional[str]
utilized_tools: Optional[str]
index: Optional[int]
Loading

0 comments on commit 2e53978

Please sign in to comment.