Merge pull request #99 from valory-xyz/feat/store-tools

Feat/store tools
valory-xyz · Oct 6, 2023 · 129a2c5 · 129a2c5
2 parents fd710b6 + 7bca54b
commit 129a2c5
Show file tree

Hide file tree

Showing 10 changed files with 643 additions and 523 deletions.
diff --git a/packages/packages.json b/packages/packages.json
@@ -1,13 +1,13 @@
 {
     "dev": {
         "skill/valory/market_manager_abci/0.1.0": "bafybeidnqerwkljbjgog73qaa5duu5ymsfjs6jsszeupseshx7go3n6afq",
-        "skill/valory/decision_maker_abci/0.1.0": "bafybeiaubnxcztoon3izj4js6wajzdv3a64qst3cwow23q345526exbjsa",
-        "skill/valory/trader_abci/0.1.0": "bafybeiammcpvex74hcsju5lryontcejkm5ilr4zef6wjjems67wbjomrha",
+        "skill/valory/decision_maker_abci/0.1.0": "bafybeib2ipy6vo3mu2hzmgemvdrf63aczejgza7hqxfk4vb4ebjonwbma4",
+        "skill/valory/trader_abci/0.1.0": "bafybeic5uljeduzndpw3t32xwm65l7h7bk7vbqvir5ilvkevfucyfo6t4u",
         "contract/valory/market_maker/0.1.0": "bafybeidaz4dol7qsrwitw5jwasjtvd2vtlxhxizwkzen5kurblwacz4biu",
-        "agent/valory/trader/0.1.0": "bafybeiclx4xy5ot25cfo4s3iw5nrj3fbgzv32rvl24nqrb2uwlkgp25kam",
-        "service/valory/trader/0.1.0": "bafybeiakf6j4mxljbbajvlxbs3byyria3ug2trscphycrehhcq6jekelae",
+        "agent/valory/trader/0.1.0": "bafybeigog4gb35dezhkamaudoo45572ecunsgo2av33baojyg7dye2zvoa",
+        "service/valory/trader/0.1.0": "bafybeifdh22wk63xn6xi43mxlo3lxkbmnz2rv4eqpvnamhaeribpwjlba4",
         "contract/valory/erc20/0.1.0": "bafybeid6p64a6tnovatbwybc6ocdd4u7bqkxsb4ks52rvf7ozcxxl6iaf4",
-        "skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeiaoczrcwq7vqwuibuirphjfqiah6hq3idufkqkzg4lcybfe6btaom",
+        "skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeihqeeiehyr3wgruyia4izbp75qz6rilcvbbc6ceb5o4jtvwqzboyy",
         "contract/valory/mech/0.1.0": "bafybeiddc6pgurpyja2k64wzsb3jgcvl254s7rplgt5iooftsyfalqlbfq",
         "contract/valory/realitio/0.1.0": "bafybeibzyfaeaxgyqnygtduhprs47fhtskh2vnmw2cvpemqhkgc72q4wpm",
         "contract/valory/realitio_proxy/0.1.0": "bafybeibvndq6756qck7forgeavhdbn6ykgqs2ufyg7n5g6qdfpveatxuwy",

diff --git a/packages/valory/agents/trader/aea-config.yaml b/packages/valory/agents/trader/aea-config.yaml
@@ -41,10 +41,10 @@ skills:
 - valory/reset_pause_abci:0.1.0:bafybeigeoc363gv3wp2rrmk6p2fdxney33nxd3owtpfugzapgruwe4klyu
 - valory/termination_abci:0.1.0:bafybeigqpij2sgrpnilqjljfciixop4fldq5qceixc7534q6af4potdmdm
 - valory/transaction_settlement_abci:0.1.0:bafybeia7rzsbea3ch4gcafyp3z6uvqh4npws2xpdwbkkdbrqqpjops7nui
-- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiaoczrcwq7vqwuibuirphjfqiah6hq3idufkqkzg4lcybfe6btaom
+- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeihqeeiehyr3wgruyia4izbp75qz6rilcvbbc6ceb5o4jtvwqzboyy
 - valory/market_manager_abci:0.1.0:bafybeidnqerwkljbjgog73qaa5duu5ymsfjs6jsszeupseshx7go3n6afq
-- valory/decision_maker_abci:0.1.0:bafybeiaubnxcztoon3izj4js6wajzdv3a64qst3cwow23q345526exbjsa
-- valory/trader_abci:0.1.0:bafybeiammcpvex74hcsju5lryontcejkm5ilr4zef6wjjems67wbjomrha
+- valory/decision_maker_abci:0.1.0:bafybeib2ipy6vo3mu2hzmgemvdrf63aczejgza7hqxfk4vb4ebjonwbma4
+- valory/trader_abci:0.1.0:bafybeic5uljeduzndpw3t32xwm65l7h7bk7vbqvir5ilvkevfucyfo6t4u
 default_ledger: ethereum
 required_ledgers:
 - ethereum

diff --git a/packages/valory/services/trader/service.yaml b/packages/valory/services/trader/service.yaml
@@ -7,7 +7,7 @@ license: Apache-2.0
 fingerprint:
   README.md: bafybeigtuothskwyvrhfosps2bu6suauycolj67dpuxqvnicdrdu7yhtvq
 fingerprint_ignore_patterns: []
-agent: valory/trader:0.1.0:bafybeiclx4xy5ot25cfo4s3iw5nrj3fbgzv32rvl24nqrb2uwlkgp25kam
+agent: valory/trader:0.1.0:bafybeigog4gb35dezhkamaudoo45572ecunsgo2av33baojyg7dye2zvoa
 number_of_agents: 4
 deployment: {}
 ---

diff --git a/packages/valory/skills/decision_maker_abci/behaviours/reedem.py b/packages/valory/skills/decision_maker_abci/behaviours/reedem.py
@@ -174,6 +174,8 @@ class RedeemBehaviour(RedeemInfoBehaviour):
 
     matching_round = RedeemRound
 
+    UTILIZED_TOOLS_PATH = "utilized_tools.json"
+
     def __init__(self, **kwargs: Any) -> None:
         """Initialize `RedeemBehaviour`."""
         super().__init__(**kwargs)
@@ -583,6 +585,12 @@ def _prepare_safe_tx(self) -> Generator[None, None, Optional[str]]:
         self.context.logger.info("Transaction successfully prepared.")
         return self.tx_hex
 
+    def _store_utilized_tools(self) -> None:
+        """Persist the utilized tools as JSON in the policy store directory.
+
+        The file is `UTILIZED_TOOLS_PATH`, located under `params.policy_store_path`.
+        """
+        path = self.params.policy_store_path / self.UTILIZED_TOOLS_PATH
+        # serialize before touching the file: opening in "w" mode truncates it,
+        # so a serialization error must not be able to wipe the stored tools
+        serialized = json.dumps(self.utilized_tools)
+        path.write_text(serialized)
+
     def async_act(self) -> Generator:
         """Do the action."""
         with self.context.benchmark_tool.measure(self.behaviour_id).local():
@@ -599,5 +607,5 @@ def async_act(self) -> Generator:
             payload = RedeemPayload(
                 agent, tx_submitter, redeem_tx_hex, policy, utilized_tools
             )
-
+        self._store_utilized_tools()
         yield from self.finish_behaviour(payload)
diff --git a/packages/valory/skills/decision_maker_abci/behaviours/tool_selection.py b/packages/valory/skills/decision_maker_abci/behaviours/tool_selection.py
@@ -43,6 +43,10 @@ class ToolSelectionBehaviour(DecisionMakerBaseBehaviour):
 
     matching_round = ToolSelectionRound
 
+    POLICY_STORE = "policy_store.json"
+    AVAILABLE_TOOLS_STORE = "available_tools_store.json"
+    UTILIZED_TOOLS_STORE = "utilized_tools.json"
+
     def __init__(self, **kwargs: Any) -> None:
         """Initialize Behaviour."""
         super().__init__(**kwargs)
@@ -86,6 +90,9 @@ def mech_tools(self, mech_tools: List[str]) -> None:
     def utilized_tools(self) -> Dict[str, int]:
         """Get the utilized tools."""
         if self.is_first_period:
+            tools = self._try_recover_utilized_tools()
+            if tools is not None:
+                return tools
             return {}
         return self.synchronized_data.utilized_tools
 
@@ -188,12 +195,59 @@ def _adjust_policy_tools(self) -> None:
     def _set_policy(self) -> None:
         """Set the E Greedy Policy."""
         if self.is_first_period:
-            n_relevant = len(self.mech_tools)
-            self._policy = EGreedyPolicy.initial_state(self.params.epsilon, n_relevant)
+            self._policy = self._get_init_policy()
+            recovered_tools = self._try_recover_mech_tools()
+            # merge without duplicates while keeping the first-seen order:
+            # iterating a `set` of strings is not stable across processes
+            # (hash randomization), and tools are selected by their index
+            merged_tools = self.mech_tools + recovered_tools
+            self.mech_tools = list(dict.fromkeys(merged_tools))
         else:
             self._policy = self.synchronized_data.policy
             self._adjust_policy_tools()
 
+    def _get_init_policy(self) -> EGreedyPolicy:
+        """Return the policy to start from in the first period.
+
+        :return: the policy recovered from the policy store if that succeeds,
+            otherwise a new one initialized with `params.epsilon`
+            and the number of mech tools.
+        """
+        recovered = self._try_recover_policy()
+        if recovered is None:
+            # nothing could be recovered, so start from a clean initial state
+            n_tools = len(self.mech_tools)
+            return EGreedyPolicy.initial_state(self.params.epsilon, n_tools)
+
+        return recovered
+
+    def _try_recover_policy(self) -> Optional[EGreedyPolicy]:
+        """Load and deserialize the policy from the policy store, if possible.
+
+        :return: the deserialized policy,
+            or `None` (after logging a warning) on any failure.
+        """
+        try:
+            store = self.params.policy_store_path / self.POLICY_STORE
+            serialized = store.read_text()
+            return EGreedyPolicy.deserialize(serialized)
+        except Exception as exc:
+            self.context.logger.warning(f"Could not recover the policy: {exc}.")
+            return None
+
+    def _try_recover_utilized_tools(self) -> Optional[Dict[str, Any]]:
+        """Load the utilized tools from their JSON file in the policy store.
+
+        :return: the decoded content of the file,
+            or `None` (after logging a warning) on any failure.
+        """
+        try:
+            store = self.params.policy_store_path / self.UTILIZED_TOOLS_STORE
+            with open(store, "r") as stream:
+                content = stream.read()
+            return json.loads(content)
+        except Exception as exc:
+            self.context.logger.warning(f"Could not recover the tools: {exc}.")
+            return None
+
+    def _try_recover_mech_tools(self) -> List[str]:
+        """Load the available mech tools from their JSON file in the policy store.
+
+        :return: the decoded content of the file,
+            or an empty list (after logging a warning) on any failure.
+        """
+        try:
+            store = self.params.policy_store_path / self.AVAILABLE_TOOLS_STORE
+            return json.loads(store.read_text())
+        except Exception as exc:
+            self.context.logger.warning(f"Could not recover the tools: {exc}.")
+            return []
+
     def _select_tool(self) -> Generator[None, None, Optional[int]]:
         """Select a Mech tool based on an e-greedy policy and return its index."""
         yield from self._get_tools()
@@ -203,9 +257,20 @@ def _select_tool(self) -> Generator[None, None, Optional[int]]:
         self.context.logger.info(f"Selected the mech tool {selected!r}.")
         return selected_idx
 
+    def _store_policy(self) -> None:
+        """Persist the serialized policy in the policy store directory."""
+        policy_path = self.params.policy_store_path / self.POLICY_STORE
+        # serialize before opening: "w" mode truncates the file on open, so a
+        # failing `serialize()` must not be able to wipe the stored policy
+        serialized = self.policy.serialize()
+        with open(policy_path, "w") as f:
+            f.write(serialized)
+
+    def _store_available_mech_tools(self) -> None:
+        """Persist the available mech tools as JSON in the policy store directory."""
+        tools_path = self.params.policy_store_path / self.AVAILABLE_TOOLS_STORE
+        # serialize before opening: "w" mode truncates the file on open, so a
+        # serialization error must not be able to wipe the stored tools
+        serialized = json.dumps(self.mech_tools)
+        with open(tools_path, "w") as f:
+            f.write(serialized)
+
     def async_act(self) -> Generator:
         """Do the action."""
-
         with self.context.benchmark_tool.measure(self.behaviour_id).local():
             mech_tools = policy = utilized_tools = None
             selected_tool = yield from self._select_tool()
@@ -222,4 +287,6 @@ def async_act(self) -> Generator:
                 selected_tool,
             )
 
+        self._store_policy()
+        self._store_available_mech_tools()
         yield from self.finish_behaviour(payload)
diff --git a/packages/valory/skills/decision_maker_abci/models.py b/packages/valory/skills/decision_maker_abci/models.py
@@ -20,8 +20,10 @@
 """This module contains the models for the skill."""
 
 import json
+import os
 import re
 from dataclasses import dataclass
+from pathlib import Path
 from string import Template
 from typing import Any, Dict, Optional, Set
 
@@ -126,6 +128,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.agent_registry_address: str = self._ensure(
             "agent_registry_address", kwargs, str
         )
+        self.policy_store_path: Path = self.get_policy_store_path(kwargs)
         self.irrelevant_tools: set = set(self._ensure("irrelevant_tools", kwargs, list))
         super().__init__(*args, **kwargs)
 
@@ -160,6 +163,20 @@ def get_bet_amount(self, confidence: float) -> int:
         threshold = round(confidence, 1)
         return self.bet_amount_per_threshold[threshold]
 
+    def get_policy_store_path(self, kwargs: Dict) -> Path:
+        """Read the `policy_store_path` setting and validate it.
+
+        :param kwargs: the keyword arguments the setting is taken from via `_ensure`.
+        :return: the configured path as a `Path`.
+        :raises ValueError: if the path is not an existing directory,
+            or is not both writable and readable.
+        """
+        path = self._ensure("policy_store_path", kwargs, str)
+        is_usable_dir = (
+            os.path.isdir(path)
+            and os.access(path, os.W_OK)
+            and os.access(path, os.R_OK)
+        )
+        if not is_usable_dir:
+            # NOTE: a missing read permission also ends up here,
+            # although the message only mentions writability
+            raise ValueError(
+                f"Policy store path {path!r} is not a directory or is not writable."
+            )
+        return Path(path)
+
 
 class MechResponseSpecs(ApiSpecs):
     """A model that wraps ApiSpecs for the Mech's response specifications."""

diff --git a/packages/valory/skills/decision_maker_abci/skill.yaml b/packages/valory/skills/decision_maker_abci/skill.yaml
@@ -18,14 +18,14 @@ fingerprint:
   behaviours/decision_receive.py: bafybeid54jwjs4lulcl2n2w7taxne3wqgsey6ppaidwr2up6bztyf35ghm
   behaviours/decision_request.py: bafybeidlyl2ojmpfs2zkewoacraya2cbampo4ynqbqaocsoq7v2nif3ahi
   behaviours/handle_failed_tx.py: bafybeidxpc6u575ymct5tdwutvzov6zqfdoio5irgldn3fw7q3lg36mmxm
-  behaviours/reedem.py: bafybeigzsx4wyov3ehfvdjmo2d7yimss4p7mbkopkqyr4gryxpa5rpv6ku
+  behaviours/reedem.py: bafybeiccrnttqwf5vuf3lr55k5c6tsqsrekoypoag5gntyimjhbmuokkga
   behaviours/round_behaviour.py: bafybeig4tdktyu6hapoqymnxh2bgpds547st6a44heue657wkctwe4gjvm
   behaviours/sampling.py: bafybeiadikynvkaofbko72jc45xthhmmjfmlkpgramormhxwk5u47rnwdu
-  behaviours/tool_selection.py: bafybeihplw663chhva7qko6b67tlmrblub7vvbgkxvvwqt62bwei6qxrde
+  behaviours/tool_selection.py: bafybeigbxgm6rl6jnexxzjhb7thsdaqr56wfhxpve25yxlr7eyswf6lsey
   dialogues.py: bafybeigpwuzku3we7axmxeamg7vn656maww6emuztau5pg3ebsoquyfdqm
   fsm_specification.yaml: bafybeifnob3ceim2mj7lqagtnpwqjqqxs5eg3oiwc73gwm6x5i2dvvlcya
   handlers.py: bafybeihj33szgrcxnpd73s4nvluyxwwsvhjum2cuq3ilhhe6vfola3k7vy
-  models.py: bafybeie6ynq37isn4itspkj2eka6r2wwsok3ofdve6iipw5nwjimmzblzu
+  models.py: bafybeidlis33rkpvtb2w6qrtmiqzb4tbqfolkbbs6hk3jcx73lfuwqif3e
   payloads.py: bafybeic4iz6uxinqpaxnefkadnzaox23y7k57zawnnkeovplih36v5cchy
   policy.py: bafybeiftviqwq6lapfxj6ykxyh64z72vauqrgmsoho7zo2lvsdvu32kw3m
   redeem_info.py: bafybeihtbundd75a4wdk2d23smvdjlh7fsv42ff2cwwuwgkyyxjciobwca
@@ -184,6 +184,7 @@ models:
       realitio_proxy_address: '0xAB16D643bA051C11962DA645f74632d3130c81E2'
       realitio_address: '0x79e32aE03fb27B07C89c0c568F80287C01ca2E57'
       redeeming_batch_size: 5
+      policy_store_path: /data/
       slippage: 0.01
       policy_epsilon: 0.1
       irrelevant_tools:

diff --git a/packages/valory/skills/trader_abci/skill.yaml b/packages/valory/skills/trader_abci/skill.yaml
@@ -25,8 +25,8 @@ skills:
 - valory/transaction_settlement_abci:0.1.0:bafybeia7rzsbea3ch4gcafyp3z6uvqh4npws2xpdwbkkdbrqqpjops7nui
 - valory/termination_abci:0.1.0:bafybeigqpij2sgrpnilqjljfciixop4fldq5qceixc7534q6af4potdmdm
 - valory/market_manager_abci:0.1.0:bafybeidnqerwkljbjgog73qaa5duu5ymsfjs6jsszeupseshx7go3n6afq
-- valory/decision_maker_abci:0.1.0:bafybeiaubnxcztoon3izj4js6wajzdv3a64qst3cwow23q345526exbjsa
-- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiaoczrcwq7vqwuibuirphjfqiah6hq3idufkqkzg4lcybfe6btaom
+- valory/decision_maker_abci:0.1.0:bafybeib2ipy6vo3mu2hzmgemvdrf63aczejgza7hqxfk4vb4ebjonwbma4
+- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeihqeeiehyr3wgruyia4izbp75qz6rilcvbbc6ceb5o4jtvwqzboyy
 behaviours:
   main:
     args: {}

diff --git a/packages/valory/skills/tx_settlement_multiplexer_abci/skill.yaml b/packages/valory/skills/tx_settlement_multiplexer_abci/skill.yaml
@@ -20,7 +20,7 @@ contracts: []
 protocols: []
 skills:
 - valory/abstract_round_abci:0.1.0:bafybeih2fyfb6kkf7r45pvdk7pyyebr5xloia4xiqxtb3qsrasnstqmepq
-- valory/decision_maker_abci:0.1.0:bafybeiaubnxcztoon3izj4js6wajzdv3a64qst3cwow23q345526exbjsa
+- valory/decision_maker_abci:0.1.0:bafybeib2ipy6vo3mu2hzmgemvdrf63aczejgza7hqxfk4vb4ebjonwbma4
 behaviours:
   main:
     args: {}