Skip to content

Commit

Permalink
Merge pull request #87 from valory-xyz/feat/policy
Browse files Browse the repository at this point in the history
Implement a simple epsilon greedy policy
  • Loading branch information
Adamantios authored Sep 20, 2023
2 parents 89d0594 + 7d51809 commit c35554a
Show file tree
Hide file tree
Showing 29 changed files with 665 additions and 167 deletions.
17 changes: 9 additions & 8 deletions packages/packages.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
{
"dev": {
"skill/valory/market_manager_abci/0.1.0": "bafybeicjqkzen2gvpnc3syojcf6ibr4kjaorcwktikqp2zzxec6vs64oky",
"skill/valory/decision_maker_abci/0.1.0": "bafybeic77zhxf5a53ngfzacklmm3nag4t4jeufdhoeg3ykrmvknojtwipi",
"skill/valory/trader_abci/0.1.0": "bafybeibi3rt3rdk4bvng3tlmwdxal3mk3bflq4cgq3fxyjwzpmhj3y6pmy",
"skill/valory/market_manager_abci/0.1.0": "bafybeigvw5tqei5xb272jb7retxiryhuhxxzupddmbdixgsmxzco3n2dde",
"skill/valory/decision_maker_abci/0.1.0": "bafybeihm77vt53vu3xt6x7istsvaw75juptlynpriebgeltbil5evvhtue",
"skill/valory/trader_abci/0.1.0": "bafybeiea2r67bzi5gkymu2pu2m3sg3lflke5m2finonak4uprqpbdcp2ru",
"contract/valory/market_maker/0.1.0": "bafybeif6hivvhxqv4a3uqo2v3yszluzrmngsp624utdi466xwo5qbz5bsi",
"agent/valory/trader/0.1.0": "bafybeigoojaxpc7sz7vuswkxvpa5lp37lipuwc4qycztqk4hxdesmxqcda",
"service/valory/trader/0.1.0": "bafybeidyuzdmwrt66w32oppopviesvhinmwd76q73zv3dex2th7nldpbyi",
"agent/valory/trader/0.1.0": "bafybeiba5eqawktwkpziiazxm5u6ftm7o3zytcbafqsjwt7zrr63o2yis4",
"service/valory/trader/0.1.0": "bafybeifqi4yprf6ajureyqn45aa3mo7vezwsx63skc4n56grlormdzoz5i",
"contract/valory/erc20/0.1.0": "bafybeiggo4u56drxusvcdruqrr7mlfzqbieg4hajalh4tkctxhh3c5lpdi",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeidtrigrfjikl7tkbtowbp6mrdzxulayxnxcmviwy3gag557vnymlq",
"contract/valory/mech/0.1.0": "bafybeibfikekaruskx6ui7u4qnls57i2namfxi45zhqslziqyxg4npjzxu",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeiey57caacd6d3ow25i6a2zuiinkx6kjtckdktannk7kijpd3p2zb4",
"contract/valory/mech/0.1.0": "bafybeidtezc4ubsyqdltiojvqe5eeh77ejte7vqbojspiej5quivgap3ae",
"contract/valory/realitio/0.1.0": "bafybeigb722aznqhc5lsbt3dn4bpyaqe5hnl5onmnestqmzliwtvl3eaom",
"contract/valory/realitio_proxy/0.1.0": "bafybeibvndq6756qck7forgeavhdbn6ykgqs2ufyg7n5g6qdfpveatxuwy",
"contract/valory/conditional_tokens/0.1.0": "bafybeicxwjdbmjajgr5rsmadtkxxwmcm42r2htef3tvng73uzib4hmb6qa"
"contract/valory/conditional_tokens/0.1.0": "bafybeicxwjdbmjajgr5rsmadtkxxwmcm42r2htef3tvng73uzib4hmb6qa",
"contract/valory/agent_registry/0.1.0": "bafybeib6odummk6qqietjekpljkmhqfxk7kv56kv6pyfsnnanews625ncy"
},
"third_party": {
"protocol/open_aea/signing/1.0.0": "bafybeifuxs7gdg2okbn7uofymenjlmnih2wxwkym44lsgwmklgwuckxm2m",
Expand Down
17 changes: 9 additions & 8 deletions packages/valory/agents/trader/aea-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ contracts:
- valory/market_maker:0.1.0:bafybeif6hivvhxqv4a3uqo2v3yszluzrmngsp624utdi466xwo5qbz5bsi
- valory/erc20:0.1.0:bafybeiggo4u56drxusvcdruqrr7mlfzqbieg4hajalh4tkctxhh3c5lpdi
- valory/multisend:0.1.0:bafybeidfktuprydtmi4umolfles5qaf7s3t26puvvs44hvkq6uwwr3ia3a
- valory/mech:0.1.0:bafybeibfikekaruskx6ui7u4qnls57i2namfxi45zhqslziqyxg4npjzxu
- valory/mech:0.1.0:bafybeidtezc4ubsyqdltiojvqe5eeh77ejte7vqbojspiej5quivgap3ae
- valory/conditional_tokens:0.1.0:bafybeicxwjdbmjajgr5rsmadtkxxwmcm42r2htef3tvng73uzib4hmb6qa
- valory/realitio:0.1.0:bafybeigb722aznqhc5lsbt3dn4bpyaqe5hnl5onmnestqmzliwtvl3eaom
- valory/realitio_proxy:0.1.0:bafybeibvndq6756qck7forgeavhdbn6ykgqs2ufyg7n5g6qdfpveatxuwy
Expand All @@ -41,10 +41,10 @@ skills:
- valory/reset_pause_abci:0.1.0:bafybeiblayblhp5wuirfomwcpgydg35ve5tfq3xxetlosjn47wva5ucmzy
- valory/termination_abci:0.1.0:bafybeieqfhvk6klnvxak3vo2ibslkrnnk2bfsn5l3gbaelcprd6cjngxki
- valory/transaction_settlement_abci:0.1.0:bafybeicisazpyvnnzlqso3txiucxr5qhsa4ac7ius6b4mhouxr2wkadwfy
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeidtrigrfjikl7tkbtowbp6mrdzxulayxnxcmviwy3gag557vnymlq
- valory/market_manager_abci:0.1.0:bafybeicjqkzen2gvpnc3syojcf6ibr4kjaorcwktikqp2zzxec6vs64oky
- valory/decision_maker_abci:0.1.0:bafybeic77zhxf5a53ngfzacklmm3nag4t4jeufdhoeg3ykrmvknojtwipi
- valory/trader_abci:0.1.0:bafybeibi3rt3rdk4bvng3tlmwdxal3mk3bflq4cgq3fxyjwzpmhj3y6pmy
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiey57caacd6d3ow25i6a2zuiinkx6kjtckdktannk7kijpd3p2zb4
- valory/market_manager_abci:0.1.0:bafybeigvw5tqei5xb272jb7retxiryhuhxxzupddmbdixgsmxzco3n2dde
- valory/decision_maker_abci:0.1.0:bafybeihm77vt53vu3xt6x7istsvaw75juptlynpriebgeltbil5evvhtue
- valory/trader_abci:0.1.0:bafybeiea2r67bzi5gkymu2pu2m3sg3lflke5m2finonak4uprqpbdcp2ru
default_ledger: ethereum
required_ledgers:
- ethereum
Expand Down Expand Up @@ -188,9 +188,10 @@ models:
slippage: ${float:0.01}
redeem_margin_days: ${int:15}
epsilon: ${float:0.1}
irrelevant_tools: ${set:{"openai-text-davinci-002", "openai-text-davinci-003", "openai-gpt-3.5-turbo",
"openai-gpt-4", "stabilityai-stable-diffusion-v1-5", "stabilityai-stable-diffusion-xl-beta-v2-2-2",
"stabilityai-stable-diffusion-512-v2-1", "stabilityai-stable-diffusion-768-v2-1"}}
irrelevant_tools: ${list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"]}
---
public_id: valory/p2p_libp2p_client:0.1.0
type: connection
Expand Down
9 changes: 3 additions & 6 deletions packages/valory/contracts/agent_registry/contract.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@ description: Agent Registry contract
license: Apache-2.0
aea_version: '>=1.0.0, <2.0.0'
fingerprint:
__init__.py: bafybeidey4syohls5hxmso6qsp5p4uhtzle5txv2mlbym6ktjzknich6oa
build/AgentRegistry.json: bafybeia4qi2vstrutejzrxfpbb6eift7va5cjs7bparaal2fafiiczuiyy
build/AgentRegistryL2.json: bafybeic2jylwfod4nmdtbs4izyxyi246pd3f35aoqyahnmyrvzn7j3sv4e
contract.py: bafybeibqwl52cnz64cysjd2jnjijuakdvyrffapxq65cdzx6g65gu42deq
tests/__init__.py: bafybeicl2oklx774jomlt6wwwegfdzrxh6iazjxwcyc7h4gepjljkpl4ji
tests/test_contract.py: bafybeicj535veqf35zb3ycu5iqjvqgj4a2kdmogmx5ba7fiolt5chah42a
__init__.py: bafybeid3wfzglolebuo6jrrsopswzu4lk77bm76mvw3euizlsjtnt3wmgu
build/AgentRegistry.json: bafybeicoe5elvvsv2neiirsdn4uddrilizmyib3x4mvpklr7olhj2kh4ue
contract.py: bafybeihrv6blme3v6diwci6zxxn72qbg5sanzmfq5tobhs4375ebcuyday
fingerprint_ignore_patterns: []
contracts: []
class_name: AgentRegistryContract
Expand Down
2 changes: 1 addition & 1 deletion packages/valory/contracts/mech/contract.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ fingerprint:
README.md: bafybeibl4uw7rs6mwh7zuvdnqmj2o2xyr7nx5qk3w7torwx3jg6farn6ca
__init__.py: bafybeicx5pxh3cxnml2biuuoebvafvu5tvy6mgkzyjzuubuoeebb5yzjsm
build/mech.json: bafybeihsfz7rdnf6cpa3c4eagvs4pw6jhr6pcsikstakejrlkuwvwzhw7m
contract.py: bafybeigypn3frcjr7mcmdoe5ubgoy57owm4bfcgtrcytiu76u7khthlvei
contract.py: bafybeifbfa6p3jcwn6j7s5aiiqxb3ne4vbmvoggr5zpptmd727gpsjqjpe
fingerprint_ignore_patterns: []
contracts: []
class_name: Mech
Expand Down
34 changes: 17 additions & 17 deletions packages/valory/services/trader/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license: Apache-2.0
fingerprint:
README.md: bafybeigtuothskwyvrhfosps2bu6suauycolj67dpuxqvnicdrdu7yhtvq
fingerprint_ignore_patterns: []
agent: valory/trader:0.1.0:bafybeigoojaxpc7sz7vuswkxvpa5lp37lipuwc4qycztqk4hxdesmxqcda
agent: valory/trader:0.1.0:bafybeiba5eqawktwkpziiazxm5u6ftm7o3zytcbafqsjwt7zrr63o2yis4
number_of_agents: 4
deployment: {}
---
Expand Down Expand Up @@ -105,10 +105,10 @@ type: skill
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:set:{"openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"}}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"]}
benchmark_tool: &id005
args:
log_dir: ${LOG_DIR:str:/benchmarks}
Expand Down Expand Up @@ -174,10 +174,10 @@ type: skill
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:set:{"openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"}}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"]}
benchmark_tool: *id005
2:
models:
Expand Down Expand Up @@ -241,10 +241,10 @@ type: skill
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:set:{"openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"}}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"]}
benchmark_tool: *id005
3:
models:
Expand Down Expand Up @@ -308,10 +308,10 @@ type: skill
slippage: ${SLIPPAGE:float:0.01}
redeem_margin_days: ${REDEEM_MARGIN_DAYS:int:15}
epsilon: ${EPSILON:float:0.1}
irrelevant_tools: ${IRRELEVANT_TOOLS:set:{"openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"}}
irrelevant_tools: ${IRRELEVANT_TOOLS:list:["openai-text-davinci-002", "openai-text-davinci-003",
"openai-gpt-3.5-turbo", "openai-gpt-4", "stabilityai-stable-diffusion-v1-5",
"stabilityai-stable-diffusion-xl-beta-v2-2-2", "stabilityai-stable-diffusion-512-v2-1",
"stabilityai-stable-diffusion-768-v2-1"]}
benchmark_tool: *id005
---
public_id: valory/ledger:0.19.0
Expand Down
12 changes: 12 additions & 0 deletions packages/valory/skills/decision_maker_abci/behaviours/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
DecisionMakerParams,
MultisendBatch,
)
from packages.valory.skills.decision_maker_abci.policy import EGreedyPolicy
from packages.valory.skills.decision_maker_abci.states.base import SynchronizedData
from packages.valory.skills.transaction_settlement_abci.payload_tools import (
hash_payload_to_hex,
Expand All @@ -56,6 +57,7 @@
# which is what we want in most cases
# more info here: https://safe-docs.dev.gnosisdev.com/safe/docs/contracts_tx_execution/
SAFE_GAS = 0
CID_PREFIX = "f01701220"


def remove_fraction_wei(amount: int, fraction: float) -> int:
Expand All @@ -75,6 +77,7 @@ def __init__(self, **kwargs: Any) -> None:
self.multisend_batches: List[MultisendBatch] = []
self.multisend_data = b""
self._safe_tx_hash = ""
self._policy: Optional[EGreedyPolicy] = None

@property
def params(self) -> DecisionMakerParams:
Expand Down Expand Up @@ -129,6 +132,15 @@ def tx_hex(self) -> Optional[str]:
SafeOperation.DELEGATE_CALL.value,
)

@property
def policy(self) -> EGreedyPolicy:
    """Return the policy held in `self._policy`.

    :return: the stored policy instance.
    :raises ValueError: if `self._policy` is still `None`.
    """
    current_policy = self._policy
    if current_policy is not None:
        return current_policy

    # reaching this point means that nothing has assigned `_policy` yet
    raise ValueError(
        "Attempting to retrieve the policy before it has been established."
    )

@staticmethod
def wei_to_native(wei: int) -> float:
"""Convert WEI to native token."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from packages.valory.protocols.contract_api import ContractApiMessage
from packages.valory.skills.abstract_round_abci.base import get_name
from packages.valory.skills.decision_maker_abci.behaviours.base import (
CID_PREFIX,
DecisionMakerBaseBehaviour,
WaitableConditionType,
remove_fraction_wei,
Expand All @@ -41,7 +42,6 @@
from packages.valory.skills.market_manager_abci.bets import BINARY_N_SLOTS


IPFS_HASH_PREFIX = "f01701220"
ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"


Expand Down Expand Up @@ -103,7 +103,7 @@ def mech_response_api(self) -> MechResponseSpecs:

def set_mech_response_specs(self) -> None:
"""Set the mech's response specs."""
full_ipfs_hash = IPFS_HASH_PREFIX + self.response_hex
full_ipfs_hash = CID_PREFIX + self.response_hex
ipfs_link = self.params.ipfs_address + full_ipfs_hash + f"/{self.request_id}"
# The url must be dynamically generated as it depends on the ipfs hash
self.mech_response_api.__dict__["_frozen"] = False
Expand All @@ -112,7 +112,7 @@ def set_mech_response_specs(self) -> None:

@property
def mech_response(self) -> MechInteractionResponse:
"""Get the mech response api specs."""
"""Get the mech's response."""
if self._mech_response is None:
error = "The mech's response has not been set!"
return MechInteractionResponse(error=error)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ def setup(self) -> None:
question=sampled_bet.title, yes=sampled_bet.yes, no=sampled_bet.no
)
prompt = self.params.prompt_template.substitute(prompt_params)
self._metadata = MechMetadata(prompt=prompt, tool=self.params.mech_tool)
tool = self.synchronized_data.mech_tool
self._metadata = MechMetadata(prompt, tool)
msg = f"Prepared metadata {self.metadata!r} for the request."
self.context.logger.info(msg)

Expand Down
40 changes: 33 additions & 7 deletions packages/valory/skills/decision_maker_abci/behaviours/reedem.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
WaitableConditionType,
)
from packages.valory.skills.decision_maker_abci.models import MultisendBatch
from packages.valory.skills.decision_maker_abci.payloads import MultisigTxPayload
from packages.valory.skills.decision_maker_abci.payloads import RedeemPayload
from packages.valory.skills.decision_maker_abci.redeem_info import (
Condition,
FPMM,
Expand Down Expand Up @@ -86,6 +86,10 @@ def synced_timestamp(self) -> int:
"""Return the synchronized timestamp across the agents."""
return int(self.round_sequence.last_round_transition_timestamp.timestamp())

def setup(self) -> None:
"""Setup the behaviour"""
self._policy = self.synchronized_data.policy

def _set_block_number(self, trade: Trade) -> Generator:
"""Set the block number of the given trade's market."""
timestamp = trade.fpmm.creationTimestamp
Expand All @@ -105,6 +109,24 @@ def _set_block_number(self, trade: Trade) -> Generator:
f"Chose block number {self.from_block_mapping[condition_id]!r} as closest to timestamp {timestamp!r}"
)

def _update_policy(self, update: Trade) -> None:
    """Reward the tool that was utilized for the given trade.

    The trade's transaction hash is looked up in the synchronized
    `utilized_tools` mapping. When a tool is recorded for it, the trade's
    claimable amount (converted from WEI) is added as a reward for that tool.

    :param update: the trade whose claimable amount should be credited.
    """
    utilized_tools = self.synchronized_data.utilized_tools
    try:
        tool_index = utilized_tools[update.transactionHash]
    except KeyError:
        # There is no recorded tool for this trade, so there is nothing to reward.
        # Skipping it keeps one untracked trade from aborting the processing
        # of the rest of the chunk.
        return

    claimable_xdai = self.wei_to_native(update.claimable_amount)
    self.policy.add_reward(tool_index, claimable_xdai)

def _stats_report(self) -> None:
    """Log a summary of the policy's statistics for every available mech tool."""
    report_parts = ["Policy statistics so far:\n"]
    for idx, tool in enumerate(self.synchronized_data.available_mech_tools):
        report_parts.extend(
            (
                f"{tool} tool:\n",
                f"\tTimes used: {self.policy.counts[idx]}\n",
                f"\tReward rate: {self.policy.reward_rates[idx]}\n",
            )
        )
    # NOTE(review): the value reported as "best" is whatever `select_tool()` returns;
    # confirm against the policy implementation that this is the best tool.
    report_parts.append(f"Best tool so far is {self.policy.select_tool()}.")
    self.context.logger.info("".join(report_parts))

def update_redeem_info(self, chunk: list) -> Generator:
"""Update the redeeming information using the given chunk."""
trades_updates: Iterator[Trade] = (
Expand All @@ -115,6 +137,8 @@ def update_redeem_info(self, chunk: list) -> Generator:
)

for update in trades_updates:
self._update_policy(update)

# do not use the information if position is not winning
if not update.is_winning:
continue
Expand All @@ -132,6 +156,8 @@ def update_redeem_info(self, chunk: list) -> Generator:
if update == unique_obj:
self.claimable_amounts[condition_id] += update.claimable_amount

self._stats_report()


class RedeemBehaviour(RedeemInfoBehaviour):
"""Redeem the winnings."""
Expand Down Expand Up @@ -523,11 +549,11 @@ def async_act(self) -> Generator:
yield from self._clean_redeem_info()
agent = self.context.agent_address
redeem_tx_hex = yield from self._prepare_safe_tx()
tx_submitter = (
self.matching_round.auto_round_id()
if redeem_tx_hex is not None
else None
)
payload = MultisigTxPayload(agent, tx_submitter, redeem_tx_hex)
tx_submitter = policy = None
if redeem_tx_hex is not None:
tx_submitter = self.matching_round.auto_round_id()
policy = self.policy.serialize()

payload = RedeemPayload(agent, tx_submitter, redeem_tx_hex, policy)

yield from self.finish_behaviour(payload)
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
from packages.valory.skills.decision_maker_abci.behaviours.sampling import (
SamplingBehaviour,
)
from packages.valory.skills.decision_maker_abci.behaviours.tool_selection import (
ToolSelectionBehaviour,
)
from packages.valory.skills.decision_maker_abci.rounds import DecisionMakerAbciApp


Expand All @@ -60,4 +63,5 @@ class AgentDecisionMakerRoundBehaviour(AbstractRoundBehaviour):
BetPlacementBehaviour, # type: ignore
RedeemBehaviour, # type: ignore
HandleFailedTxBehaviour, # type: ignore
ToolSelectionBehaviour, # type: ignore
}
Loading

0 comments on commit c35554a

Please sign in to comment.