Skip to content

Commit

Permalink
Merge pull request #113 from valory-xyz/fix/reward
Browse files Browse the repository at this point in the history
Fix penalty, reward, and tool selection logic
  • Loading branch information
Adamantios authored Oct 25, 2023
2 parents 5a49f5b + 32fada8 commit 8df1d7c
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 34 deletions.
10 changes: 5 additions & 5 deletions packages/packages.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"dev": {
"skill/valory/market_manager_abci/0.1.0": "bafybeigmy3g2klkzdhttti2ouxf575ua5y7ajmh4a3yfpukeukqxjky47m",
"skill/valory/decision_maker_abci/0.1.0": "bafybeiac5tj4x22dgzaq62ja5tudeyt6f27nhs5e3fulej5ayojye5oyzq",
"skill/valory/trader_abci/0.1.0": "bafybeibspd55q2gm5un6wpfoq4zanho3nqwgzp42w63accgpyjo63cq6ie",
"skill/valory/decision_maker_abci/0.1.0": "bafybeib72et7lfim3s3swxj2yjg4sktdbbwhskij7hdxbbmbsr3pll4hou",
"skill/valory/trader_abci/0.1.0": "bafybeif472rqlrlfyl5orrgaj5t7qevbccv6xgl3olj53nrbc5vdpfpbcq",
"contract/valory/market_maker/0.1.0": "bafybeif4mm2s3gxtvp227yypkcnna5ftec7vajcftvtbdmqddh7nprah5m",
"agent/valory/trader/0.1.0": "bafybeicfyo2qkrqep6o2dhp56eqt4zb6cqz33o753zh3wewgwx5rwn6p6m",
"service/valory/trader/0.1.0": "bafybeib46qrohyfex5g34wqtqt6ka2kzbkqllofw2knktve5hn3acf6isy",
"agent/valory/trader/0.1.0": "bafybeibwom2vpz2bkhdfclszqpaswhgafnkn5xyqi2jg6gs5tnvlrxorvq",
"service/valory/trader/0.1.0": "bafybeifljixfiawbcp7jo5ljjpxdxmmghy47ifhr2phu5uoxtzianhqje4",
"contract/valory/erc20/0.1.0": "bafybeibysifjwhlcn7nntsjkowdmja53ed6urfzypi6sxfc7chtheecvpi",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeiemvcwdd746zwhq3rtocpdcx3bj2crxmm2u5kebfsit3fkeq4oogm",
"skill/valory/tx_settlement_multiplexer_abci/0.1.0": "bafybeiddjn7jyhg2wjdtfex772vj7alopkszh6gejxqak332tdwvfuwavy",
"contract/valory/mech/0.1.0": "bafybeifaavuqr2bz7nypapjwkufc5c6vwxm5ifucmtd3w7sh42tne35nga",
"contract/valory/realitio/0.1.0": "bafybeic3wihpzz4deguslpzlnsgrlrabclq3njwuagyaiiv6rzqujfrs6e",
"contract/valory/realitio_proxy/0.1.0": "bafybeidx37xzjjmapwacedgzhum6grfzhp5vhouz4zu3pvpgdy5pgb2fr4",
Expand Down
6 changes: 3 additions & 3 deletions packages/valory/agents/trader/aea-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ skills:
- valory/reset_pause_abci:0.1.0:bafybeicpxn2khtaesuf4cq6ypwdmdmonlqroj2q2i6cxvpizc2y4cw66pe
- valory/termination_abci:0.1.0:bafybeieqm46zuccaagnko3qlw6p3nvoohdrfgvpmw467r5lyil2dqrzjsy
- valory/transaction_settlement_abci:0.1.0:bafybeia6cdxdlqrcwk2maw25fo7dafzd2p3rs7syropvufophk2pitzbwy
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiemvcwdd746zwhq3rtocpdcx3bj2crxmm2u5kebfsit3fkeq4oogm
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiddjn7jyhg2wjdtfex772vj7alopkszh6gejxqak332tdwvfuwavy
- valory/market_manager_abci:0.1.0:bafybeigmy3g2klkzdhttti2ouxf575ua5y7ajmh4a3yfpukeukqxjky47m
- valory/decision_maker_abci:0.1.0:bafybeiac5tj4x22dgzaq62ja5tudeyt6f27nhs5e3fulej5ayojye5oyzq
- valory/trader_abci:0.1.0:bafybeibspd55q2gm5un6wpfoq4zanho3nqwgzp42w63accgpyjo63cq6ie
- valory/decision_maker_abci:0.1.0:bafybeib72et7lfim3s3swxj2yjg4sktdbbwhskij7hdxbbmbsr3pll4hou
- valory/trader_abci:0.1.0:bafybeif472rqlrlfyl5orrgaj5t7qevbccv6xgl3olj53nrbc5vdpfpbcq
default_ledger: ethereum
required_ledgers:
- ethereum
Expand Down
2 changes: 1 addition & 1 deletion packages/valory/services/trader/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license: Apache-2.0
fingerprint:
README.md: bafybeigtuothskwyvrhfosps2bu6suauycolj67dpuxqvnicdrdu7yhtvq
fingerprint_ignore_patterns: []
agent: valory/trader:0.1.0:bafybeicfyo2qkrqep6o2dhp56eqt4zb6cqz33o753zh3wewgwx5rwn6p6m
agent: valory/trader:0.1.0:bafybeibwom2vpz2bkhdfclszqpaswhgafnkn5xyqi2jg6gs5tnvlrxorvq
number_of_agents: 4
deployment: {}
---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ def _blacklist(self) -> Optional[str]:
sampled_bet.status = BetStatus.BLACKLISTED
blacklist_expiration = self.synced_time + self.params.blacklisting_duration
sampled_bet.blacklist_expiration = blacklist_expiration
# add a zero reward to the tool that has led to the blacklisting of the market
self.policy.add_reward(self.synchronized_data.mech_tool_idx)
if self.synchronized_data.is_mech_price_set:
# impose a penalty equivalent to the mech's price on the tool responsible for blacklisting the market
tool_idx = self.synchronized_data.mech_tool_idx
penalty = -self.synchronized_data.mech_price
self.policy.add_reward(tool_idx, penalty)

return serialize_bets(bets)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,15 @@ def _set_block_number(self, trade: Trade) -> Generator:

def _update_policy(self, update: Trade) -> None:
"""Update the policy."""
claimable_xdai = self.wei_to_native(update.claimable_amount)
# the mapping might not contain a tool for a bet placement because it might have happened on a previous run
tool_index = self.utilized_tools.get(update.transactionHash, None)
if tool_index is not None:
# we try to avoid an ever-increasing dictionary of utilized tools by removing a tool when not needed anymore
del self.utilized_tools[update.transactionHash]
self.policy.add_reward(tool_index, claimable_xdai)
claimable_xdai = self.wei_to_native(update.claimable_amount)
mech_price = self.wei_to_native(self.synchronized_data.mech_price)
reward = claimable_xdai - mech_price
self.policy.add_reward(tool_index, reward)

def _stats_report(self) -> None:
"""Report policy statistics."""
Expand Down
33 changes: 21 additions & 12 deletions packages/valory/skills/decision_maker_abci/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ class MultisendBatch:
operation: MultiSendOperation = MultiSendOperation.CALL


@dataclass
@dataclass(init=False)
class PredictionResponse:
"""A response of a prediction."""

Expand All @@ -205,9 +205,14 @@ class PredictionResponse:
confidence: float
info_utility: float

def __post_init__(self) -> None:
"""Runs checks on whether the current prediction response is valid or not."""
# all the fields are probabilities
def __init__(self, **kwargs: Any) -> None:
"""Initialize the mech's prediction ignoring extra keys."""
self.p_yes = kwargs.pop("p_yes")
self.p_no = kwargs.pop("p_no")
self.confidence = kwargs.pop("confidence")
self.info_utility = kwargs.pop("info_utility")

# all the fields are probabilities; run checks on whether the current prediction response is valid or not.
probabilities = (getattr(self, field) for field in self.__annotations__)
if (
any(not (0 <= prob <= 1) for prob in probabilities)
Expand All @@ -223,18 +228,22 @@ def vote(self) -> Optional[int]:
return None


@dataclass
@dataclass(init=False)
class MechInteractionResponse:
"""A structure for the response of a mech interaction task."""

requestId: int = 0
result: Optional[PredictionResponse] = None
error: str = "Unknown"
request_id: int
result: Optional[PredictionResponse]
error: str

def __init__(self, **kwargs: Any) -> None:
"""Initialize the mech's response ignoring extra keys."""
self.request_id = kwargs.pop("requestId", 0)
self.error = kwargs.pop("error", "Unknown")
result = kwargs.pop("result", None)

def __post_init__(self) -> None:
"""Parses the nested part of the mech interaction response to a `PredictionResponse`."""
if isinstance(self.result, str):
self.result = PredictionResponse(**json.loads(self.result))
if isinstance(result, str):
self.result = PredictionResponse(**json.loads(result))

@classmethod
def incorrect_format(cls, res: Any) -> "MechInteractionResponse":
Expand Down
2 changes: 1 addition & 1 deletion packages/valory/skills/decision_maker_abci/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def select_tool(self) -> Optional[int]:
if self.n_tools == 0:
return None

if sum(self.counts) == 0 or random.random() < self.eps: # nosec
if sum(self.reward_rates) == 0 or random.random() < self.eps: # nosec
return self.random_tool

return self.best_tool
Expand Down
10 changes: 5 additions & 5 deletions packages/valory/skills/decision_maker_abci/skill.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,24 @@ fingerprint:
behaviours/__init__.py: bafybeih6ddz2ocvm6x6ytvlbcz6oi4snb5ee5xh5h65nq4w2qf7fd7zfky
behaviours/base.py: bafybeidny4y4w4jbjbp5d5qivur6jm37fnuik4mqzchzshiuvpozxe7vwy
behaviours/bet_placement.py: bafybeiauzdizqawjgzc3ugx33vuh726e25juhacqdr7c6uztpn6tra5dva
behaviours/blacklisting.py: bafybeie4aawdikv7pj5rorqy55jy2fkqzhitz3qmf3wyycarsj4dmwz7wa
behaviours/blacklisting.py: bafybeih2yrimf4l5t7mdqnmxv3zzv67fuzxytzbwcsmkcs5crbtmwccsry
behaviours/decision_receive.py: bafybeibfnucc444uqcythgsn4cawluubbi4u4q7ab4aqc4ts3lu4qnvhdq
behaviours/decision_request.py: bafybeibrk6jxalkgspinmjboqr3o6d6hl6mxtlkif74a5p32vp2zzhhzua
behaviours/handle_failed_tx.py: bafybeidxpc6u575ymct5tdwutvzov6zqfdoio5irgldn3fw7q3lg36mmxm
behaviours/reedem.py: bafybeiccrnttqwf5vuf3lr55k5c6tsqsrekoypoag5gntyimjhbmuokkga
behaviours/reedem.py: bafybeihywv5332ngbz4hhcwjbaglctblwjbfhdenppznnlxbsgnxdhrrim
behaviours/round_behaviour.py: bafybeig4tdktyu6hapoqymnxh2bgpds547st6a44heue657wkctwe4gjvm
behaviours/sampling.py: bafybeiadikynvkaofbko72jc45xthhmmjfmlkpgramormhxwk5u47rnwdu
behaviours/tool_selection.py: bafybeigfr2frkljrxyfxs5p3j42equzehgaqtkyuxk6eiujyudr6ajqakm
dialogues.py: bafybeigpwuzku3we7axmxeamg7vn656maww6emuztau5pg3ebsoquyfdqm
fsm_specification.yaml: bafybeifnob3ceim2mj7lqagtnpwqjqqxs5eg3oiwc73gwm6x5i2dvvlcya
handlers.py: bafybeihj33szgrcxnpd73s4nvluyxwwsvhjum2cuq3ilhhe6vfola3k7vy
models.py: bafybeidlis33rkpvtb2w6qrtmiqzb4tbqfolkbbs6hk3jcx73lfuwqif3e
models.py: bafybeihp2heb2x7qyyvlaxr7slaxofflkhnrvrfdaorwc5ztnui2f7huly
payloads.py: bafybeiawx6ix2ljvf545fe3wocdoh7frds6gjvzgv62b3w6huaolgvywqi
policy.py: bafybeidkqqrkgxah5q73mlpr64swxczadhuyax3ab2ijg6obemnvy2sxnu
policy.py: bafybeidpmx4ek3qze63zpuwixyf6t7bdv62ewgkzt3ljrzadiwdw64cueq
redeem_info.py: bafybeibddfxwp3577c3dl2utaowwltquu5fg6crezpumoebw563wxpbfrm
rounds.py: bafybeihopxmhzrpipc6r5x2vqf2tcixzuf5xdpqfsr7gr3n756n3ssft5q
states/__init__.py: bafybeid23llnyp6j257dluxmrnztugo5llsrog7kua53hllyktz4dqhqoy
states/base.py: bafybeibx3kn2b3rovyiowjzpwhspjdbzyfroequg7zsjqsbdh5cyowu72a
states/base.py: bafybeihe72qgalihmtzaezy3bvodxiqiawv2gzlnrfnmegddf22brieyiu
states/bet_placement.py: bafybeibalhxhp2c4oljmiwqi6ds3g36fgtabmf42mb5sgq6z22znrcbhda
states/blacklisting.py: bafybeifruvxwwltndwazkzzbpc5nnnad3z5t5ofy6d3i7nssljbxl6gvxu
states/decision_receive.py: bafybeifm3oyq2aji7f5yag6wpe4vr3ivi74pybdsk2jvmziiidx5nt7t4a
Expand Down
5 changes: 5 additions & 0 deletions packages/valory/skills/decision_maker_abci/states/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ def sampled_bet(self) -> Bet:
"""Get the sampled bet."""
return self.bets[self.sampled_bet_index]

@property
def is_mech_price_set(self) -> bool:
"""Get whether mech's price is known."""
# NOTE(review): this is a truthiness check on the stored value, so a stored price of 0 is reported as "not set";
# presumably `db.get` returns the supplied default (`False`) when the `mech_price` key is missing - confirm.
return bool(self.db.get("mech_price", False))

@property
def mech_price(self) -> int:
"""Get the mech's request price."""
Expand Down
4 changes: 2 additions & 2 deletions packages/valory/skills/trader_abci/skill.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ skills:
- valory/transaction_settlement_abci:0.1.0:bafybeia6cdxdlqrcwk2maw25fo7dafzd2p3rs7syropvufophk2pitzbwy
- valory/termination_abci:0.1.0:bafybeieqm46zuccaagnko3qlw6p3nvoohdrfgvpmw467r5lyil2dqrzjsy
- valory/market_manager_abci:0.1.0:bafybeigmy3g2klkzdhttti2ouxf575ua5y7ajmh4a3yfpukeukqxjky47m
- valory/decision_maker_abci:0.1.0:bafybeiac5tj4x22dgzaq62ja5tudeyt6f27nhs5e3fulej5ayojye5oyzq
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiemvcwdd746zwhq3rtocpdcx3bj2crxmm2u5kebfsit3fkeq4oogm
- valory/decision_maker_abci:0.1.0:bafybeib72et7lfim3s3swxj2yjg4sktdbbwhskij7hdxbbmbsr3pll4hou
- valory/tx_settlement_multiplexer_abci:0.1.0:bafybeiddjn7jyhg2wjdtfex772vj7alopkszh6gejxqak332tdwvfuwavy
behaviours:
main:
args: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ contracts: []
protocols: []
skills:
- valory/abstract_round_abci:0.1.0:bafybeid7neqpxxe4ualp23gxgrtpf6r5u3myr2mmc4uvwameyqshfdoydq
- valory/decision_maker_abci:0.1.0:bafybeiac5tj4x22dgzaq62ja5tudeyt6f27nhs5e3fulej5ayojye5oyzq
- valory/decision_maker_abci:0.1.0:bafybeib72et7lfim3s3swxj2yjg4sktdbbwhskij7hdxbbmbsr3pll4hou
behaviours:
main:
args: {}
Expand Down

0 comments on commit 8df1d7c

Please sign in to comment.