Skip to content

Commit

Permalink
Merge pull request #155 from valory-xyz/test/benchmark
Browse files Browse the repository at this point in the history
proposal changes for benchmark runs
  • Loading branch information
0xArdi authored Feb 7, 2024
2 parents 0f36b71 + aba3a6f commit 3e152f9
Show file tree
Hide file tree
Showing 17 changed files with 881 additions and 576 deletions.
58 changes: 58 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[MASTER]
ignore-patterns=.*_pb2.py,contract_dispatcher.py,test_abci_messages.py,test_tendermint_messages.py
ignore=packages/valory/protocols/abci,packages/valory/connections/abci/gogoproto

[MESSAGES CONTROL]
disable=C0103,R0801,C0301,C0201,C0204,C0209,W1203,C0302,R1735,R1729,W0511,E0611

# See here for more options: https://www.codeac.io/documentation/pylint-configuration.html
R1735: use-dict-literal
R1729: use-a-generator
C0103: invalid-name
C0201: consider-iterating-dictionary
W1203: logging-fstring-interpolation
C0204: bad-mcs-classmethod-argument
C0209: consider-using-f-string
C0301: http://pylint-messages.wikidot.com/messages:c0301 > Line too long
C0302: http://pylint-messages.wikidot.com/messages:c0302 > Too many lines in module
R0801: similar lines
E0611: no-name-in-module

[IMPORTS]
ignored-modules=pandas,numpy,aea_cli_ipfs,compose,multidict,gql,anthropic,tiktoken

[DESIGN]
# min-public-methods=1
max-public-methods=58
# max-returns=10
# max-bool-expr=7
max-args=6
# max-locals=31
# max-statements=80
max-parents=10
max-branches=36
max-attributes=8

[REFACTORING]
# max-nested-blocks=6

[SPELLING]
# uncomment to enable
# spelling-dict=en_US

# List of comma separated words that should not be checked.
spelling-ignore-words=nocover,pragma,params,noqa,kwargs,str,async,json,boolean,config,pytest,args,url,tx,jsonschema,traceback,api,nosec

[SIMILARITIES]

# Minimum lines number of a similarity.
min-similarity-lines=10

# Ignore comments when computing similarities.
ignore-comments=yes

# Ignore docstrings when computing similarities.
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no
6 changes: 3 additions & 3 deletions packages/packages.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"dev": {
"connection/valory/websocket_client/0.1.0": "bafybeiflmystocxaqblhpzqlcop2vkhsknpzjx2jomohomaxamwskeokzm",
"skill/valory/contract_subscription/0.1.0": "bafybeicyugrkx5glat4p4ezwf6i7oduh26eycfie6ftd4uxrknztzl3ik4",
"agent/valory/mech/0.1.0": "bafybeibhm744jats4jdpavgfrwxgetsolyg5pcy554qcqibx7fhrln4x5q",
"agent/valory/mech/0.1.0": "bafybeiepdstu23qid2pcrugkpwtibh7jhlshmnmclwwbgjmefio27cp4im",
"skill/valory/mech_abci/0.1.0": "bafybeieimp7xzxcnbzsuunf2xkcy5juulhmzsmkq2v3sw3o3lgssb53cnu",
"contract/valory/agent_mech/0.1.0": "bafybeiepxumywg6z2zapqzc3bg3iey23cmlgjzxisqox5j74o5i2texr5e",
"service/valory/mech/0.1.0": "bafybeiadmouelgxqwa6qmeosrsf7emcpxfa6m6qobg4oxp4ra73rfh5x6y",
"service/valory/mech/0.1.0": "bafybeifzkrmgejdce5nvp7s63dpvthde6wn6etolesh4dmf5pno7jplzcy",
"protocol/valory/acn_data_share/0.1.0": "bafybeih5ydonnvrwvy2ygfqgfabkr47s4yw3uqxztmwyfprulwfsoe7ipq",
"skill/valory/task_submission_abci/0.1.0": "bafybeib4m2bwgchloqss3wotsx4rz7qqkwydaesiqkls2zq7zbtp6jtpsi",
"skill/valory/task_execution/0.1.0": "bafybeib7zgltwvd5mzji7z4zcplhd4yg5nqc5ecnkg7ldgln4qvztue7aq",
"skill/valory/task_execution/0.1.0": "bafybeicthrgfdv6q56htrsradow445smojjk2zqqizm4cdxyfxfor22vyy",
"contract/valory/agent_registry/0.1.0": "bafybeiargayav6yiztdnwzejoejstcx4idssch2h4f5arlgtzj3tgsgfmu",
"protocol/valory/websocket_client/0.1.0": "bafybeih43mnztdv3v2hetr2k3gezg7d3yj4ur7cxdvcyaqhg65e52s5sf4",
"skill/valory/websocket_client/0.1.0": "bafybeidwntmkk4b2ixq5454ycbkknclqx7a6vpn7aqpm2nw3duszqrxvta",
Expand Down
2 changes: 1 addition & 1 deletion packages/valory/agents/mech/aea-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ skills:
- valory/registration_abci:0.1.0:bafybeic2ynseiak7jpta7jfwuqwyp453b4p7lolr4wihxmpn633uekv5am
- valory/reset_pause_abci:0.1.0:bafybeidzajbe3erygeh2xbd6lrjv7nsptznjuzrt24ykgvhgotdeyhfnba
- valory/subscription_abci:0.1.0:bafybeigaxq7m2dqv2huhg5jvb4jx3rysqwvvjj2xhojow3t3zzuwq2k4ie
- valory/task_execution:0.1.0:bafybeib7zgltwvd5mzji7z4zcplhd4yg5nqc5ecnkg7ldgln4qvztue7aq
- valory/task_execution:0.1.0:bafybeicthrgfdv6q56htrsradow445smojjk2zqqizm4cdxyfxfor22vyy
- valory/task_submission_abci:0.1.0:bafybeib4m2bwgchloqss3wotsx4rz7qqkwydaesiqkls2zq7zbtp6jtpsi
- valory/termination_abci:0.1.0:bafybeie4zvjfxvdu7qrulmur3chpjz3kpj5m4bjsxvpk4gvj5zbyyayfaa
- valory/transaction_settlement_abci:0.1.0:bafybeiaefgqbs7zsn5xe5kdwrujj7ivygkn3ujpw6crnvi3knvxw75qmja
Expand Down
2 changes: 1 addition & 1 deletion packages/valory/services/mech/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license: Apache-2.0
fingerprint:
README.md: bafybeif7ia4jdlazy6745ke2k2x5yoqlwsgwr6sbztbgqtwvs3ndm2p7ba
fingerprint_ignore_patterns: []
agent: valory/mech:0.1.0:bafybeibhm744jats4jdpavgfrwxgetsolyg5pcy554qcqibx7fhrln4x5q
agent: valory/mech:0.1.0:bafybeiepdstu23qid2pcrugkpwtibh7jhlshmnmclwwbgjmefio27cp4im
number_of_agents: 4
deployment:
agent:
Expand Down
14 changes: 12 additions & 2 deletions packages/valory/skills/task_execution/behaviours.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from packages.valory.protocols.ipfs.dialogues import IpfsDialogue
from packages.valory.protocols.ledger_api import LedgerApiMessage
from packages.valory.skills.task_execution.models import Params
from packages.valory.skills.task_execution.utils.benchmarks import TokenCounterCallback
from packages.valory.skills.task_execution.utils.ipfs import (
ComponentPackageLoader,
get_ipfs_file_hash,
Expand Down Expand Up @@ -291,8 +292,16 @@ def _handle_done_task(self, task_result: Any) -> None:
}
if task_result is not None:
# task succeeded
deliver_msg, prompt, transaction = task_result
response = {**response, "result": deliver_msg, "prompt": prompt}
deliver_msg, prompt, transaction, counter_callback = task_result
cost_dict = {}
if counter_callback is not None:
cost_dict = cast(TokenCounterCallback, counter_callback).cost_dict
response = {
**response,
"result": deliver_msg,
"prompt": prompt,
"cost_dict": cost_dict,
}
self._done_task["transaction"] = transaction

self.context.logger.info(f"Task result for request {req_id}: {task_result}")
Expand Down Expand Up @@ -371,6 +380,7 @@ def _prepare_task(self, task_data: Dict[str, Any]) -> None:
task_data["tool_py"] = tool_py
task_data["callable_method"] = callable_method
task_data["api_keys"] = self.params.api_keys
task_data["counter_callback"] = TokenCounterCallback()
future = self._submit_task(tool_task.execute, **task_data)
executing_task = cast(Dict[str, Any], self._executing_task)
executing_task["timeout_deadline"] = time.time() + self.params.task_deadline
Expand Down
7 changes: 6 additions & 1 deletion packages/valory/skills/task_execution/skill.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ license: Apache-2.0
aea_version: '>=1.0.0, <2.0.0'
fingerprint:
__init__.py: bafybeidqhvvlnthkbnmrdkdeyjyx2f2ab6z4xdgmagh7welqnh2v6wczx4
behaviours.py: bafybeifa72egwarcmfneqmo3ak6wfygjc3i7hplxl6ptafb263vuaey3fm
behaviours.py: bafybeihprwot27csugwpoimsqcwprxu5bstqpvanot3nkmfgvhbr66ww4y
dialogues.py: bafybeid4zxalqdlo5mw4yfbuf34hx4jp5ay5z6chm4zviwu4cj7fudtwca
handlers.py: bafybeidbt5ezj74cgfogk3w4uw4si2grlnk5g54veyumw7g5yh6gdscywu
models.py: bafybeihgclxctyltuehj2f4fzj26edptqugrrm4phd6ovuulezrqot6qo4
utils/__init__.py: bafybeiccdijaigu6e5p2iruwo5mkk224o7ywedc7nr6xeu5fpmhjqgk24e
utils/benchmarks.py: bafybeihdutp44ds4cupszbd34gsmcw6fsdda2tzkh5b27fpg65ejbpdvdm
utils/ipfs.py: bafybeidinbdqkidix44ibz5hug7inkcbijooag53gr5mtbaa72tk335uqq
utils/task.py: bafybeieuziu7owtk543z3umgmayhjh67klftk7vrhz24l6rlaii5lvkqh4
fingerprint_ignore_patterns: []
Expand Down Expand Up @@ -106,4 +107,8 @@ dependencies:
version: ==0.2.1
pyyaml:
version: <=6.0.1,>=3.10
tiktoken:
version: ==0.5.1
anthropic:
version: ==0.3.11
is_abstract: false
105 changes: 105 additions & 0 deletions packages/valory/skills/task_execution/utils/benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
#
# Copyright 2024 Valory AG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ------------------------------------------------------------------------------
"""Benchmarking for tools."""

import logging
from typing import Any, Dict, Union

import anthropic
import tiktoken
from tiktoken import Encoding


PRICE_NUM_TOKENS = 1000


def encoding_for_model(model: str) -> Encoding:
"""Get the encoding for a model."""
return tiktoken.encoding_for_model(model)


def count_tokens(text: str, model: str) -> int:
"""Count the number of tokens in a text."""
if "claude" in model:
return anthropic.Anthropic().count_tokens(text)

enc = encoding_for_model(model)
return len(enc.encode(text))


class TokenCounterCallback:
"""Callback to count the number of tokens used in a generation."""

TOKEN_PRICES = {
"gpt-3.5-turbo": {"input": 0.001, "output": 0.002},
"gpt-4": {"input": 0.03, "output": 0.06},
"gpt-4-turbo": {"input": 0.01, "output": 0.03},
"claude-2": {"input": 0.008, "output": 0.024},
}

def __init__(self) -> None:
"""Initialize the callback."""
self.cost_dict: Dict[str, Union[int, float]] = {
"input_tokens": 0,
"output_tokens": 0,
"total_tokens": 0,
"input_cost": 0,
"output_cost": 0,
"total_cost": 0,
}

@staticmethod
def token_to_cost(tokens: int, model: str, tokens_type: str) -> float:
"""Converts a number of tokens to a cost in dollars."""
return (
tokens
/ PRICE_NUM_TOKENS
* TokenCounterCallback.TOKEN_PRICES[model][tokens_type]
)

def calculate_cost(self, tokens_type: str, model: str, **kwargs: Any) -> None:
"""Calculate the cost of a generation."""
# Check if it its prompt or tokens are passed in
prompt_key = f"{tokens_type}_prompt"
token_key = f"{tokens_type}_tokens"
if prompt_key in kwargs:
tokens = count_tokens(kwargs[prompt_key], model)
elif token_key in kwargs:
tokens = kwargs[token_key]
else:
logging.warning(f"No {token_key}_tokens or {tokens_type}_prompt found.")
cost = self.token_to_cost(tokens, model, tokens_type)
self.cost_dict[token_key] += tokens
self.cost_dict[f"{tokens_type}_cost"] += cost

def __call__(self, model: str, **kwargs: Any) -> None:
"""Callback to count the number of tokens used in a generation."""
if model not in list(TokenCounterCallback.TOKEN_PRICES.keys()):
raise ValueError(f"Model {model} not supported.")
try:
self.calculate_cost("input", model, **kwargs)
self.calculate_cost("output", model, **kwargs)
self.cost_dict["total_tokens"] = (
self.cost_dict["input_tokens"] + self.cost_dict["output_tokens"]
)
self.cost_dict["total_cost"] = (
self.cost_dict["input_cost"] + self.cost_dict["output_cost"]
)
except Exception as e:
logging.error(f"Error in TokenCounterCallback: {e}")
51 changes: 26 additions & 25 deletions tools/native_transfer_request/native_transfer_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,22 @@

client: Optional[OpenAI] = None

class OpenAIClientManager:
"""Client context manager for OpenAI."""
def __init__(self, api_key: str):
self.api_key = api_key

def init_openai_client(api_key: str) -> OpenAI:
"""Initialize the OpenAI client"""
global client
if client is None:
client = OpenAI(api_key=api_key)
return client
def __enter__(self) -> OpenAI:
global client
if client is None:
client = OpenAI(api_key=self.api_key)
return client

def close_openai_client() -> None:
"""Close the OpenAI client"""
global client
if client is not None:
client.close()
client = None
def __exit__(self, exc_type, exc_value, traceback) -> None:
global client
if client is not None:
client.close()
client = None



Expand Down Expand Up @@ -102,7 +104,7 @@ def make_request_openai_request(
def native_transfer(
prompt: str,
api_key: str,
) -> Tuple[str, Optional[str], Optional[Dict[str, Any]]]:
) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
"""Perform native transfer."""
tool_prompt = NATIVE_TRANSFER_PROMPT.format(user_prompt=prompt)
response = make_request_openai_request(prompt=tool_prompt)
Expand All @@ -118,26 +120,25 @@ def native_transfer(
"to": str(parsed_txs["to"]),
"value": int(parsed_txs["wei_value"]),
}
return response, prompt, transaction
return response, prompt, transaction, None


AVAILABLE_TOOLS = {
"native": native_transfer,
}


def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]]]:
def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
"""Run the task"""
init_openai_client(kwargs["api_keys"]["openai"])
with OpenAIClientManager(kwargs["api_keys"]["openai"]):

prompt = kwargs["prompt"]
api_key = kwargs["api_keys"]["openai"]
tool = cast(str, kwargs["tool"]).replace(TOOL_PREFIX, "")
prompt = kwargs["prompt"]
api_key = kwargs["api_keys"]["openai"]
tool = cast(str, kwargs["tool"]).replace(TOOL_PREFIX, "")

if tool not in AVAILABLE_TOOLS:
return f"No tool named `{kwargs['tool']}`", None, None
if tool not in AVAILABLE_TOOLS:
return f"No tool named `{kwargs['tool']}`", None, None, None

transaction_builder = AVAILABLE_TOOLS[tool]
response = transaction_builder(prompt, api_key)
close_openai_client()
return response
transaction_builder = AVAILABLE_TOOLS[tool]
response = transaction_builder(prompt, api_key)
return response
Loading

0 comments on commit 3e152f9

Please sign in to comment.