valory-xyz · 0xArdi · Aug 7, 2024 · Apr 24, 2024 · Apr 29, 2024 · May 7, 2024
diff --git a/.example.env b/.example.env
@@ -1,5 +1,5 @@
-FILE_HASH_TO_TOOLS='[["bafybeibdcttrlgp5udygntka5fofi566pitkxhquke37ng7csvndhy4s2i",["openai-gpt-3.5-turbo-instruct","openai-gpt-3.5-turbo","openai-gpt-4"]],["bafybeiegbsq2ajxyipajac4mmxyvbt22ctwyuypuid6ziavqpndns6fsjy",["stabilityai-stable-diffusion-v1-5","stabilityai-stable-diffusion-xl-beta-v2-2-2","stabilityai-stable-diffusion-512-v2-1","stabilityai-stable-diffusion-768-v2-1"]],["bafybeihugaylajwh2fgypxatcw5qrw5qxadtrsi2h2s2246442wlvjirtm",["transfer-native"]],["bafybeiayity7o6c2yynrpe6libyak37hgf4fp7a4kjfcnqkkxwy3zkp3ie",["prediction-offline","prediction-online"]]]'
-API_KEYS='[["openai","dummy_api_key"],["stabilityai","dummy_api_key"]]'
+FILE_HASH_TO_TOOLS='[["bafybeibdcttrlgp5udygntka5fofi566pitkxhquke37ng7csvndhy4s2i",["openai-gpt-3.5-turbo-instruct","openai-gpt-3.5-turbo","openai-gpt-4"]],["bafybeiegbsq2ajxyipajac4mmxyvbt22ctwyuypuid6ziavqpndns6fsjy",["stabilityai-stable-diffusion-v1-5","stabilityai-stable-diffusion-xl-beta-v2-2-2","stabilityai-stable-diffusion-512-v2-1","stabilityai-stable-diffusion-768-v2-1"]],["bafybeihugaylajwh2fgypxatcw5qrw5qxadtrsi2h2s2246442wlvjirtm",["transfer-native"]],["bafybeiayity7o6c2yynrpe6libyak37hgf4fp7a4kjfcnqkkxwy3zkp3ie",["prediction-offline","prediction-online"]],["bafybeifpalgiya4dyu42hehsqxlwr36fjtxdo75xf3gcpfrauvoxgargy4",["gemini-pro", "gemini-1.0-pro-001", "gemini-1.0-pro-latest", "gemini-1.5-pro-latest"]]]'
+API_KEYS='[["openai","dummy_api_key"],["stabilityai","dummy_api_key"],["gemini","dummy_api_key"]]'
 ETHEREUM_LEDGER_RPC_0=https://rpc.eu-central-2.gateway.fm/v4/gnosis/non-archival/mainnet
 GNOSIS_RPC_0=https://rpc.eu-central-2.gateway.fm/v4/gnosis/non-archival/mainnet
 ETHEREUM_WEBSOCKET_RPC_0=wss://rpc.eu-central-2.gateway.fm/ws/v4/gnosis/non-archival/mainnet

diff --git a/.github/workflows/common_checks.yaml b/.github/workflows/common_checks.yaml
@@ -190,6 +190,9 @@ jobs:
           REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
           NEWS_API_KEY: ${{ secrets.NEWS_API_KEY }}
           OPENROUTER_API_KEY: ${{ secrets.OPEN_ROUTER_API_KEY }}
+          GNOSIS_RPC_URL: ${{ secrets.GNOSIS_RPC_URL }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          GRAPH_API_KEY: ${{ secrets.GRAPH_API_KEY }}
         run: |
           printenv
           tox -e check-tools
diff --git a/.gitignore b/.gitignore
@@ -54,4 +54,6 @@ backup_mech/
 /packages/valory/skills/termination_abci/
 /pip
 /tool_test.py
-.venv
+.venv
+log
+.benchmark-cache
diff --git a/.gitleaksignore b/.gitleaksignore
@@ -130,4 +130,8 @@ a8dd503580f9dee27828d954000e5195a8231685:packages/valory/skills/task_submission_
 a8dd503580f9dee27828d954000e5195a8231685:packages/valory/skills/task_submission_abci/skill.yaml:generic-api-key:1574
 a8dd503580f9dee27828d954000e5195a8231685:packages/valory/skills/task_submission_abci/skill.yaml:generic-api-key:1575
 cefa30e1c946a42089bed80891eff51bad6b39a9:packages/valory/protocols/websocket_client/protocol.yaml:generic-api-key:18
-c73b5d88fe1a364d7b1168b3ad171da59e7a2eac:packages/valory/protocols/websocket_client/protocol.yaml:generic-api-key:18
+c73b5d88fe1a364d7b1168b3ad171da59e7a2eac:packages/valory/protocols/websocket_client/protocol.yaml:generic-api-key:18
+c0dee637e3d520b9706e91db197e859ca5fce23a:packages/valory/skills/task_execution/skill.yaml:generic-api-key:15
+3a080091883863f5b62d1dcfe030ec7577889f8c:packages/valory/skills/task_execution/skill.yaml:generic-api-key:15
+eecbf34a242377fb07d28230cf11ea43fb07de8d:packages/valory/skills/task_execution/skill.yaml:generic-api-key:15
+d7f9fd6ad8bcf9ea2bf14bf3de221971ddb56268:packages/valory/skills/task_execution/skill.yaml:generic-api-key:15
diff --git a/README.md b/README.md
@@ -223,7 +223,7 @@ You can create and mint your own AI Mech that handles requests for tasks that yo
             <api_key>=kwargs["api_keys"][<api_key_id>].
             ```
 
-    - **Output**: It must **always** return a tuple (`Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]`):
+    - **Output**: It must **always** return a tuple (`Tuple[str, Optional[str], Optional[Dict[str, Any]], Any, Any]`):
         - `result_str`: A string-serialized JSON object containing the result of the tool execution (custom format).
         - `prompt_used`: A string representing the prompt used internally by the tool. This output is only used for analytics and it can be set to `None`.
         - `generated_tx`: A dictionary containing the fields of a generated transaction to be submitted following the execution of the tool (e.g., a token transfer). It can be set to `None`. Template of a generated transaction:
@@ -238,10 +238,15 @@ You can create and mint your own AI Mech that handles requests for tasks that yo
           ```
 
         - `counter_callback`: Object to be called for calculating the cost when making requests to this tool. It can be set to `None`.
-
+        - `keychain`: The object handling the keys. Note that if you use the `with_key_rotation` decorator, you don't need to return this.
     - **Exceptions**: A compliant implementation of the `run` function must capture any exception raised during its execution and return it appropriately, for example as an error code in `result_str`. If `run` raises an exception the Mech will capture and output an `Invalid response` string.
 
-2. **Upload the tool file to IPFS.** You can push your tool to IPFS like the other packages:
+   - **Dependencies**: If your tool introduces new dependencies, you must add them in the following three places:
+      - [pyproject.toml](./pyproject.toml): Add your dependencies under the `[tool.poetry.dependencies]` section.
+      - [tox.ini](./tox.ini): Add your dependencies under the `[testenv]deps` section.
+      - [aea-config.yaml](./packages/valory/agents/mech/aea-config.yaml): Ensure your dependencies are listed under the `dependencies` key.
+
+3. **Upload the tool file to IPFS.** You can push your tool to IPFS like the other packages:
 
     ```bash
     autonomy push-all
@@ -258,7 +263,7 @@ You can create and mint your own AI Mech that handles requests for tasks that yo
 
     Your tool will be available on [packages.json](packages/packages.json).
 
-3. **Configure your service.** Edit the `.env` file. The demo service has this configuration:
+4. **Configure your service.** Edit the `.env` file. The demo service has this configuration:
 
     ```bash
     FILE_HASH_TO_TOOLS=[["bafybeiaodddyn4eruafqg5vldkkjfglj7jg76uvyi5xhi2cysktlu4w6r4",["openai-gpt-3.5-turbo-instruct","openai-gpt-3.5-turbo","openai-gpt-4"]],["bafybeiepc5v4ixwuu5m6p5stck5kf2ecgkydf6crj52i5umnl2qm5swb4i",["stabilityai-stable-diffusion-v1-5","stabilityai-stable-diffusion-xl-beta-v2-2-2","stabilityai-stable-diffusion-512-v2-1","stabilityai-stable-diffusion-768-v2-1"]]]
@@ -272,7 +277,7 @@ You can create and mint your own AI Mech that handles requests for tasks that yo
     API_KEYS=[[openai, dummy_api_key],[<your_api_key_id>, <your_api_key>]]
     ```
 
-4. **Mint your agent service** in the [Autonolas Protocol](https://registry.olas.network/services/mint), and create a Mech for it in [Mech Hub](https://aimechs.autonolas.network/factory). This will allow you to set the `SAFE_CONTRACT_ADDRESS` and `AGENT_MECH_CONTRACT_ADDRESS` in the `.1env` file.
+5. **Mint your agent service** in the [Autonolas Protocol](https://registry.olas.network/services/mint), and create a Mech for it in [Mech Hub](https://aimechs.autonolas.network/factory). This will allow you to set the `SAFE_CONTRACT_ADDRESS` and `AGENT_MECH_CONTRACT_ADDRESS` in the `.env` file.
 
     > **Warning**
     > AI Mechs run on the [Gnosis chain](https://www.gnosis.io/). You must ensure that your wallet is connected to the [Gnosis chain](https://www.gnosis.io/) before using the [Autonolas Protocol](https://protocol.autonolas.network/services/mint) and [Mech Hub](https://aimechs.autonolas.network/factory).
@@ -290,7 +295,7 @@ You can create and mint your own AI Mech that handles requests for tasks that yo
     }
     ```
 
-5. **Run your service.** You can take a look at the `run_service.sh` script and execute your service locally as [above](#option-2-run-the-mech-as-an-agent-service).
+6. **Run your service.** You can take a look at the `run_service.sh` script and execute your service locally as [above](#option-2-run-the-mech-as-an-agent-service).
 
     Once your service works locally, you have the option to run it on a hosted service like [Propel](https://propel.valory.xyz/).
 

diff --git a/packages/gnosis/__init__.py b/packages/gnosis/__init__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+#   Copyright 2024 Valory AG
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+# ------------------------------------------------------------------------------
diff --git a/packages/gnosis/customs/__init__.py b/packages/gnosis/customs/__init__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+#   Copyright 2024 Valory AG
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+# ------------------------------------------------------------------------------
diff --git a/packages/gnosis/customs/ofv_market_resolver/benchmark.py b/packages/gnosis/customs/ofv_market_resolver/benchmark.py
@@ -0,0 +1,151 @@
+import typer
+import json
+import pandas as pd
+from packages.gnosis.customs.ofv_market_resolver.ofv_market_resolver import (
+    run as ofv_run,
+)
+from packages.napthaai.customs.resolve_market_reasoning.resolve_market_reasoning import (
+    Results,
+    run as original_run,
+)
+from pydantic import SecretStr, ValidationError
+from joblib import Memory
+
+# File cache to not re-run the same questions.
+# Cached results are stored under the `.benchmark-cache` directory.
+MEMORY = Memory(".benchmark-cache", verbose=0)
+# Typer application on which the CLI commands of this script are registered.
+APP = typer.Typer()
+
+
+@MEMORY.cache
+def ofv_run_cached(
+    question: str,
+    openai_api_key: SecretStr,
+    serper_api_key: SecretStr,
+) -> bool | None:
+    """Run the OFV resolver on one question and return its `has_occurred` value.
+
+    The call is wrapped by `MEMORY.cache`, so a repeated call with the same
+    arguments is served from the file cache instead of re-running the tool.
+    """
+    # Secrets are only unwrapped right before they are handed to the tool.
+    keys = {
+        "openai": openai_api_key.get_secret_value(),
+        "serperapi": serper_api_key.get_secret_value(),
+    }
+    # The first element of the tool output is the JSON-serialised result.
+    result_str = ofv_run(prompt=question, api_keys=keys)[0]
+    parsed = json.loads(result_str)
+    return parsed["has_occurred"]
+
+
+@MEMORY.cache
+def run_original_resolver_cached(
+    question: str,
+    openai_api_key: SecretStr,
+    google_api_key: SecretStr,
+    google_engine_id: SecretStr,
+) -> bool | None:
+    """Run the original market resolver on one question, cached via `MEMORY`.
+
+    Returns the `has_occurred` field of the validated tool output, or `None`
+    when running the tool or validating its output raises a `ValueError`.
+    """
+    keys = {
+        "openai": openai_api_key.get_secret_value(),
+        "google_api_key": google_api_key.get_secret_value(),
+        "google_engine_id": google_engine_id.get_secret_value(),
+    }
+    try:
+        outputs = original_run(
+            api_keys=keys,
+            tool="resolve-market-reasoning-gpt-4",
+            prompt=question,
+        )
+        # The first element of the tool output is the JSON dump to validate.
+        validated = Results.model_validate_json(outputs[0])
+    except ValueError:
+        return None
+    return validated.has_occurred
+
+
+@APP.command()
+def full(
+    data_path: str,
+    openai_api_key: str,
+    serper_api_key: str,
+    google_api_key: str,
+    google_engine_id: str,
+) -> None:
+    """
+    Will run the prediction market resolver on all provided data and compare the results.
+
+    Expects a tsv file with columns:
+        - question
+        - resolution (YES/NO, as currently resolved on Omen)
+        - my_resolution (YES/NO, as resolved manually by you, used as ground truth)
+
+    Example command:
+
+    ```
+    python packages/gnosis/customs/ofv_market_resolver/benchmark.py full markets.tsv {openai api key} {serper api key} {google api key} {google engine id}
+    ```
+    """
+    df = pd.read_csv(data_path, sep="\t")
+
+    # Wrap the keys once up front instead of once per row inside the lambdas.
+    openai_secret = SecretStr(openai_api_key)
+    serper_secret = SecretStr(serper_api_key)
+    google_secret = SecretStr(google_api_key)
+    google_engine_secret = SecretStr(google_engine_id)
+
+    def to_label(verdict: bool | None) -> str:
+        """Normalise a resolver verdict to the strings YES/NO/None."""
+        if verdict is None:
+            return "None"
+        return "YES" if verdict else "NO"
+
+    # Run the resolution on all the data.
+    df["ofv_resolution"] = df["question"].apply(
+        lambda q: ofv_run_cached(
+            q,
+            openai_api_key=openai_secret,
+            serper_api_key=serper_secret,
+        )
+    )
+    df["new_original_resolution"] = df["question"].apply(
+        lambda q: run_original_resolver_cached(
+            q,
+            openai_api_key=openai_secret,
+            google_api_key=google_secret,
+            google_engine_id=google_engine_secret,
+        )
+    )
+    # Normalise boolean to YES/NO/None so the columns compare against the
+    # YES/NO strings in `resolution` and `my_resolution`.
+    df["ofv_resolution"] = df["ofv_resolution"].apply(to_label)
+    df["new_original_resolution"] = df["new_original_resolution"].apply(to_label)
+    # Save all the predictions and, separately, those that are incorrect.
+    df.to_csv("markets_resolved.tsv", sep="\t", index=False)
+    df[df["ofv_resolution"] != df["my_resolution"]].to_csv(
+        "markets_resolved_incorretly_by_ofv.tsv", sep="\t", index=False
+    )
+
+    # Calculate the accuracy of each resolution column against the manual
+    # ground truth in `my_resolution`.
+    accuracy_current = sum(df["resolution"] == df["my_resolution"]) / len(df)
+    accuracy_new_original = sum(
+        df["new_original_resolution"] == df["my_resolution"]
+    ) / len(df)
+    accuracy_ofv = sum(df["ofv_resolution"] == df["my_resolution"]) / len(df)
+    # All three lines are percentages; each one carries the `%` sign.
+    print(
+        f"""
+Current accuracy: {accuracy_current*100:.2f}%
+Original's new run accuracy: {accuracy_new_original*100:.2f}%
+OFV's accuracy: {accuracy_ofv*100:.2f}%
+"""
+    )
+
+
+@APP.command()
+def single(
+    question: str,
+    openai_api_key: str,
+    serper_api_key: str,
+) -> None:
+    """
+    Will run the prediction market resolver and print the result on a single question.
+
+    Example command:
+
+    ```
+    python packages/gnosis/customs/ofv_market_resolver/benchmark.py single "Will McDonald's successfully buy back all its Israeli restaurants by 12 April 2024?" {openai api key} {serper api key}
+    ```
+    """
+    # Keys are passed as plain strings here; this command is not cached.
+    keys = {
+        "openai": openai_api_key,
+        "serperapi": serper_api_key,
+    }
+    # NOTE(review): the return value is discarded, so any printed output must
+    # come from `ofv_run` itself - confirm against the resolver.
+    ofv_run(question, api_keys=keys)
+
+
+# Dispatch to the typer CLI only when the file is executed as a script.
+if __name__ == "__main__":
+    APP()