From ec4dff98b0acfa5b3f00e70ef57af986d0865b6a Mon Sep 17 00:00:00 2001 From: jmoreira-valory Date: Fri, 4 Oct 2024 20:33:31 +0200 Subject: [PATCH 1/3] feat: propose question tool --- .../customs/propose_question/__init__.py | 20 + .../customs/propose_question/component.yaml | 22 + .../propose_question/propose_question.py | 406 ++++++++++++++++++ 3 files changed, 448 insertions(+) create mode 100644 packages/valory/customs/propose_question/__init__.py create mode 100644 packages/valory/customs/propose_question/component.yaml create mode 100644 packages/valory/customs/propose_question/propose_question.py diff --git a/packages/valory/customs/propose_question/__init__.py b/packages/valory/customs/propose_question/__init__.py new file mode 100644 index 00000000..f1a2c81f --- /dev/null +++ b/packages/valory/customs/propose_question/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# ------------------------------------------------------------------------------ +# +# Copyright 2024 Valory AG +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ------------------------------------------------------------------------------ + +"""This module contains the propose question tool.""" diff --git a/packages/valory/customs/propose_question/component.yaml b/packages/valory/customs/propose_question/component.yaml new file mode 100644 index 00000000..8a3ba3f6 --- /dev/null +++ b/packages/valory/customs/propose_question/component.yaml @@ -0,0 +1,22 @@ +name: openai_request +author: valory +version: 0.1.0 +type: custom +description: A tool that runs a prompt against the OpenAI API. +license: Apache-2.0 +aea_version: '>=1.0.0, <2.0.0' +fingerprint: + __init__.py: bafybeibbn67pnrrm4qm3n3kbelvbs3v7fjlrjniywmw2vbizarippidtvi + openai_request.py: bafybeiefuf2ssw6eid2bzy5qzoihxsc5arsmith62nyxisuoukqan5ady4 +fingerprint_ignore_patterns: [] +entry_point: openai_request.py +callable: run +dependencies: + openai: + version: ==1.30.2 + tiktoken: + version: ==0.7.0 + anthropic: + version: ==0.21.3 + google-api-python-client: + version: ==2.95.0 diff --git a/packages/valory/customs/propose_question/propose_question.py b/packages/valory/customs/propose_question/propose_question.py new file mode 100644 index 00000000..3bc0c9e5 --- /dev/null +++ b/packages/valory/customs/propose_question/propose_question.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +# ------------------------------------------------------------------------------ +# +# Copyright 2023-2024 Valory AG +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+# ------------------------------------------------------------------------------
+
+"""This module contains the propose question tool."""
+import functools
+import json
+import random
+from datetime import datetime, timezone
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import anthropic
+import googleapiclient.errors
+import openai
+import requests
+from gql import Client, gql
+from gql.transport.requests import RequestsHTTPTransport
+from openai import OpenAI
+from pydantic import BaseModel
+from tiktoken import encoding_for_model
+
+
+NEWSAPI_TOP_HEADLINES_URL = "https://newsapi.org/v2/top-headlines"
+NEWSAPI_DEFAULT_NEWS_SOURCES = [
+    "bbc-news",
+    "bbc-sport",
+    "abc-news",
+    "cnn",
+    "google-news",
+    "reuters",
+    "usa-today",
+    "breitbart-news",
+    "the-verge",
+    "techradar",
+]
+
+OMEN_SUBGRAPH_URL = "https://gateway-arbitrum.network.thegraph.com/api/{subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"
+HTTP_OK = 200
+# Upper bounds on the number of articles and existing questions included in the prompt
+MAX_ARTICLES = 40
+MAX_LATEST_QUESTIONS = 40
+
+# Market creator addresses whose questions are fetched from the Omen subgraph
+FPMM_CREATORS = [
+    "0x89c5cc945dd550bcffb72fe42bff002429f46fec",
+    "0xffc8029154ecd55abed15bd428ba596e7d23f557",
+]
+FPMMS_QUERY = """
+    query fpmms_query($creator_in: [Bytes!], $first: Int) {
+        fixedProductMarketMakers(
+            where: {creator_in: $creator_in}
+            orderBy: creationTimestamp
+            orderDirection: desc
+            first: $first
+        ) {
+            question {
+                title
+            }
+        }
+    }
+    """
+
+DEFAULT_TOPICS = [
+    "business",
+    "cryptocurrency",
+    "politics",
+    "science",
+    "technology",
+    "trending",
+    "fashion",
+    "social",
+    "health",
+    "sustainability",
+    "internet",
+    "travel",
+    "food",
+    "pets",
+    "animals",
+    "curiosities",
+    "music",
+    "economy",
+    "arts",
+    "entertainment",
+    "weather",
+    "sports",
+    "finance",
+    "international",
+]
+
+PROPOSE_QUESTION_PROMPT = """You are provided with a numbered list of recent news article
+    snippets under ARTICLES. Your task is to formulate one novel prediction market question
+    with clear, objective outcomes. The question must satisfy all of the following criteria:
+    - It must be of public interest.
+    - It must be substantially different from the ones in EXISTING_QUESTIONS.
+    - It must be related to an event happening on or before EVENT_DAY.
+    - It must not encourage unethical behavior or violence.
+    - It should follow a structure similar to these: "Will EVENT occur on or before EVENT_DAY?", "Will EVENT occur by EVENT_DAY?", etc.
+    - It must not include unmeasurable statements like "significant increase".
+    - It must not reference matches, sports events, or any other event that does not occur on EVENT_DAY.
+    - The answer must be 'yes' or 'no'.
+    - The answer must be verifiable through publicly available sources or news media.
+    - The answer must not be an opinion.
+    - The answer must be known after EVENT_DAY.
+
+    EXISTING_QUESTIONS
+    {latest_questions}
+
+    EVENT_DAY
+    {event_day}
+
+    ARTICLES
+    {articles}
+
+    TOPICS
+    {topics}
+    """
+
+# Global OpenAI client, managed by OpenAIClientManager
+client: Optional[OpenAI] = None
+
+
+MechResponse = Tuple[str, Optional[str], Optional[Dict[str, Any]], Any, Any]
+
+
+class LLMQuestionProposalSchema(BaseModel):
+    """Schema enforced on the LLM reply via structured outputs."""
+
+    question: str
+    topic: str
+    article_id: int
+
+
+def with_key_rotation(func: Callable):
+    """Decorate a tool so that rate-limited API keys are rotated and the call retried."""
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> MechResponse:
+        # this is expected to be a KeyChain object,
+        # although it is not explicitly typed as such
+        api_keys = kwargs["api_keys"]
+        retries_left: Dict[str, int] = api_keys.max_retries()
+
+        def execute() -> MechResponse:
+            """Retry the function with a new key."""
+            try:
+                result = func(*args, **kwargs)
+                return result + (api_keys,)
+            except anthropic.RateLimitError as e:
+                # try again with a new key
+                service = "anthropic"
+                if retries_left[service] <= 0:
+                    raise e
+                retries_left[service] -= 1
+                api_keys.rotate(service)
+                return execute()
+            except openai.RateLimitError as e:
+                # try again with a new key
+                if retries_left["openai"] <= 0 and retries_left["openrouter"] <= 0:
+                    raise e
+                retries_left["openai"] -= 1
+                retries_left["openrouter"] -= 1
+                api_keys.rotate("openai")
+                api_keys.rotate("openrouter")
+                return execute()
+            except googleapiclient.errors.HttpError as e:
+                # try again with a new key, but only on rate-limit errors
+                rate_limit_exceeded_code = 429
+                if e.status_code != rate_limit_exceeded_code:
+                    raise e
+                service = "google_api_key"
+                if retries_left[service] <= 0:
+                    raise e
+                retries_left[service] -= 1
+                api_keys.rotate(service)
+                return execute()
+            except Exception as e:
+                return str(e), "", None, None, api_keys
+
+        mech_response = execute()
+        return mech_response
+
+    return wrapper
+
+
+class OpenAIClientManager:
+    """Client context manager for OpenAI."""
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def __enter__(self) -> OpenAI:
+        global client
+        if client is None:
+            client = OpenAI(api_key=self.api_key)
+        return client
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        global client
+        if client is not None:
+            client.close()
+            client = None
+
+
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
+DEFAULT_OPENAI_SETTINGS = {
+    "max_tokens": 500,
+    "temperature": 0.7,
+}
+DEFAULT_ENGINES = {
+    "propose-question": "gpt-4o-2024-08-06"
+}
+ALLOWED_TOOLS = ["propose-question"]
+
+
+def format_utc_timestamp(utc_timestamp: int) -> str:
+    """Format a UTC timestamp as a date string, e.g. '09 October 2024'."""
+    dt = datetime.fromtimestamp(utc_timestamp, tz=timezone.utc)
+    formatted_date = dt.strftime("%d %B %Y")
+    return formatted_date
+
+
+def gather_articles(news_sources: List[str], newsapi_api_key: str) -> Optional[List[Dict[str, Any]]]:
+    """Gather news from NewsAPI (top-headlines endpoint)."""
+    headers = {"X-Api-Key": newsapi_api_key}
+    parameters = {
+        "sources": ",".join(news_sources),
+        "pageSize": "100",  # TODO: pagination
+    }
+    url = NEWSAPI_TOP_HEADLINES_URL
+    response = requests.get(
+        url=url,
+        headers=headers,
+        params=parameters,
+        timeout=60,
+    )
+    if response.status_code != HTTP_OK:
+        print(
+            f"Could not retrieve response from {url}. "
+            f"Received status code {response.status_code}. "
+            f"{response}"
+        )
+        return None
+
+    response_data = json.loads(response.content.decode("utf-8"))
+    articles = response_data["articles"]
+    return articles
+
+
+def gather_latest_questions(subgraph_api_key: str) -> List[str]:
+    """Fetch the titles of the latest questions created on Omen by FPMM_CREATORS."""
+    transport = RequestsHTTPTransport(url=OMEN_SUBGRAPH_URL.format(subgraph_api_key=subgraph_api_key))
+    client = Client(transport=transport, fetch_schema_from_transport=True)
+    variables = {
+        "creator_in": FPMM_CREATORS,
+        "first": MAX_LATEST_QUESTIONS,
+    }
+    response = client.execute(gql(FPMMS_QUERY), variable_values=variables)
+    items = response.get("fixedProductMarketMakers", [])
+    output = [q["question"]["title"] for q in items]
+    return output
+
+
+# TODO: enable key rotation
+# @with_key_rotation
+def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any, Any]:
+    """Run the task."""
+
+    # Verify the input
+    tool = kwargs.get("tool")
+    if not tool or tool not in ALLOWED_TOOLS:
+        return (
+            f"Error: Tool {tool} is not in the list of supported tools.",
+            None,
+            None,
+            None,
+        )
+
+    resolution_time = kwargs.get("resolution_time")
+    if resolution_time is None:
+        return (
+            "Error: 'resolution_time' is not defined.",
+            None,
+            None,
+            None,
+        )
+
+    # Gather the latest opened questions, either from the input or from The Graph
+    latest_questions = kwargs.get("latest_questions")
+    if latest_questions is None:
+        latest_questions = gather_latest_questions(kwargs["api_keys"]["subgraph"])
+
+    latest_questions = random.sample(latest_questions, min(MAX_LATEST_QUESTIONS, len(latest_questions)))
+    latest_questions_string = "\n".join(latest_questions)
+
+    # Gather recent news articles from NewsAPI
+    news_sources = kwargs.get("news_sources", NEWSAPI_DEFAULT_NEWS_SOURCES)
+    articles = gather_articles(news_sources, kwargs["api_keys"]["newsapi"])
+
+    if articles is None:
+        return (
+            "Error: Failed to retrieve articles from NewsAPI.",
+            None,
+            None,
+            None,
+        )
+
+    articles = random.sample(articles, min(MAX_ARTICLES, len(articles)))
+
+    # Number the articles; the LLM refers back to these zero-based ids via 'article_id'
+    articles_string = ""
+    for i, article in enumerate(articles):
+        articles_string += f"{i} - {article['title']} ({article['publishedAt']}): {article['content']}\n"
+
+    # Define topics
+    topics = kwargs.get("topics", DEFAULT_TOPICS)
+    topics_string = ", ".join(topics)
+
+    # Call the LLM
+    with OpenAIClientManager(kwargs["api_keys"]["openai"]):
+        max_tokens = kwargs.get("max_tokens", DEFAULT_OPENAI_SETTINGS["max_tokens"])
+        temperature = kwargs.get("temperature", DEFAULT_OPENAI_SETTINGS["temperature"])
+        counter_callback = kwargs.get("counter_callback", None)  # NOTE: currently unused
+        engine = kwargs.get("engine", DEFAULT_ENGINES.get(tool))
+
+        prompt_values = {
+            "articles": articles_string,
+            "topics": topics_string,
+            "event_day": format_utc_timestamp(resolution_time),
+            "latest_questions": latest_questions_string,
+        }
+
+        prompt = PROPOSE_QUESTION_PROMPT.format(**prompt_values)
+
+        moderation_result = client.moderations.create(input=prompt)
+        if moderation_result.results[0].flagged:
+            return (
+                "Error: Moderation flagged the prompt as being in violation of the terms of use.",
+                None,
+                None,
+                None,
+            )
+
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt},
+        ]
+        # NOTE: the json_schema response format requires a model with structured
+        # output support, such as the default gpt-4o-2024-08-06 engine.
+        response = client.chat.completions.create(
+            model=engine,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            n=1,
+            timeout=120,
+            stop=None,
+            response_format={
+                'type': 'json_schema',
+                'json_schema':
+                {
+                    "name":"whocares",
+                    "schema": LLMQuestionProposalSchema.model_json_schema()
+                }
+            },
+        )
+
+        response = json.loads(response.choices[0].message.content)
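+    # The reply is JSON conforming to LLMQuestionProposalSchema:
+    # {"question": ..., "topic": ..., "article_id": ...}. Replace the numeric
+    # article_id with the full source article before returning.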
+    article_id = response["article_id"]
+    del response["article_id"]
+    response["article"] = articles[article_id]
+    return response, prompt, None, None
+
+
+if __name__ == "__main__":
+    import os
+    from packages.valory.skills.task_execution.utils.apis import KeyChain
+
+    tool = "propose-question"
+    keys = KeyChain(
+        {
+            "openai": [os.getenv("OPENAI_API_KEY")],
+            "newsapi": [os.getenv("NEWSAPI_API_KEY")],
+            "subgraph": [os.getenv("SUBGRAPH_API_KEY")],
+        }
+    )
+
+    my_kwargs = dict(
+        tool=tool,
+        api_keys=keys,
+        # news_sources=news_sources,  # Use default value
+        # topics=topics,  # Use default value
+        resolution_time=1728518400,  # 10 October 2024, 00:00:00 UTC
+    )
+
+    print("================================")
+    print(f"Start request {tool=}")
+    tool_output = run(**my_kwargs)
+    print("================================")
+    print(f"Output of {tool=}")
+    print(tool_output[0])

From 10a77a88034b0198a16824a8513b34fd79720037 Mon Sep 17 00:00:00 2001
From: jmoreira-valory
Date: Fri, 4 Oct 2024 20:36:00 +0200
Subject: [PATCH 2/3] chore: fix yaml

---
 packages/valory/customs/propose_question/component.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/packages/valory/customs/propose_question/component.yaml b/packages/valory/customs/propose_question/component.yaml
index 8a3ba3f6..0f558e29 100644
--- a/packages/valory/customs/propose_question/component.yaml
+++ b/packages/valory/customs/propose_question/component.yaml
@@ -1,15 +1,15 @@
-name: openai_request
+name: propose_question
 author: valory
 version: 0.1.0
 type: custom
-description: A tool that runs a prompt against the OpenAI API.
+description: A tool that proposes prediction market questions.
 license: Apache-2.0
 aea_version: '>=1.0.0, <2.0.0'
 fingerprint:
   __init__.py: bafybeibbn67pnrrm4qm3n3kbelvbs3v7fjlrjniywmw2vbizarippidtvi
-  openai_request.py: bafybeiefuf2ssw6eid2bzy5qzoihxsc5arsmith62nyxisuoukqan5ady4
+  propose_question.py: bafybeiefuf2ssw6eid2bzy5qzoihxsc5arsmith62nyxisuoukqan5ady4
 fingerprint_ignore_patterns: []
-entry_point: openai_request.py
+entry_point: propose_question.py
 callable: run
 dependencies:
   openai:

From ca4b906cc49574d54ca75be5f84cf6be714f09d9 Mon Sep 17 00:00:00 2001
From: jmoreira-valory
Date: Fri, 4 Oct 2024 20:43:14 +0200
Subject: [PATCH 3/3] chore: edit placeholder

---
 packages/valory/customs/propose_question/propose_question.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/valory/customs/propose_question/propose_question.py b/packages/valory/customs/propose_question/propose_question.py
index 3bc0c9e5..99161ca1 100644
--- a/packages/valory/customs/propose_question/propose_question.py
+++ b/packages/valory/customs/propose_question/propose_question.py
@@ -364,7 +364,7 @@ def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any, Any]:
                 'type': 'json_schema',
                 'json_schema':
                 {
-                    "name":"whocares",
+                    "name": "LLMQuestionProposalSchema",
                     "schema": LLMQuestionProposalSchema.model_json_schema()
                 }
             },