From 54dd3d264664d1067ce4c27e7c65840f87127791 Mon Sep 17 00:00:00 2001
From: jmoreira-valory
Date: Mon, 12 Feb 2024 10:41:30 +0100
Subject: [PATCH] chore: initial commit

---
 tools/resolve_market/__init__.py       |  20 ++
 tools/resolve_market/component.yaml    |  14 +
 tools/resolve_market/resolve_market.py | 423 +++++++++++++++++++++++++
 3 files changed, 457 insertions(+)
 create mode 100644 tools/resolve_market/__init__.py
 create mode 100644 tools/resolve_market/component.yaml
 create mode 100644 tools/resolve_market/resolve_market.py

diff --git a/tools/resolve_market/__init__.py b/tools/resolve_market/__init__.py
new file mode 100644
index 00000000..fd8e1dda
--- /dev/null
+++ b/tools/resolve_market/__init__.py
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+# Copyright 2023 Valory AG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------------
+
+"""This module contains the resolve market tool."""
diff --git a/tools/resolve_market/component.yaml b/tools/resolve_market/component.yaml
new file mode 100644
index 00000000..88885324
--- /dev/null
+++ b/tools/resolve_market/component.yaml
+name: resolve_market
+author: valory
+version: 0.1.0
+type: custom
+description: A tool for resolving markets after they have been closed.
+license: Apache-2.0
+aea_version: '>=1.0.0, <2.0.0'
+fingerprint:
+  __init__.py: bafybeidey4syohls5hxmso6qsp5p4uhtzle5txv2mlbym6ktjzknich6oa
+  resolve_market.py: bafybeibqwl52cnz64cysjd2jnjijuakdvyrffapxq65cdzx6g65gu42deq
+fingerprint_ignore_patterns: []
+entry_point: resolve_market.py
+callable: run
+dependencies: {}
\ No newline at end of file
diff --git a/tools/resolve_market/resolve_market.py b/tools/resolve_market/resolve_market.py
new file mode 100644
index 00000000..4aed9861
--- /dev/null
+++ b/tools/resolve_market/resolve_market.py
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+# Copyright 2023 Valory AG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------------
+
+"""This module implements a Mech tool for resolving closed prediction markets."""
+
+import json
+import logging
+import os
+from collections import defaultdict
+from typing import Any, Dict, List, Optional
+
+import requests
+from openai import OpenAI
+
+client: Optional[OpenAI] = None
+
+
+class OpenAIClientManager:
+    """Client context manager for OpenAI."""
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def __enter__(self) -> OpenAI:
+        global client
+        if client is None:
+            client = OpenAI(api_key=self.api_key)
+        return client
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        global client
+        if client is not None:
+            client.close()
+            client = None
+
+
+DEFAULT_OPENAI_SETTINGS = {
+    "max_tokens": 500,
+    "temperature": 0.7,
+}
+MAX_TOKENS = {
+    "gpt-3.5-turbo": 4096,
+    "gpt-4": 8192,
+}
+ALLOWED_TOOLS = [
+    "close_market",
+]
+TOOL_TO_ENGINE = {tool: "gpt-4" for tool in ALLOWED_TOOLS}
+# the default number of URLs to fetch online information for
+DEFAULT_NUM_URLS = defaultdict(lambda: 3)
+DEFAULT_NUM_URLS["close_market"] = 7
+# the default number of words to fetch online information for
+DEFAULT_NUM_WORDS: Dict[str, Optional[int]] = defaultdict(lambda: 300)
+DEFAULT_NUM_WORDS["close_market"] = None
+# how much of the initial content will be kept during summarization
+DEFAULT_COMPRESSION_FACTOR = 0.05
+# the vocabulary to use for the summarization
+DEFAULT_VOCAB = "en_core_web_sm"
+
+NEWSAPI_ENDPOINT = "https://newsapi.org/v2"
+TOP_HEADLINES = "top-headlines"
+EVERYTHING = "everything"
+
+ARTICLE_LIMIT = 1_000
+ADDITIONAL_INFO_LIMIT = 5_000
+HTTP_OK = 200
+ANSWER_NO, ANSWER_YES = (
+    "0x0000000000000000000000000000000000000000000000000000000000000000",
+    "0x0000000000000000000000000000000000000000000000000000000000000001",
+)
+
+URL_QUERY_PROMPT_TEMPLATE = """
+You are an LLM inside a multi-agent system that takes in a prompt of a user requesting a binary outcome
+for a given event. You are provided with an input under the label "USER_PROMPT". You must follow the instructions
+under the label "INSTRUCTIONS". You must provide your response in the format specified under "OUTPUT_FORMAT".
+
+INSTRUCTIONS
+* Read the input under the label "USER_PROMPT" delimited by three backticks.
+* The "USER_PROMPT" specifies an event.
+* The "USER_PROMPT" will contain a date which is in the past.
+* The event will only have two possible outcomes: either the event has happened or the event has not happened.
+* If the event has more than two possible outcomes, you must ignore the rest of the instructions and output the response "Error".
+* You must provide your response in the format specified under "OUTPUT_FORMAT".
+* Do not include any other contents in your response.
+
+USER_PROMPT:
+```
+{user_prompt}
+```
+
+OUTPUT_FORMAT
+* Your output response must be only a single JSON object to be parsed by Python's "json.loads()".
+* The JSON must contain one field: "queries".
+  - "queries": An array of strings of size between 1 and 4. Each string must be a search engine query that can help
+    obtain relevant information to check whether the event in "USER_PROMPT" has occurred. Each query must contribute
+    original information; the queries should not overlap or lead to the same set of results.
+* Output only the JSON object. Do not include any other contents in your response.
+"""
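+
+# Illustrative only (an assumption about a well-formed model reply, not part of
+# the template): a valid response to URL_QUERY_PROMPT_TEMPLATE is a single JSON
+# object such as
+#   {"queries": ["Gaza cease-fire 5 February 2024", "Gaza truce agreement status"]}
+# which _parse_llm_output() below can load with json.loads().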
+OUTCOME_PROMPT_TEMPLATE = """
+You are an LLM inside a multi-agent system that takes in a prompt of a user requesting a decision on a binary outcome
+for a given event. You are provided with an input under the label "USER_PROMPT". You must follow the instructions
+under the label "INSTRUCTIONS". You must provide your response in the format specified under "OUTPUT_FORMAT".
+
+INSTRUCTIONS
+* Read the input under the label "USER_PROMPT" delimited by three backticks.
+* The "USER_PROMPT" specifies an event.
+* The "USER_PROMPT" will contain a date which is in the past.
+* The event will only have two possible outcomes: either the event has happened or the event has not happened.
+* If the event has more than two possible outcomes, you must ignore the rest of the instructions and output the response "Error".
+* You must provide a decision on whether the event in "USER_PROMPT" has occurred or not.
+* You are provided an itemized list of information under the label "ADDITIONAL_INFORMATION" delimited by three backticks.
+* You can use any item in "ADDITIONAL_INFORMATION" in addition to your training data.
+* If an item in "ADDITIONAL_INFORMATION" is not relevant, you must ignore that item for the estimation.
+* You must provide your response in the format specified under "OUTPUT_FORMAT".
+* Do not include any other contents in your response.
+
+USER_PROMPT:
+```
+{user_prompt}
+```
+
+ADDITIONAL_INFORMATION:
+```
+{additional_information}
+```
+
+OUTPUT_FORMAT
+* Your output response must be only a single JSON object to be parsed by Python's "json.loads()".
+* The JSON must contain one field: "has_occurred". The value of "has_occurred" must be true if the event in
+"USER_PROMPT" has occurred, and false if it has not.
+* Output only the JSON object. Do not include any other contents in your response.
+"""
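+
+# Illustrative only (an assumption about a well-formed model reply): the
+# expected response to OUTCOME_PROMPT_TEMPLATE is a single JSON object such as
+#   {"has_occurred": true}
+# _get_answer() below parses it and requires exactly this field.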
+""" + +logging.basicConfig(level=logging.INFO) + +class Object(object): + pass + +class CloseMarketBehaviourMock: + + params: Object + context: Object + + def __init__( + self, + market_closing_newsapi_api_key: str, + newsapi_endpoint: str, + ): + self.context = Object() + self.context.logger = logging.getLogger(__name__) + self.params = Object() + self.params.market_closing_newsapi_api_key = market_closing_newsapi_api_key + self.params.newsapi_endpoint = newsapi_endpoint + + def get_http_response( + self, + method: str, + url: str, + headers: Dict[str, str], + parameters: Dict[str, Any], + ) -> requests.Response: + """Make an HTTP request and yield the response.""" + if method == "GET": + response = requests.get(url, headers=headers, params=parameters) + elif method == "POST": + response = requests.post(url, headers=headers, json=parameters) + else: + raise ValueError(f"Unsupported HTTP method: {method}") + + return response + + def _parse_llm_output( + self, output: str, required_fields: Optional[List[str]] = None + ) -> Optional[Dict[str, Any]]: + """Parse the llm output to json.""" + try: + json_data = json.loads(output) + if required_fields is not None: + for field in required_fields: + if field not in json_data: + self.context.logger.error( + f"Field {field} not in json_data {json_data}" + ) + return None + return json_data + except json.JSONDecodeError as e: + self.context.logger.error(f"Error decoding JSON response. {e}") + return None + + def _append_articles_to_input( + self, news_list: List[dict], input_string: str + ) -> str: + """Append articles to input.""" + for article in news_list: + title = article["title"] + content = article["content"][:ARTICLE_LIMIT] + date = article["publishedAt"] + current_article = f"- ({date}) {title}\n {content}\n\n" + if len(input_string) + len(current_article) > ADDITIONAL_INFO_LIMIT: + break + input_string += current_article + return input_string + + def do_llm_request(self, **kwargs) -> str: + + with OpenAIClientManager(kwargs["api_keys"]["openai"]): + max_tokens = kwargs.get("max_tokens", DEFAULT_OPENAI_SETTINGS["max_tokens"]) + temperature = kwargs.get("temperature", DEFAULT_OPENAI_SETTINGS["temperature"]) + counter_callback = kwargs.get("counter_callback", None) + prompt = kwargs.get("prompt") + engine = "gpt-4" + moderation_result = client.moderations.create(input=prompt) + if moderation_result.results[0].flagged: + return "Moderation flagged the prompt as in violation of terms.", None, None + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt}, + ] + response = client.chat.completions.create( + model=engine, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + n=1, + timeout=120, + stop=None, + ) + res = Object() + res.value = response.choices[0].message.content + + return res + + + def _get_answer(self, question: str, **kwargs) -> Optional[str]: + """Get an answer for the provided questions""" + + # An initial query is made to Newsapi to detect ratelimit issue + # This query is also included in the input_news passed to the LLM, + # if the call succeeds. Newsapi returns 0 output if included the + # question mark ? sign. 
+        input_news = ""
+        initial_news_articles = self._get_news(
+            question.replace("Will", "").replace("will", "").replace("?", "").strip()
+        )
+        if initial_news_articles is None:
+            self.context.logger.info(
+                f"Could not get news articles for query {question} (initial)"
+            )
+            return None
+        input_news = self._append_articles_to_input(initial_news_articles, input_news)
+
+        prompt_values = {
+            "user_prompt": question,
+        }
+
+        prompt = URL_QUERY_PROMPT_TEMPLATE.format(**prompt_values)
+        kwargs1 = {"prompt": prompt, **kwargs}
+        llm_response_message = self.do_llm_request(**kwargs1)
+
+        result_str = llm_response_message.value.replace("OUTPUT:", "").strip()
+        self.context.logger.info(f"Got LLM response: {result_str}")
+        result = self._parse_llm_output(result_str, required_fields=["queries"])
+        if result is None:
+            self.context.logger.info(f"Could not parse LLM response: {result_str}")
+            return None
+
+        queries = result["queries"]
+        self.context.logger.info(f"Got queries: {queries}")
+        if len(queries) == 0:
+            self.context.logger.info(f"No queries found in LLM response: {result}")
+            return None
+
+        # query NewsAPI for each generated query
+        for query in queries:
+            news_articles = self._get_news(query)
+            if news_articles is None:
+                self.context.logger.info(
+                    f"Could not get news articles for query {query}"
+                )
+                return None
+            input_news = self._append_articles_to_input(news_articles, input_news)
+
+        if len(input_news) == 0:
+            self.context.logger.info(f"No news articles found for queries {queries}")
+            return None
+
+        prompt_values["additional_information"] = input_news
+
+        # LLM request message
+        prompt = OUTCOME_PROMPT_TEMPLATE.format(**prompt_values)
+        kwargs2 = {"prompt": prompt, **kwargs}
+        llm_response_message = self.do_llm_request(**kwargs2)
+
+        result_str = llm_response_message.value.replace("OUTPUT:", "").strip()
+        self.context.logger.info(f"Got LLM response: {result_str}")
+        json_data = self._parse_llm_output(result_str, required_fields=["has_occurred"])
+        if json_data is None:
+            self.context.logger.info(f"Could not parse LLM response: {result_str}")
+            return None
+
+        has_occurred = bool(json_data["has_occurred"])
+        self.context.logger.info(f"Has {question!r} occurred?: {has_occurred}")
+
+        json_data["question"] = question
+        return json_data
+
+    def _get_news(self, query: str) -> Optional[List[Dict[str, Any]]]:
+        """Auxiliary method to collect data from the NewsAPI endpoint."""
+
+        headers = {"X-Api-Key": self.params.market_closing_newsapi_api_key}
+
+        parameters = {
+            "q": query,
+            "pageSize": "100",
+        }
+        # search through all articles (the "everything" endpoint)
+        url = f"{self.params.newsapi_endpoint}/{EVERYTHING}"
+        response = self.get_http_response(
+            method="GET",
+            url=url,
+            headers=headers,
+            parameters=parameters,
+        )
+        if response.status_code != HTTP_OK:
+            self.context.logger.error(
+                f"Could not retrieve response from {self.params.newsapi_endpoint}. "
+                f"Received status code {response.status_code}.\n{response}"
+            )
+            return None
+
+        response_data = json.loads(response.text)
+        self.context.logger.info(
+            f"Response received from {self.params.newsapi_endpoint}:\n {response_data}"
+        )
+        return response_data["articles"]
+
+
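+# Note (illustrative): on success, _get_answer() returns the parsed JSON dict,
+# e.g. {"has_occurred": true, "question": "..."}; on any failure it returns
+# None. run() below forwards this value unchanged.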
+ f"Received status code {response.status_code}.\n{response}" + ) + return None + + response_data = json.loads(response.text) + self.context.logger.info( + f"Response received from {self.params.newsapi_endpoint}:\n {response_data}" + ) + return response_data["articles"] + + +def run(**kwargs) -> Tuple[Optional[str], Optional[Dict[str, Any]], Any]: + """Run the task""" + tool = kwargs["tool"] + question = kwargs["question"] + max_tokens = kwargs.get("max_tokens", DEFAULT_OPENAI_SETTINGS["max_tokens"]) + temperature = kwargs.get("temperature", DEFAULT_OPENAI_SETTINGS["temperature"]) + num_urls = kwargs.get("num_urls", DEFAULT_NUM_URLS[tool]) + num_words = kwargs.get("num_words", DEFAULT_NUM_WORDS[tool]) + compression_factor = kwargs.get("compression_factor", DEFAULT_COMPRESSION_FACTOR) + vocab = kwargs.get("vocab", DEFAULT_VOCAB) + counter_callback = kwargs.get("counter_callback", None) + api_keys = kwargs.get("api_keys", {}) + google_api_key = api_keys.get("google_api_key", None) + google_engine_id = api_keys.get("google_engine_id", None) + + if tool not in ALLOWED_TOOLS: + raise ValueError(f"Tool {tool} is not supported.") + + engine = TOOL_TO_ENGINE[tool] + + market_behavior = CloseMarketBehaviourMock( + market_closing_newsapi_api_key=api_keys["newsapi"], + newsapi_endpoint=NEWSAPI_ENDPOINT, + ) + + kwargs.pop('question', None) + v = market_behavior._get_answer(question, **kwargs) + return v + + +if __name__ == "__main__": + + newsapi_api_key = os.getenv("NEWSAPI_API_KEY") + openai_api_key = os.getenv("OPENAI_API_KEY") + + my_kwargs = { + "tool": "close_market", + "question": "Will a cease-fire be implemented in the Gaza Strip by 5 February 2024?", + "max_tokens": 100, + "temperature": 0.7, + "num_urls": 5, + "num_words": 200, + "compression_factor": 2.0, + "vocab": ["word1", "word2", "word3"], + "counter_callback": None, + "api_keys": { + "newsapi": newsapi_api_key, + "openai": openai_api_key, + } + } + + print(run(**my_kwargs)) \ No newline at end of file