Merge pull request #169 from valory-xyz/fix/tools
Fix/ tools
Adamantios authored Feb 20, 2024
2 parents 827a0b8 + 03552d0 commit 7daacb3
Showing 8 changed files with 58 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -52,3 +52,5 @@ leak_report
agent/
backup_mech/
/packages/valory/skills/termination_abci/
/pip
/tool_test.py
4 changes: 4 additions & 0 deletions tools/prediction_request/prediction_request.py
@@ -156,6 +156,10 @@ def count_tokens(text: str, model: str) -> int:
the probability that the event in "USER_PROMPT" occurs. You must provide original information in each query, and they should not overlap
or lead to obtain the same set of results.
* Output only the JSON object. Do not include any other contents in your response.
* Never use Markdown syntax highlighting, such as ```json``` to surround the output. Only output the raw json string.
* This is incorrect:"```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
* This is incorrect:```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
* This is correct:"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
"""


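The rules added above (and repeated in the other prediction tools touched by this commit) instruct the model to return a bare JSON object rather than a ```json fenced block. As a rough illustration of the failure mode they guard against, here is a minimal, hypothetical parsing helper, not part of this commit, that strips an accidental fence before calling json.loads:

```python
import json


def parse_model_json(raw: str) -> dict:
    """Parse a model reply, tolerating an accidental ```json ... ``` fence."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        # Keep only the content between the first pair of fences,
        # then drop a leading "json" language tag if present.
        cleaned = cleaned.split("```")[1]
        if cleaned.startswith("json"):
            cleaned = cleaned[len("json"):]
    return json.loads(cleaned)


# Both a fenced and a raw reply parse to the same object.
fenced = '```json{"p_yes": 0.2, "p_no": 0.8, "confidence": 0.7, "info_utility": 0.5}```'
raw = '{"p_yes": 0.2, "p_no": 0.8, "confidence": 0.7, "info_utility": 0.5}'
assert parse_model_json(fenced) == parse_model_json(raw)
```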
@@ -119,7 +119,7 @@
* Never use Markdown syntax highlighting, such as ```json```. Only output the raw json string.
* This is incorrect:"```json{{\n \"queries\": [\"term1\", \"term2\"]}}```"
* This is incorrect:```json"{{\n \"queries\": [\"term1\", \"term2\"]}}"```
* This is correct:"{{\n \"quries\": [\"term1\", \"term2\"]}}"
* This is correct:"{{\n \"queries\": [\"term1\", \"term2\"]}}"
"""

ASSISTANT_TEXT = "```json"
@@ -144,6 +144,10 @@ def count_tokens(text: str, model: str) -> int:
- "info_utility": Utility of the information provided in "ADDITIONAL_INFORMATION" to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).
* The sum of "p_yes" and "p_no" must equal 1.
* Output only the JSON object in your response. Do not include any other contents in your response.
* Never use Markdown syntax highlighting, such as ```json``` to surround the output. Only output the raw json string.
* This is incorrect:"```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
* This is incorrect:```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
* This is correct:"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
"""

URL_QUERY_PROMPT = """
4 changes: 4 additions & 0 deletions tools/prediction_request_sme/prediction_request_sme.py
@@ -113,6 +113,10 @@ def count_tokens(text: str, model: str) -> int:
0 indicates lowest utility; 1 maximum utility.
* The sum of "p_yes" and "p_no" must equal 1.
* Output only the JSON object. Do not include any other contents in your response.
* Never use Markdown syntax highlighting, such as ```json``` to surround the output. Only output the raw json string.
* This is incorrect:"```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
* This is incorrect:```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
* This is correct:"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
"""

URL_QUERY_PROMPT = """
@@ -132,6 +132,10 @@ def count_tokens(text: str, model: str) -> int:
- "info_utility": Utility of the information provided in "ADDITIONAL_INFORMATION" to help you make the prediction ranging from 0 (lowest utility) to 1 (maximum utility).
* The sum of "p_yes" and "p_no" must equal 1.
* Output only the JSON object in your response. Do not include any other contents in your response.
* Never use Markdown syntax highlighting, such as ```json``` to surround the output. Only output the raw json string.
* This is incorrect:"```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
* This is incorrect:```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
* This is correct:"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
"""

URL_QUERY_PROMPT = """
@@ -22,6 +22,8 @@
from datetime import datetime, timezone
from typing import Any, Callable, Dict, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed

import openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from openai import OpenAI
from pydantic import BaseModel
@@ -31,7 +33,7 @@
import requests
from bs4 import BeautifulSoup
from requests import Response
from chromadb import Collection, EphemeralClient
from chromadb import Collection, EphemeralClient, Documents, Embeddings
import chromadb.utils.embedding_functions as embedding_functions
from tiktoken import encoding_for_model

@@ -112,6 +114,10 @@
- "info_utility": Utility of the information provided in "ADDITIONAL_INFORMATION" to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).
* The sum of "p_yes" and "p_no" must equal 1.
* Output only the JSON object in your response. Do not include any other contents in your response.
* Never use Markdown syntax highlighting, such as ```json``` to surround the output. Only output the raw json string.
* This is incorrect:"```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
* This is incorrect:```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
* This is correct:"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
"""


@@ -158,8 +164,32 @@
Use markdown syntax. Include as much relevant information as possible and try not to summarize.
"""

class CustomOpenAIEmbeddingFunction(embedding_functions.OpenAIEmbeddingFunction):
    """Custom OpenAI embedding function"""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize custom OpenAI embedding function"""
        super().__init__(*args, **kwargs)
        # OpenAI@v1 compatible
        self._client = openai.embeddings

    def __call__(self, texts: Documents) -> Embeddings:
        """Return embedding"""
        # replace newlines, which can negatively affect performance.
        texts = [t.replace("\n", " ") for t in texts]

        # Call the OpenAI Embedding API
        embeddings = self._client.create(input=texts, model=self._model_name).data

        # Sort resulting embeddings by index
        sorted_embeddings = sorted(embeddings, key=lambda e: e.index) # type: ignore

        # Return just the embeddings
        return [result.embedding for result in sorted_embeddings]


# MODELS

MAX_TEXT_LENGTH = 7500


class WebSearchResult(BaseModel):
    title: str
@@ -293,7 +323,7 @@ def search(queries: list[str], api_key: str, filter = lambda x: True) -> list[tu

def create_embeddings_from_results(results: list[WebScrapeResult], text_splitter, api_key: str) -> Collection:
    client = EphemeralClient()
    openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    openai_ef = CustomOpenAIEmbeddingFunction(
        api_key=api_key,
        model_name="text-embedding-ada-002"
    )
@@ -307,12 +337,13 @@ def create_embeddings_from_results(results: list[WebScrapeResult], text_splitter

    for scrape_result in results:
        text_splits = text_splitter.split_text(scrape_result.content)
        texts += text_splits
        if not len(texts + text_splits) > MAX_TEXT_LENGTH:
            texts += text_splits
            metadatas += [scrape_result.dict() for _ in text_splits]

    collection.add(
        documents=texts,
        metadatas=metadatas, # type: ignore
        metadatas=metadatas,  # type: ignore
        ids=[f'id{i}' for i in range(len(texts))]
    )
    return collection
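The CustomOpenAIEmbeddingFunction added above adapts chromadb's bundled OpenAIEmbeddingFunction to the openai>=1.0 client by calling openai.embeddings directly, and create_embeddings_from_results now uses it while only adding new text splits when the combined list stays within MAX_TEXT_LENGTH. Below is a rough usage sketch, not part of this commit, assuming the class is importable from the tool module and that an OPENAI_API_KEY environment variable is set:

```python
import os

from chromadb import EphemeralClient

# Assumption: CustomOpenAIEmbeddingFunction (added in this commit) is in scope,
# e.g. imported from the embedding tool module.
openai_ef = CustomOpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],  # assumption: key supplied via the environment
    model_name="text-embedding-ada-002",
)

client = EphemeralClient()
collection = client.create_collection(name="scrape_demo", embedding_function=openai_ef)

# Each document is embedded through openai.embeddings.create, as in __call__ above.
collection.add(
    documents=["First scraped chunk.", "Second scraped chunk."],
    metadatas=[{"url": "https://example.com"}, {"url": "https://example.com"}],
    ids=["id0", "id1"],
)
print(collection.query(query_texts=["scraped chunk"], n_results=1))
```

Under these assumptions the sketch mirrors what create_embeddings_from_results does with the scraped chunks, just on a toy collection.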
5 changes: 4 additions & 1 deletion tox.ini
@@ -532,4 +532,7 @@ tqdm: >=4.56.0
blspy: >=1.0.16
; sub-dep of cosmos
hypothesis: ==6.21.6
; we don't modify
; sub-dep of chromadb, has Apache 2.0 Licence https://github.com/chroma-core/hnswlib/blob/master/LICENSE
chroma-hnswlib: ==0.7.3
; sub-dep of chromadb, has Apache 2.0 Licence https://github.com/apache/pulsar-client-python/blob/main/LICENSE
pulsar-client: ==3.4.0
