Replace the rule-based sudoku checker in tot.py with an LLM-based checker.
Note: the "index" blob ids below predate the review edits made to the added lines.

diff --git a/src/codeinterpreterapi/thoughts/tot.py b/src/codeinterpreterapi/thoughts/tot.py
index 47dd2b0f..c9be1594 100644
--- a/src/codeinterpreterapi/thoughts/tot.py
+++ b/src/codeinterpreterapi/thoughts/tot.py
@@ -1,7 +1,9 @@
 import os
-import re
-from typing import Tuple
+from typing import Optional, Tuple
 
+from langchain.prompts import PromptTemplate
+from langchain.schema import HumanMessage
+from langchain.schema.language_model import BaseLanguageModel
 from langchain_experimental.tot.base import ToTChain
 from langchain_experimental.tot.checker import ToTChecker
 from langchain_experimental.tot.thought import ThoughtValidity
@@ -22,19 +24,58 @@ print(problem_description)
 
 
 #######
-# The following code implement a simple rule based checker for
-# a specific 4x4 sudoku puzzle.
+# The following code implements an LLM-based checker
 #######
 
 
 class MyChecker(ToTChecker):
+    """Thought checker that asks an LLM to grade the latest thought."""
+
+    # Chat model used for grading; create() assigns it after construction.
+    llm: Optional[BaseLanguageModel] = None
+    prompt: PromptTemplate = PromptTemplate(
+        input_variables=["problem_description", "thoughts"],
+        template="""
+    Given the following problem description and a series of thoughts, evaluate the validity of the last thought.
+
+    Problem Description:
+    {problem_description}
+
+    Thoughts:
+    {thoughts}
+
+    Evaluate the last thought and return one of the following:
+    - VALID_FINAL if the last thought is a valid final solution to the problem.
+    - VALID_INTERMEDIATE if the last thought is a valid intermediate step towards the solution.
+    - INVALID if the last thought is invalid or contradicts the problem description.
+
+    Evaluation:
+    """,
+    )
+
     def evaluate(self, problem_description: str, thoughts: Tuple[str, ...] = ()) -> ThoughtValidity:
-        last_thought = thoughts[-1]
-        clean_solution = last_thought.replace(" ", "").replace('"', "")
-        regex_solution = clean_solution.replace("*", ".").replace("|", "\\|")
-        if sudoku_solution in clean_solution:
+        """Ask the LLM to grade the last thought in ``thoughts``.
+
+        The reply is upper-cased and matched by prefix, so punctuation or a
+        rationale after the keyword is tolerated.  Any reply that does not
+        start with VALID_FINAL or VALID_INTERMEDIATE counts as INVALID.
+
+        Raises:
+            ValueError: if ``llm`` has not been assigned.
+        """
+        if self.llm is None:
+            raise ValueError("MyChecker.llm must be set before calling evaluate()")
+
+        prompt = self.prompt.format(problem_description=problem_description, thoughts="\n".join(thoughts))
+        message = HumanMessage(content=prompt)
+        evaluation = self.llm([message]).content.strip().upper()
+
+        print("evaluation=", evaluation)
+
+        # Prefix match: an exact comparison would reject e.g. "VALID_FINAL.".
+        if evaluation.startswith("VALID_FINAL"):
             return ThoughtValidity.VALID_FINAL
-        elif re.search(regex_solution, sudoku_solution):
+        elif evaluation.startswith("VALID_INTERMEDIATE"):
             return ThoughtValidity.VALID_INTERMEDIATE
         else:
             return ThoughtValidity.INVALID
@@ -43,23 +84,33 @@ def evaluate(self, problem_description: str, thoughts: Tuple[str, ...] = ()) ->
 #######
 # Testing the MyChecker class above:
 #######
-def test_checker():
-    checker = MyChecker()
-    assert checker.evaluate("", ("3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1",)) == ThoughtValidity.VALID_INTERMEDIATE
-    assert checker.evaluate("", ("3,4,1,2|1,2,3,4|2,1,4,3|4,3,2,1",)) == ThoughtValidity.VALID_FINAL
-    assert checker.evaluate("", ("3,4,1,2|1,2,3,4|2,1,4,3|4,3,*,1",)) == ThoughtValidity.VALID_INTERMEDIATE
-    assert checker.evaluate("", ("3,4,1,2|1,2,3,4|2,1,4,3|4,*,3,1",)) == ThoughtValidity.INVALID
+def test_checker(tot_chain):
+    """Check the chain's checker against known sudoku states (calls the LLM)."""
+    checker = tot_chain.checker
+    assert (
+        checker.evaluate(problem_description, ("3,*,*,2|1,*,3,*|*,1,*,3|4,*,*,1",))
+        == ThoughtValidity.VALID_INTERMEDIATE
+    )
+    assert checker.evaluate(problem_description, ("3,4,1,2|1,2,3,4|2,1,4,3|4,3,2,1",)) == ThoughtValidity.VALID_FINAL
+    assert (
+        checker.evaluate(problem_description, ("3,4,1,2|1,2,3,4|2,1,4,3|4,3,*,1",))
+        == ThoughtValidity.VALID_INTERMEDIATE
+    )
+    assert checker.evaluate(problem_description, ("3,4,1,2|1,2,3,4|2,1,4,3|4,*,3,1",)) == ThoughtValidity.INVALID
 
 
 #######
 # Initialize and run the ToT chain,
 # with maximum number of interactions k set to 30 and
-# the maximum number child thoughts c set to 8.
+# the maximum number of child thoughts c set to 5.
 #######
 
 
 def create(llm):
-    tot_chain = ToTChain(llm=llm, checker=MyChecker(), k=30, c=5, verbose=True, verbose_llm=False)
+    """Build a ToT chain whose checker uses ``llm``, run it on the problem, and return the chain."""
+    checker = MyChecker()
+    checker.llm = llm
+    tot_chain = ToTChain.from_llm(llm=llm, checker=checker, k=30, c=5, verbose=True, verbose_llm=False)
     tot_chain.run(problem_description=problem_description)
     return tot_chain
 
@@ -78,5 +129,4 @@ def test_create():
 
 
 if __name__ == "__main__":
-    test_checker()
     test_create()