From 7f9fca6c7e51ce38849ffa04626c9b0bb7afd7a5 Mon Sep 17 00:00:00 2001 From: nik Date: Thu, 26 Oct 2023 20:19:04 +0100 Subject: [PATCH 1/5] Add skill.evolved indicator and selector --- adala/agents/base.py | 3 +++ adala/skills/base.py | 20 ++++++++++++++++++++ adala/skills/skillset.py | 14 +++++++++----- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/adala/agents/base.py b/adala/agents/base.py index c65f65c..b0ecdda 100644 --- a/adala/agents/base.py +++ b/adala/agents/base.py @@ -273,6 +273,9 @@ def learn( print_text(f"Re-apply {learned_skill.name} skill to dataset ...") experience = learned_skill.apply(dataset, runtime, experience=experience) + # Mark skill as evolved - it will not be selected for improvement again + learned_skill.evolved = True + # Update skills and memory based on experience if update_skills: self.skills = skills diff --git a/adala/skills/base.py b/adala/skills/base.py index 09e3b3c..b0adc96 100644 --- a/adala/skills/base.py +++ b/adala/skills/base.py @@ -69,6 +69,11 @@ class BaseSkill(BaseModel, ABC): examples=['predictions'], default='predictions' ) + evolved: bool = Field( + title='Evolved', + description='Whether the skill has been evolved or not.', + default=False + ) @model_validator(mode='after') def validate_inputs(self): @@ -169,6 +174,18 @@ def analyze( ShortTermMemory: The updated experience after analysis. """ + @abstractmethod + def can_be_improved(self, experience: ShortTermMemory) -> bool: + """ + Checks if the current skill can be improved. + + Args: + experience (ShortTermMemory): The current experience. + + Returns: + bool: True if the skill can be improved, False otherwise. 
+ """ + @abstractmethod def improve( self, @@ -308,6 +325,9 @@ def analyze( experience.errors = errors return experience + def can_be_improved(self, experience: ShortTermMemory) -> bool: + return not self.evolved + def improve( self, experience: ShortTermMemory, diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py index 4704642..bc8b421 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -4,6 +4,7 @@ from adala.datasets.base import Dataset from adala.runtimes.base import Runtime from adala.memories.base import ShortTermMemory +from adala.utils.logs import print_text from .base import BaseSkill, LLMSkill @@ -20,7 +21,8 @@ class SkillSet(BaseModel, ABC): skills (Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]]): Provided skills """ - skills: Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]] + # skills: Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]] + skills: Dict[str, BaseSkill] @abstractmethod def apply(self, dataset: Dataset, runtime: Runtime, experience: Optional[ShortTermMemory] = None) -> ShortTermMemory: @@ -79,7 +81,7 @@ class LinearSkillSet(SkillSet): skill_sequence: List[str] = None - @field_validator('skills') + @field_validator('skills', mode='before') def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]) -> Dict[str, BaseSkill]: """ Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances. 
@@ -164,11 +166,12 @@ def apply( skill = self.skills[skill_name] # use input dataset for the first node in the pipeline input_dataset = dataset if i == 0 else experience.predictions + print_text(f"Applying skill: {skill_name}") experience = skill.apply(input_dataset, runtime, experience) return experience - def select_skill_to_improve(self, experience: ShortTermMemory) -> BaseSkill: + def select_skill_to_improve(self, experience: ShortTermMemory) -> Optional[BaseSkill]: """ Picks the next skill for improvement in the sequence. @@ -179,8 +182,9 @@ def select_skill_to_improve(self, experience: ShortTermMemory) -> BaseSkill: BaseSkill: The next skill selected for improvement. """ - # TODO: implement real logic for skill selection - return self.skills[self.skill_sequence[-1]] + for skill_name in self.skill_sequence: + if self.skills[skill_name].can_be_improved(experience): + return self.skills[skill_name] def __rich__(self): """Returns a rich representation of the skill.""" From a981b8ce680db72576a9c32f0668ad628b2b04eb Mon Sep 17 00:00:00 2001 From: nik Date: Sun, 29 Oct 2023 19:34:10 +0000 Subject: [PATCH 2/5] Add tests, some API changes to comply with LinearSkillSet --- adala/agents/base.py | 132 ++++++------ adala/environments/base.py | 102 ++++++---- adala/memories/__init__.py | 2 +- adala/memories/base.py | 58 +----- adala/memories/file_memory.py | 21 +- adala/runtimes/base.py | 83 +++----- adala/skills/base.py | 168 ++++++---------- adala/skills/skillset.py | 110 ++++++---- adala/utils/internal_data.py | 1 + tests/test_agent_basics.py | 367 ++++++++++++++++++++++++++++++++++ tests/test_classification.py | 24 +-- tests/test_environments.py | 147 ++++++++++---- tests/test_llm_skillset.py | 6 +- tests/utils.py | 18 +- 14 files changed, 806 insertions(+), 433 deletions(-) create mode 100644 tests/test_agent_basics.py diff --git a/adala/agents/base.py b/adala/agents/base.py index b0ecdda..7c4dcfa 100644 --- a/adala/agents/base.py +++ b/adala/agents/base.py @@ -1,15 
+1,17 @@ from pydantic import BaseModel, Field, SkipValidation, field_validator, model_validator from abc import ABC, abstractmethod -from typing import Any, Optional, List, Dict, Union -from adala.environments.base import Environment, BasicEnvironment +from typing import Any, Optional, List, Dict, Union, Tuple +from rich import print + +from adala.environments.base import Environment, BasicEnvironment, GroundTruthSignal from adala.datasets import Dataset, DataFrameDataset from adala.runtimes.base import Runtime, LLMRuntime, LLMRuntimeType, LLMRuntimeModelType from adala.runtimes.openai import OpenAIRuntime -from adala.memories.base import ShortTermMemory, LongTermMemory +from adala.memories.base import Memory from adala.skills.base import BaseSkill from adala.skills.skillset import SkillSet, LinearSkillSet from adala.utils.logs import print_dataframe, print_text, print_error -from adala.utils.internal_data import InternalDataFrame +from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat class Agent(BaseModel, ABC): @@ -26,9 +28,9 @@ class Agent(BaseModel, ABC): """ environment: Union[InternalDataFrame, Dataset, Environment] = Field(default_factory=DataFrameDataset) - skills: Union[SkillSet, BaseSkill, List[BaseSkill], Dict[str, BaseSkill]] + skills: SkillSet - memory: LongTermMemory = Field(default=None) + memory: Memory = Field(default=None) runtimes: Optional[Dict[str, Runtime]] = Field( default_factory=lambda: { 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'), @@ -90,7 +92,7 @@ def environment_validator(cls, v): v = BasicEnvironment(dataset=v) return v - @field_validator('skills') + @field_validator('skills', mode='before') def skills_validator(cls, v): """ Validates and possibly transforms the skills attribute. 
@@ -103,14 +105,11 @@ def skills_validator(cls, v): """ if isinstance(v, SkillSet): - pass + return v elif isinstance(v, BaseSkill): - v = LinearSkillSet(skills={'skill_0': v}) - elif isinstance(v, list): - v = LinearSkillSet(skills={f'skill_{i}': skill for i, skill in enumerate(v)}) - elif isinstance(v, dict): - v = LinearSkillSet(skills=v) - return v + return LinearSkillSet(skills={v.name: v}) + else: + return LinearSkillSet(skills=v) @model_validator(mode='after') def verify_input_parameters(self): @@ -169,40 +168,30 @@ def get_teacher_runtime(self, runtime: Optional[str] = None) -> Runtime: raise ValueError(f'Teacher Runtime "{runtime}" not found.') return self.teacher_runtimes[runtime] - def apply_skills( - self, - dataset: Union[Dataset, InternalDataFrame], - runtime: Optional[Union[str, Runtime]] = None, - experience: Optional[ShortTermMemory] = None, - ) -> ShortTermMemory: + def run(self, dataset: Union[Dataset, InternalDataFrame], runtime: Optional[str] = None) -> InternalDataFrame: """ - Applies the agent's skills to a given dataset using the specified runtime. + Runs the agent on the specified dataset. Args: - dataset (Dataset): The dataset to apply skills on. - runtime (str, optional): The runtime to use. Defaults to None. - experience (ShortTermMemory, optional): The agent's short-term memory. Defaults to None. + dataset (Union[Dataset, InternalDataFrame]): The dataset to run the agent on. + runtime (str, optional): The name of the runtime to use. Defaults to None, use the default runtime. Returns: - ShortTermMemory: The short-term memory resulting from the application of skills. + InternalDataFrame: The dataset with the agent's predictions. 
""" - runtime = runtime or self.default_runtime - if isinstance(dataset, InternalDataFrame): - dataset = DataFrameDataset(df=dataset) - if isinstance(runtime, str): - runtime = self.get_runtime(runtime=runtime) - return self.skills.apply(dataset=dataset, runtime=runtime, experience=experience) + runtime = self.get_runtime(runtime=runtime) + predictions = self.skills.apply(dataset, runtime=runtime) + return predictions def learn( self, learning_iterations: int = 3, accuracy_threshold: float = 0.9, - update_skills: bool = True, update_memory: bool = True, request_environment_feedback: bool = True, - experience: Optional[ShortTermMemory] = None, runtime: Optional[str] = None, - ) -> ShortTermMemory: + teacher_runtime: Optional[str] = None, + ) -> GroundTruthSignal: """ Enables the agent to learn and improve its skills based on interactions with its environment. @@ -221,67 +210,64 @@ def learn( runtime = self.get_runtime(runtime=runtime) # TODO: support teacher runtime input, not default - teacher_runtime = self.get_teacher_runtime(runtime=self.default_teacher_runtime) + teacher_runtime = self.get_teacher_runtime(runtime=teacher_runtime) - skills = self.skills.model_copy(deep=True) dataset = self.environment.as_dataset() # Apply agent skills to dataset and get experience with predictions - experience = self.apply_skills(dataset=dataset, runtime=runtime, experience=experience) + predictions = self.skills.apply(dataset, runtime=runtime) - # Agent select one skill to improve - learned_skill = skills.select_skill_to_improve(experience) - - # Request feedback from environment is necessary - if request_environment_feedback: - self.environment.request_feedback(learned_skill, experience) + ground_truth_signal = None for iteration in range(learning_iterations): print_text(f'\n\n=> Iteration #{iteration}: Comparing to ground truth, analyzing and improving ...') - # 1. 
EVALUATION PHASE: Compare predictions to ground truth - experience = self.environment.compare_to_ground_truth(learned_skill, experience) + # Request feedback from environment is necessary + if request_environment_feedback: + self.environment.request_feedback(self.skills, predictions) + + # Compare predictions to ground truth -> get ground truth signal + ground_truth_signal = self.environment.compare_to_ground_truth(self.skills, predictions) print_text(f'Comparing predictions to ground truth data ...') - print_dataframe(experience.evaluations) + print(ground_truth_signal) + + # Use ground truth signal to find the skill to improve + accuracy = ground_truth_signal.get_accuracy() + train_skill = self.skills.select_skill_to_improve(accuracy, accuracy_threshold) + if not train_skill: + print_text(f'No skill to improve found. Stopping learning process.') + break + # select the worst performing skill + print_text(f'Accuracy = {accuracy[train_skill.name] * 100:0.2f}%', style='bold red') + + skill_errors = ground_truth_signal.get_errors(train_skill.name) # 2. ANALYSIS PHASE: Analyze evaluation experience, optionally use long term memory print_text(f'Analyze evaluation experience ...') - experience = learned_skill.analyze( - experience=experience, + error_analysis = train_skill.analyze( + predictions=predictions, + errors=skill_errors, student_runtime=runtime, teacher_runtime=teacher_runtime, memory=self.memory ) - print_text(f'Number of errors: {len(experience.errors)}') - - print_text(f'Accuracy = {experience.accuracy*100:0.2f}%', style='bold red') - if experience.accuracy >= accuracy_threshold: - print_text(f'Accuracy threshold reached ({experience.accuracy} >= {accuracy_threshold})') - break + print_text(error_analysis) + if self.memory and update_memory: + self.memory.remember(error_analysis, self.skills) # 3. 
IMPROVEMENT PHASE: Improve skills based on analysis - print_text(f"Improve \"{learned_skill.name}\" skill based on analysis ...") - experience = learned_skill.improve( - experience=experience, + print_text(f"Improve \"{train_skill.name}\" skill based on analysis ...") + train_skill.improve( + error_analysis=error_analysis, runtime=teacher_runtime, - update_instructions=True ) - print_text(f'Updated instructions for skill "{learned_skill.name}":\n') - print_text(learned_skill.instructions, style='bold green') + print_text(f'Updated instructions for skill "{train_skill.name}":\n') + print_text(train_skill.instructions, style='bold green') # 4. RE-APPLY PHASE: Re-apply skills to dataset - print_text(f"Re-apply {learned_skill.name} skill to dataset ...") - experience = learned_skill.apply(dataset, runtime, experience=experience) - - # Mark skill as evolved - it will not be selected for improvement again - learned_skill.evolved = True - - # Update skills and memory based on experience - if update_skills: - self.skills = skills - - if self.memory and update_memory: - self.memory.remember(experience, self.skills) + print_text(f"Re-apply {train_skill.name} skill to dataset ...") + self.skills[train_skill.name] = train_skill + predictions = self.skills.apply(predictions, runtime=runtime, improved_skill=train_skill.name) print_text('Train is done!') - return experience + return ground_truth_signal diff --git a/adala/environments/base.py b/adala/environments/base.py index 4f4a3f0..def9ea1 100644 --- a/adala/environments/base.py +++ b/adala/environments/base.py @@ -1,13 +1,37 @@ -from pydantic import BaseModel, dataclasses, Field, field_validator +from pydantic import BaseModel, Field, field_validator from abc import ABC, abstractmethod -from typing import Any, Optional, Dict, Union, Callable +from typing import Any, Optional, Dict, Union, Callable, Dict -from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat +from adala.utils.internal_data import 
InternalDataFrame, InternalSeries, InternalDataFrameConcat from adala.skills.base import BaseSkill -from adala.memories.base import ShortTermMemory +from adala.skills.skillset import SkillSet from adala.datasets import Dataset, DataFrameDataset +class GroundTruthSignal(BaseModel): + match: InternalDataFrame + errors: Optional[Dict[str, InternalDataFrame]] = None + + def get_accuracy(self) -> InternalSeries: + return self.match.mean() + + def get_errors(self, skill_name: str) -> InternalDataFrame: + errors = self.errors[skill_name] + assert len(errors.columns) == 2 # ["predictions", "ground_truth name"] + return errors + + def __rich__(self): + text = '[bold blue]Ground Truth Signal:[/bold blue]\n\n' + text += f'\n[bold]Match[/bold]\n{self.match}' + if self.errors is not None: + for skill_name, errors in self.errors.items(): + text += f'\n[bold]Errors for {skill_name}[/bold]\n{errors}' + return text + + class Config: + arbitrary_types_allowed = True + + class Environment(BaseModel, ABC): """Abstract base class for environments. @@ -19,11 +43,11 @@ class Environment(BaseModel, ABC): """ @abstractmethod - def request_feedback(self, skill: BaseSkill, experience: ShortTermMemory): + def request_feedback(self, skill_set: SkillSet, predictions: InternalDataFrame): """Request user feedback using predictions and update internal ground truth set.""" @abstractmethod - def compare_to_ground_truth(self, skill: BaseSkill, experience: ShortTermMemory) -> ShortTermMemory: + def compare_to_ground_truth(self, skill_set: SkillSet, predictions: InternalDataFrame) -> GroundTruthSignal: """Compare predictions with ground truth and return the results.""" @abstractmethod @@ -53,14 +77,11 @@ class BasicEnvironment(Environment): Defaults to an empty DataFrameDataset. ground_truth_column (str): Name of the column containing ground truth in the dataset. Defaults to 'ground_truth'. - _prediction_column (str): Name of the column containing predictions. 
""" ground_truth_dataset: Union[InternalDataFrame, DataFrameDataset] = Field(default_factory=DataFrameDataset) - ground_truth_column: str = 'ground_truth' - - _prediction_column: str + ground_truth_columns: Dict[str, str] @field_validator('ground_truth_dataset') def _validate_ground_truth_dataset(cls, v): @@ -68,43 +89,46 @@ def _validate_ground_truth_dataset(cls, v): return DataFrameDataset(df=v) return v - def request_feedback(self, skill: BaseSkill, experience: ShortTermMemory): + def request_feedback(self, skill: BaseSkill, predictions: InternalDataFrame): """In the BasicEnvironment, ground truth is already provided with the input data.""" - def compare_to_ground_truth(self, skill: BaseSkill, experience: ShortTermMemory) -> ShortTermMemory: + def compare_to_ground_truth(self, skill_set: SkillSet, predictions: InternalDataFrame) -> GroundTruthSignal: """Compare the predictions with the ground truth using exact matching. Args: - skill (BaseSkill): The skill being evaluated. - experience (ShortTermMemory): The experience memory containing predictions. - + skill_set (SkillSet): The skill set being evaluated. + predictions (InternalDataFrame): The predictions to compare with ground truth. Returns: - ShortTermMemory: Updated memory containing evaluation results against ground truth. + GroundTruthSignal: The ground truth signal. 
""" - experience = experience.model_copy() - - gt = self.ground_truth_dataset.df[self.ground_truth_column] - pred = experience.predictions - # select - gt = gt[gt.index.isin(pred.index)] - if gt.empty: - # return empty memory - return experience - - gt = gt.to_frame(self.ground_truth_column) - - # compare ground truth with predictions using exact matching - match_column_name = f'{self.ground_truth_column}__x__{skill.name}' - evaluations = InternalDataFrameConcat([ - pred, - (gt[self.ground_truth_column] == pred[skill.name]).rename(match_column_name) - ], axis=1) - experience.evaluations = evaluations - # remember the last column names used in evaluations - experience.ground_truth_column_name = self.ground_truth_column - experience.match_column_name = match_column_name - return experience + ground_truth_match = InternalDataFrame() + errors = {} + for skill_id, skill in skill_set.skills.items(): + gt_column = self.ground_truth_columns[skill.name] + gt = self.ground_truth_dataset.df[gt_column] + pred = predictions[skill.name] + # from ground truth dataset, select only the rows that are in the predictions + gt, pred = gt.align(pred) + # compare ground truth with predictions + # TODO: we can customize the matching function here beyond exact matching + gt_pred_match = (gt == pred)[gt.notnull() & pred.notnull()] + error_index = gt_pred_match[~gt_pred_match].index + # concatenate errors - dataframe with two columns: predictions and ground truth + errors[skill.name] = InternalDataFrameConcat([pred[error_index], gt[error_index]], axis=1) + errors[skill.name].columns = ["predictions", gt_column] + # concatenate matching columns + ground_truth_match = InternalDataFrameConcat([ + # previous skills' ground truth matches + ground_truth_match, + # current skill's ground truth match + gt_pred_match.rename(skill.name), + ], axis=1) + + return GroundTruthSignal( + match=ground_truth_match.reindex(predictions.index), + errors=errors + ) def as_dataset(self) -> Dataset: """Return the 
ground truth dataset. diff --git a/adala/memories/__init__.py b/adala/memories/__init__.py index 8f218bf..cf4e934 100644 --- a/adala/memories/__init__.py +++ b/adala/memories/__init__.py @@ -1,2 +1,2 @@ from .file_memory import FileMemory -from .base import ShortTermMemory, LongTermMemory \ No newline at end of file +from .base import Memory \ No newline at end of file diff --git a/adala/memories/base.py b/adala/memories/base.py index 822f9dd..db6376c 100644 --- a/adala/memories/base.py +++ b/adala/memories/base.py @@ -1,61 +1,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, Optional, TYPE_CHECKING, Dict -from pydantic import BaseModel +from pydantic import BaseModel, Field from adala.datasets.base import Dataset, InternalDataFrame from rich import print -if TYPE_CHECKING: - from adala.skills.skillset import SkillSet - -class ShortTermMemory(BaseModel): - """ - Base class for short term memory storage - """ - dataset: Dataset = None - predictions: InternalDataFrame = None - evaluations: InternalDataFrame = None - ground_truth_column_name: str = None - match_column_name: str = None - errors: InternalDataFrame = None - accuracy: float = None - initial_instructions: str = None - updated_instructions: str = None - - class Config: - arbitrary_types_allowed = True - - def reset(self): - self.predictions = None - self.evaluations = None - self.errors = None - self.accuracy = None - self.initial_instructions = None - self.updated_instructions = None - - def __rich__(self): - text = '[bold blue]Agent Experience:[/bold blue]\n\n' - if self.predictions is not None: - text += f'\n[bold]Predictions[/bold]\n{self.predictions}' - if self.evaluations is not None: - text += f'\n[bold]Evaluations[/bold]\n{self.evaluations}' - if self.errors is not None: - text += f'\n[bold]Errors[/bold]\n{self.errors}' - if self.accuracy is not None: - text += 
f'\n[bold]Accuracy[/bold]\n{self.accuracy}' - if self.initial_instructions is not None: - text += f'\n[bold]Initial Instructions[/bold]\n{self.initial_instructions}' - if self.updated_instructions is not None: - text += f'\n[bold]Updated Instructions[/bold]\n{self.updated_instructions}' - return text - - def display(self): - print(self) - - -class LongTermMemory(BaseModel, ABC): +class Memory(BaseModel, ABC): """ Base class for long-term memories. @@ -63,13 +15,13 @@ class LongTermMemory(BaseModel, ABC): """ @abstractmethod - def remember(self, experience: ShortTermMemory, skills: SkillSet): + def remember(self, observation: str, experience: Any): """ Base method for remembering experiences in long term memory. """ @abstractmethod - def retrieve(self, observations: ShortTermMemory) -> ShortTermMemory: + def retrieve(self, observation: str) -> Any: """ Base method for retrieving past experiences from long term memory, based on current observations """ diff --git a/adala/memories/file_memory.py b/adala/memories/file_memory.py index f297266..7707006 100644 --- a/adala/memories/file_memory.py +++ b/adala/memories/file_memory.py @@ -1,21 +1,26 @@ -from .base import LongTermMemory, ShortTermMemory +import json +from .base import Memory from typing import Any -class FileMemory(LongTermMemory): +class FileMemory(Memory): filepath: str - def remember(self, experience: ShortTermMemory): + def remember(self, observation: str, experience: Any): """ Serialize experience in JSON and append to file """ - experience_json = experience.model_dump_json() - with open(self.filepath, 'a') as f: - f.write(experience_json + '\n') + with open(self.filepath) as f: + memory = json.load(f) + memory[observation] = experience + with open(self.filepath, 'w') as f: + json.dump(memory, f, indent=2) - def retrieve(self, observations: ShortTermMemory) -> ShortTermMemory: + def retrieve(self, observation: str) -> Any: """ Retrieve experience from file """ - raise NotImplementedError + with 
open(self.filepath) as f: + memory = json.load(f) + return memory[observation] diff --git a/adala/runtimes/base.py b/adala/runtimes/base.py index c77cb4b..11869fc 100644 --- a/adala/runtimes/base.py +++ b/adala/runtimes/base.py @@ -96,7 +96,7 @@ def init_runtime(self): self._create_program() return self - def get_outputs(self, output_template: str) -> List[str]: + def get_outputs(self, output_template: Optional[str] = None) -> List[str]: """Extracts output fields from the output template. Args: @@ -107,6 +107,8 @@ def get_outputs(self, output_template: str) -> List[str]: """ # search for all occurrences of {{...'output'...}} # TODO: this is a very naive regex implementation - likely to fail in many cases + if output_template is None: + return [] outputs = re.findall(r'\'(.*?)\'', output_template) return outputs @@ -116,7 +118,8 @@ def _process_record( program, extra_fields, outputs=None - ): + ) -> Dict[str, Any]: + """Processes a single record using the guidance program. Args: @@ -141,12 +144,12 @@ def _process_record( verified_input.update(extra_fields) if self.verbose: - print_text(verified_input) + print_text(str(verified_input)) result = program( silent=not self.verbose, **verified_input ) - if outputs is None: + if not outputs: verified_output = {'': str(result)} else: verified_output = {field: result[field] for field in outputs} @@ -194,12 +197,27 @@ def get_instructions_program(self, instructions): return guidance(instructions, llm=self._llm) + def _prepare_program_and_params(self, input_template, output_template, instructions, extra_fields): + extra_fields = extra_fields or {} + extra_fields = extra_fields.copy() + # if only one program template is provided, use it as a program + if output_template is None and instructions is None: + program = self.get_input_program(input_template) + else: + program = self._program + extra_fields.update({ + 'input_program': self.get_input_program(input_template), + 'output_program': 
self.get_output_program(output_template), + 'instructions_program': self.get_instructions_program(instructions), + }) + return program, extra_fields + def process_record( self, record: Dict[str, Any], input_template: str, - output_template: str, - instructions: str, + output_template: Optional[str] = None, + instructions: Optional[str] = None, extra_fields: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """Processes a record using the provided templates and instructions. @@ -214,18 +232,12 @@ def process_record( Returns: Dict[str, Any]: The processed record. """ - - outputs = re.findall(r'\'(.*?)\'', output_template) + outputs = self.get_outputs(output_template) + program, extra_fields = self._prepare_program_and_params(input_template, output_template, instructions, extra_fields) - input = record.copy() - input.update({ - 'input_program': self.get_input_program(input_template), - 'output_program': self.get_output_program(output_template), - 'instructions_program': self.get_instructions_program(instructions), - }) output = self._process_record( - record=input, - program=self._program, + record=record, + program=program, outputs=outputs, extra_fields=extra_fields ) @@ -253,52 +265,17 @@ def process_batch( """ outputs = self.get_outputs(output_template) - - extra_fields = extra_fields or {} - # copy extra fields to avoid modification of the original dict - extra_fields = extra_fields.copy() - # TODO: it's not efficient way to initialize the program here - should be done once - extra_fields.update({ - 'input_program': self.get_input_program(input_template), - 'output_program': self.get_output_program(output_template), - 'instructions_program': self.get_instructions_program(instructions), - }) + program, extra_fields = self._prepare_program_and_params(input_template, output_template, instructions, extra_fields) output = batch.progress_apply( self._process_record, axis=1, result_type='expand', - program=self._program, + program=program, outputs=outputs, 
extra_fields=extra_fields ) return output - def process_batch_inputs( - self, - batch: InternalDataFrame, - input_template: str, - extra_fields: Optional[Dict[str, Any]] = None, - ) -> InternalDataFrame: - """Processes inputs for a batch of records using the provided input template. - - Args: - batch (InternalDataFrame): The batch of records for input processing. - input_template (str): The template for input processing. - extra_fields (Dict[str, Any], optional): Additional fields to include during input processing. - - Returns: - InternalDataFrame: The processed inputs for the batch of records. - """ - - output = batch.progress_apply( - self._process_record, - axis=1, - result_type='expand', - program=self.get_input_program(input_template), - extra_fields=extra_fields or {} - ) - return output - class CodeRuntime(Runtime): """Base class representing a runtime designed for executing code.""" diff --git a/adala/skills/base.py b/adala/skills/base.py index b0adc96..5abd6e7 100644 --- a/adala/skills/base.py +++ b/adala/skills/base.py @@ -11,7 +11,7 @@ from adala.runtimes.base import LLMRuntime from adala.datasets import Dataset, DataFrameDataset from adala.runtimes.base import Runtime -from adala.memories.base import ShortTermMemory, LongTermMemory +from adala.memories.base import Memory from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat from adala.utils.logs import print_error @@ -69,11 +69,6 @@ class BaseSkill(BaseModel, ABC): examples=['predictions'], default='predictions' ) - evolved: bool = Field( - title='Evolved', - description='Whether the skill has been evolved or not.', - default=False - ) @model_validator(mode='after') def validate_inputs(self): @@ -119,7 +114,10 @@ def __call__(self, input: InternalDataFrame, runtime: Runtime, dataset: Dataset) instructions=self.instructions, extra_fields=self._get_extra_fields() ) - return InternalDataFrameConcat((input, runtime_predictions), axis=1) + 
runtime_predictions.rename(columns={self.prediction_field: self.name}, inplace=True) + output = input.copy() + output[runtime_predictions.columns] = runtime_predictions[runtime_predictions.columns] + return output def _get_extra_fields(self): """ @@ -140,27 +138,27 @@ def _get_extra_fields(self): def apply( self, dataset: Dataset, runtime: Runtime, - experience: ShortTermMemory - ) -> ShortTermMemory: + ) -> InternalDataFrame: """ Applies the skill to a dataset and returns the results. Args: dataset (Dataset): The dataset on which the skill is to be applied. runtime (Runtime): The runtime instance to be used for processing. - experience (ShortTermMemory): Previous experiences or results. - + Returns: ShortTermMemory: The updated experience after applying the skill. """ @abstractmethod def analyze( - self, experience: ShortTermMemory, + self, + predictions: InternalDataFrame, + errors: InternalDataFrame, student_runtime: Runtime, teacher_runtime: Optional[Runtime] = None, - memory: Optional[LongTermMemory] = None, - ) -> ShortTermMemory: + memory: Optional[Memory] = None, + ) -> str: """ Analyzes the results to derive new experiences. @@ -174,33 +172,19 @@ def analyze( ShortTermMemory: The updated experience after analysis. """ - @abstractmethod - def can_be_improved(self, experience: ShortTermMemory) -> bool: - """ - Checks if the current skill can be improved. - - Args: - experience (ShortTermMemory): The current experience. - - Returns: - bool: True if the skill can be improved, False otherwise. - """ - @abstractmethod def improve( self, - experience: ShortTermMemory, - runtime: Runtime, - update_instructions: bool = True, - ) -> ShortTermMemory: + error_analysis: str, + runtime: Runtime + ): """ Refines the current state of the skill based on its experiences. Args: experience (ShortTermMemory): The current experience. runtime (Runtime): The runtime instance to be used for processing. 
- update_instructions (bool, optional): Flag to decide if instructions should be updated. Defaults to True. - + Returns: ShortTermMemory: The updated experience after improvements. """ @@ -217,8 +201,7 @@ def apply( self, dataset: Union[Dataset, InternalDataFrame], runtime: LLMRuntime, - experience: ShortTermMemory - ) -> ShortTermMemory: + ) -> InternalDataFrame: """ Applies the LLM skill on a dataset and returns the results. @@ -230,8 +213,6 @@ def apply( Returns: ShortTermMemory: The updated experience after applying the skill. """ - - experience = experience.model_copy() predictions = [] if isinstance(dataset, InternalDataFrame): @@ -241,21 +222,19 @@ def apply( runtime_predictions = self(batch, runtime, dataset) predictions.append(runtime_predictions) - if not predictions: - predictions = InternalDataFrame() - else: - predictions = InternalDataFrameConcat(predictions, copy=False) - predictions.rename(columns={self.prediction_field: self.name}, inplace=True) + if predictions: + return InternalDataFrameConcat(predictions, copy=False) - experience.predictions = predictions - return experience + return InternalDataFrame(columns=dataset.df.columns.tolist() + [self.name]) def analyze( - self, experience: ShortTermMemory, + self, + predictions: InternalDataFrame, + errors: InternalDataFrame, student_runtime: Runtime, teacher_runtime: Optional[Runtime] = None, - memory: Optional[LongTermMemory] = None - ) -> ShortTermMemory: + memory: Optional[Memory] = None + ) -> str: """ Analyzes the results to identify any discrepancies and returns the observed experience. @@ -268,41 +247,22 @@ def analyze( Returns: ShortTermMemory: The updated experience after analysis. 
""" - - experience = experience.model_copy() - - # TODO: can be multiple prediction validation fields - match = experience.match_column_name - errors = experience.evaluations[~experience.evaluations[match]] - experience.accuracy = experience.evaluations[match].mean() - if errors.empty: - # No errors - nothing to analyze - experience.errors = errors - return experience # collect errors and create error report # first sample errors - make it uniform, but more sophisticated sampling can be implemented - errors = errors.sample(n=min(3, errors.shape[0])) + MAX_ERRORS = 3 + errors = errors.sample(n=min(MAX_ERRORS, errors.shape[0])) + # TODO: ground truth column name can be the input parameter that comes from GT signal + ground_truth_column_name = errors.columns[-1] - # collect error inputs from runtime - extra_fields = self._get_extra_fields() - inputs = student_runtime.process_batch_inputs( - batch=errors, - input_template=self.input_template, - extra_fields=extra_fields - ) - - # construct error report - errors = pd.concat([ - inputs, - errors[[self.name, experience.ground_truth_column_name]] - ], axis=1) - errors.columns = ['input', 'prediction', 'ground_truth'] if not teacher_runtime: teacher_runtime = student_runtime + predictions_and_errors = pd.concat([predictions.loc[errors.index], errors[ground_truth_column_name]], axis=1) + predictions_and_errors.columns = predictions_and_errors.columns[:-1].tolist() + [ground_truth_column_name] + error_reasons = teacher_runtime.process_batch( - errors, + batch=predictions_and_errors, instructions="{{#system~}}\n" "LLM prompt was created by concatenating instructions with text input:\n\n" "Prediction = LLM(Input, Instructions)\n\n" @@ -312,46 +272,48 @@ def analyze( f"Instructions: {self.instructions}\n" "{{~/system}}", input_template="{{#user~}}\n" - "{{input}}\n" - "Prediction: {{prediction}}\n" - "Ground truth: {{ground_truth}}\n" - "Explanation:\n" + f"{{{{>{self.input_template}}}}}\n" + f"Prediction: {{{{{self.name}}}}}\n" 
+ f"Ground truth: {{{{{ground_truth_column_name}}}}}\n" + "Reason:\n" "{{~/user}}", output_template="{{#assistant~}}{{gen 'reason'}}{{~/assistant}}", - extra_fields=extra_fields + extra_fields=self._get_extra_fields() ) - errors['reason'] = error_reasons['reason'] - - experience.errors = errors - return experience + predictions_and_errors['reason'] = error_reasons['reason'] - def can_be_improved(self, experience: ShortTermMemory) -> bool: - return not self.evolved + # build error report + result = teacher_runtime.process_record( + record={ + 'predictions_and_errors': predictions_and_errors.to_dict(orient='records'), + }, + input_template="{{#each predictions_and_errors}}" + "\n{{this.input}}\n" + "Prediction: {{this.prediction}}\n" + "Ground truth: {{this.ground_truth}}\n" + 'Reason: {{this.reason}}\n' + "{{/each}}" + ) + # no specific output specified, all output is in the error report + error_report = result[''] + return error_report def improve( self, - experience: ShortTermMemory, + error_analysis: str, runtime: Runtime, - update_instructions: bool = True, - ) -> ShortTermMemory: + ): """ Refines the LLM skill based on its recent experiences. Args: experience (ShortTermMemory): The current experience. runtime (Runtime): The runtime instance to be used for processing. - update_instructions (bool, optional): Flag to decide if instructions should be updated. Defaults to True. - - Returns: - ShortTermMemory: The updated experience after improvements. 
""" - - experience = experience.model_copy() - errors = experience.errors.to_dict(orient='records') result = runtime.process_record( record={ - 'errors': errors + 'error_analysis': error_analysis }, instructions="{{#system~}}\n" "LLM prompt was created by concatenating instructions with text input:\n\n" @@ -366,22 +328,10 @@ def improve( "{{~/system}}\n", input_template="{{#user~}}\n" f"Old instruction: {self.instructions}\n\n" - "Errors:\n{{#each errors}}" - "\n{{this.input}}\n" - "Prediction: {{this.prediction}}\n" - "Ground truth: {{this.ground_truth}}\n" - "{{/each}}\n" + "Errors:\n{{error_analysis}}\n" "New instruction:\n" "{{~/user}}", output_template="{{#assistant~}}{{gen 'new_instruction'}}{{~/assistant}}", extra_fields=self._get_extra_fields() ) - new_instruction = result['new_instruction'] - - experience.initial_instructions = self.instructions - experience.updated_instructions = new_instruction - - if update_instructions: - self.instructions = new_instruction - - return experience + self.instructions = result['new_instruction'] diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py index bc8b421..b5fff57 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -1,10 +1,10 @@ from pydantic import BaseModel, model_validator, field_validator from abc import ABC, abstractmethod -from typing import List, Union, Dict, Any, Optional +from typing import List, Union, Dict, Any, Optional, Mapping from adala.datasets.base import Dataset from adala.runtimes.base import Runtime -from adala.memories.base import ShortTermMemory from adala.utils.logs import print_text +from adala.utils.internal_data import InternalDataFrame, InternalSeries, InternalDataFrameConcat from .base import BaseSkill, LLMSkill @@ -25,30 +25,65 @@ class SkillSet(BaseModel, ABC): skills: Dict[str, BaseSkill] @abstractmethod - def apply(self, dataset: Dataset, runtime: Runtime, experience: Optional[ShortTermMemory] = None) -> ShortTermMemory: + def apply( + self, + dataset: 
Union[Dataset, InternalDataFrame], + runtime: Runtime, + improved_skill: Optional[str] = None + ) -> InternalDataFrame: """ Apply the skill set to a dataset using a specified runtime. Args: - dataset (Dataset): The dataset to apply the skill set to. + dataset (Union[Dataset, InternalDataFrame]): The dataset to apply the skill set to. runtime (Runtime): The runtime environment in which to apply the skills. - experience (Optional[ShortTermMemory], optional): Existing experience data. Defaults to None. - + improved_skill (Optional[str], optional): Name of the skill to start from (to optimize calculations). Defaults to None. Returns: - ShortTermMemory: Updated experience after applying the skill set. + InternalDataFrame: Skill predictions. """ @abstractmethod - def select_skill_to_improve(self, experience: ShortTermMemory) -> BaseSkill: + def select_skill_to_improve(self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0) -> Optional[BaseSkill]: """ - Select the next skill to enhance based on the current experience. - + Select skill to improve based on accuracy. + + Args: + accuracy (Mapping): Skills accuracies. + accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 1.0. + Returns: + Optional[BaseSkill]: Skill to improve. None if no skill to improve. + """ + + def __getitem__(self, skill_name) -> BaseSkill: + """ + Select skill by name. + + Args: + skill_name (str): Name of the skill to select. + + Returns: + BaseSkill: Skill + """ + return self.skills[skill_name] + + def __setitem__(self, skill_name, skill: BaseSkill): + """ + Set skill by name. + Args: - experience (ShortTermMemory): Current experience data. - + skill_name (str): Name of the skill to set. + skill (BaseSkill): Skill to set. + """ + self.skills[skill_name] = skill + + def get_skill_names(self) -> List[str]: + """ + Get list of skill names. + Returns: - BaseSkill: Skill selected for improvement. + List[str]: List of skill names. 
""" + return list(self.skills.keys()) class LinearSkillSet(SkillSet): @@ -142,48 +177,53 @@ def skill_sequence_validator(self): return self def apply( - self, dataset: Dataset, + self, + dataset: Union[Dataset, InternalDataFrame], runtime: Runtime, - experience: Optional[ShortTermMemory] = None - ) -> ShortTermMemory: + improved_skill: Optional[str] = None, + ) -> InternalDataFrame: """ Sequentially applies each skill on the dataset, enhancing the agent's experience. Args: dataset (Dataset): The dataset to apply the skills on. runtime (Runtime): The runtime environment in which to apply the skills. - experience (Optional[ShortTermMemory], optional): Existing experience data. Defaults to None. - + improved_skill (Optional[str], optional): Name of the skill to improve. Defaults to None. Returns: - ShortTermMemory: Updated experience after sequentially applying the skills. + InternalDataFrame: Skill predictions. """ - if experience is None: - experience = ShortTermMemory(dataset=dataset) - else: - experience = experience.model_copy() - for i, skill_name in enumerate(self.skill_sequence): + predictions = None + if improved_skill: + # start from the specified skill, assuming previous skills have already been applied + skill_sequence = self.skill_sequence[self.skill_sequence.index(improved_skill):] + else: + skill_sequence = self.skill_sequence + for i, skill_name in enumerate(skill_sequence): skill = self.skills[skill_name] # use input dataset for the first node in the pipeline - input_dataset = dataset if i == 0 else experience.predictions + input_dataset = dataset if i == 0 else predictions print_text(f"Applying skill: {skill_name}") - experience = skill.apply(input_dataset, runtime, experience) + predictions = skill.apply(input_dataset, runtime) - return experience + return predictions - def select_skill_to_improve(self, experience: ShortTermMemory) -> Optional[BaseSkill]: + def select_skill_to_improve( + self, + accuracy: Mapping, + accuracy_threshold: 
Optional[float] = 1.0 + ) -> Optional[BaseSkill]: """ - Picks the next skill for improvement in the sequence. - + Selects the skill with the lowest accuracy to improve. + Args: - experience (ShortTermMemory): Current experience data. - + accuracy (Mapping): Accuracy of each skill. + accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 1.0. Returns: - BaseSkill: The next skill selected for improvement. + Optional[BaseSkill]: Skill to improve. None if no skill to improve. """ - for skill_name in self.skill_sequence: - if self.skills[skill_name].can_be_improved(experience): + if accuracy[skill_name] < accuracy_threshold: return self.skills[skill_name] def __rich__(self): diff --git a/adala/utils/internal_data.py b/adala/utils/internal_data.py index 8c8f669..bec57e4 100644 --- a/adala/utils/internal_data.py +++ b/adala/utils/internal_data.py @@ -6,6 +6,7 @@ # Internal data tables representation. Replace this with Dask or Polars in the future. InternalDataFrame = pd.DataFrame +InternalSeries = pd.Series def InternalDataFrame_encoder(df: InternalDataFrame) -> List: diff --git a/tests/test_agent_basics.py b/tests/test_agent_basics.py new file mode 100644 index 0000000..71c6793 --- /dev/null +++ b/tests/test_agent_basics.py @@ -0,0 +1,367 @@ +import pandas as pd + +from utils import patching, PatchedCalls + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, first row + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply first skill 
0->1, second row + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, + # call[2]: analyze errors first skill 0->1 + { + 'input': { + 'input': '0 0 0', + '0->1': '1 5 1', + 'gt_0': '1 1 1' + }, + 'output': { + 'reason': '0 transformed to 5 instead of 1' + } + }, + # call[3]: build error report for first skill 0->1 + { + 'input': { + 'predictions_and_errors': [{ + 'input': '0 0 0', + '0->1': '1 5 1', + 'gt_0': '1 1 1', + 'reason': '0 transformed to 5 instead of 1' + }]}, + 'output': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Reason: 0 transformed to 5 instead of 1 + ''', + }, + # call[4]: improve first skill 0->1 + { + 'input': { + 'error_analysis': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Reason: 0 transformed to 5 instead of 1 + '''}, + 'output': { + 'new_instruction': 'Transform 0 to 1' + } + }, + # call[5]: reapply skill 0->1, first row + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[6]: reapply skill 0->1, first row + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 1 1'}}, + + ] +) +def test_agent_quickstart_single_skill(): + from adala.agents import Agent + from adala.skills import LinearSkillSet + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills={ + "0->1": "...", + } + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1'], + ['0 0 0', '1 1 1'] + ], columns=['input', 'gt_0']), + ground_truth_columns={ + "0->1": "gt_0" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == 'Transform 0 to 1' + # assert final accuracy for skill 0->1 + pd.testing.assert_series_equal( + pd.Series({'0->1': 1.0}), + ground_truth_signal.get_accuracy() + ) + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': 
{'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, first row, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply first skill 0->1, second row, GT = 1 1 1 + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, + # call[2]: apply second skill 1->2, first row, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[3]: apply second skill 1->2, second row, GT = 2 2 2 + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[4]: analyze errors first skill 0->1, error in the second row (0 0 0 -> 1 5 1) + { + 'input': { + 'input': '0 0 0', + '0->1': '1 5 1', + 'gt_0': '1 1 1' + }, + 'output': { + 'reason': '0 transformed to 5 instead of 1' + } + }, + # call[5]: build error report for first skill 0->1 + { + 'input': { + 'predictions_and_errors': [{ + 'input': '0 0 0', + '0->1': '1 5 1', + '1->2': '2 5 2', + 'gt_0': '1 1 1', + 'gt_1': '2 2 2', + 'reason': '0 transformed to 5 instead of 1' + }]}, + 'output': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Reason: 0 transformed to 5 instead of 1 + ''', + }, + # call[6]: improve first skill 0->1 + { + 'input': { + 'error_analysis': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Reason: 0 transformed to 5 instead of 1 + '''}, + 'output': { + 'new_instruction': 'Transform 0 to 1' + } + }, + # call[7]: reapply first skill 0->1, first row, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[8]: reapply first skill 0->1, second row, GT = 1 1 1 + {'input': {'input': '0 0 0'}, 'output': 
{'predictions': '1 1 1'}}, + # call[9]: reapply second skill 1->2, first row, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 2 2'}}, + # call[10]: reapply second skill 1->2, second row, GT = 2 2 2 + {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, + # call[11]: analyze errors second skill 1->2 (first row 2 2 2 instead of 2 5 2) + { + 'input': { + 'input': '0 5 0', + '0->1': '1 5 1', + '1->2': '2 2 2', + 'gt_0': '1 5 1', + 'gt_1': '2 5 2' + }, + 'output': { + 'reason': '5 transformed to 2 instead of remaining 5' + } + }, + # call[12]: build error report for second skill 1->2 + { + 'input': { + 'predictions_and_errors': [{ + 'input': '0 5 0', + '0->1': '1 5 1', + '1->2': '2 2 2', + 'gt_0': '1 5 1', + 'gt_1': '2 5 2', + 'reason': '5 transformed to 2 instead of remaining 5' + }]}, + 'output': '''\ + Input: 1 5 1 + Prediction: 2 2 2 + Ground Truth: 2 5 2 + Reason: 5 transformed to 2 instead of remaining 5 + ''', + }, + # call[13]: improve second skill 1->2 + { + 'input': { + 'error_analysis': '''\ + Input: 1 5 1 + Prediction: 2 2 2 + Ground Truth: 2 5 2 + Reason: 5 transformed to 2 instead of remaining 5 + '''}, + 'output': { + 'new_instruction': 'Transform 1 to 2' + } + }, + # call[14]: reapply second skill 1->2, first row, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[15]: reapply second skill 1->2, second row, GT = 2 2 2 + {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, + ] +) +def test_agent_quickstart_two_skills(): + from adala.agents import Agent + from adala.skills import LinearSkillSet + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills={ + "0->1": "...", + "1->2": "..." 
+ }, + skill_sequence=["0->1", "1->2"] + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1', '2 5 2'], + ['0 0 0', '1 1 1', '2 2 2'] + ], columns=['input', 'gt_0', 'gt_1']), + ground_truth_columns={ + "0->1": "gt_0", + "1->2": "gt_1" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == 'Transform 0 to 1' + assert agent.skills['1->2'].instructions == 'Transform 1 to 2' + # assert final accuracy for skill 0->1 + pd.testing.assert_series_equal( + pd.Series({'0->1': 1.0, '1->2': 1.0}), + ground_truth_signal.get_accuracy() + ) + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply second skill 1->2, GT = 2 5 2 -> ERROR! 
+ {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 4'}}, + # call[3]: apply third skill 2->3, GT = 3 5 3 -> Also error, but it is due to previous error + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 4'}, 'output': {'predictions': '3 5 4'}}, + # call[4]: analyze errors for second skill 1->2 (2 5 4 instead of 2 5 2) + { + 'input': { + 'input': '0 5 0', + '0->1': '1 5 1', + '1->2': '2 5 4', + '2->3': '3 5 4', + 'gt_0': '1 5 1', + 'gt_1': '2 5 2', + 'gt_2': '3 5 3', + }, + 'output': { + 'reason': '1 transformed to 4 instead of 2' + } + }, + # call[5]: build error report for second skill 1->2 + { + 'input': { + 'predictions_and_errors': [{ + 'input': '0 5 0', + '0->1': '1 5 1', + '1->2': '2 5 4', + '2->3': '3 5 4', + 'gt_0': '1 5 1', + 'gt_1': '2 5 2', + 'gt_2': '3 5 3', + 'reason': '1 transformed to 4 instead of 2' + }]}, + 'output': '''\ + Input: 0 5 0 + Prediction: 2 5 4 + Ground Truth: 2 5 2 + Reason: 1 transformed to 4 instead of 2 + ''', + }, + # call[6]: improve first skill 0->1 + { + 'input': { + 'error_analysis': '''\ + Input: 0 5 0 + Prediction: 2 5 4 + Ground Truth: 2 5 2 + Reason: 1 transformed to 4 instead of 2 + '''}, + 'output': { + 'new_instruction': 'Transform 1 to 2' + } + }, + # call[7]: apply second skill 1->2, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[8]: apply third skill 2->3, GT = 3 5 3 + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 2'}, 'output': {'predictions': '3 5 3'}}, + ] +) +def test_agent_quickstart_three_skills_only_second_fail(): + from adala.agents import Agent + from adala.skills import LinearSkillSet + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills={ + "0->1": "...", + "1->2": "...", + "2->3": "..." 
+ }, + skill_sequence=["0->1", "1->2", "2->3"] + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1', '2 5 2', '3 5 3'], + ], columns=['input', 'gt_0', 'gt_1', 'gt_2']), + ground_truth_columns={ + "0->1": "gt_0", + "1->2": "gt_1", + "2->3": "gt_2" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == '...' + assert agent.skills['1->2'].instructions == 'Transform 1 to 2' + assert agent.skills['2->3'].instructions == '...' + # assert final accuracy for skill 0->1 + pd.testing.assert_series_equal( + pd.Series({'0->1': 1.0, '1->2': 1.0, '2->3': 1.0}), + ground_truth_signal.get_accuracy() + ) diff --git a/tests/test_classification.py b/tests/test_classification.py index 92a2b1e..66b8447 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -18,13 +18,13 @@ def process_record_generator(*args, **kwargs): yield {'sentiment': 'Neutral'} # errors - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} + if i < 2: + yield {'reason': 'Test reason'} + yield {'reason': 'Test reason'} + yield {'': 'Test reason'} - # instruction generation - yield {'new_instruction': 'Test instruction'} + # instruction generation + yield {'new_instruction': 'Test instruction'} # test yield {'sentiment': 'Positive'} @@ -64,7 +64,7 @@ def test_classification_skill( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=train_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment": "ground_truth"} ), # define a skill skills=ClassificationSkill( @@ -75,15 +75,15 @@ def test_classification_skill( ), ) run = agent.learn(learning_iterations=3, accuracy_threshold=0.95) - assert run.accuracy > 0.8 + assert run.get_accuracy()['sentiment'] > 0.8 print('\n\n=> Final instructions:') print('=====================') - 
print(f'{run.updated_instructions}') + print(f'{agent.skills["sentiment"].instructions}') print('=====================') print('\n=> Run test ...') - run = agent.apply_skills(test_dataset) - print_dataframe(run.predictions) + predictions = agent.run(test_dataset) + print_dataframe(predictions) - assert not run.predictions.empty + assert not predictions.empty diff --git a/tests/test_environments.py b/tests/test_environments.py index ab78eaf..cdc4fca 100644 --- a/tests/test_environments.py +++ b/tests/test_environments.py @@ -1,49 +1,110 @@ +import pandas as pd import pytest -from adala.memories.base import ShortTermMemory -from adala.skills.base import BaseSkill +from adala.skills import LinearSkillSet, LLMSkill from adala.utils.internal_data import InternalDataFrame from adala.environments.base import BasicEnvironment +NaN = float("nan") + + +@pytest.mark.parametrize("skillset, predictions, ground_truth, ground_truth_columns, expected_match, expected_errors", [ + # test single skill, full ground truth signal + ( + LinearSkillSet(skills=[LLMSkill(name='some_skill', input_data_field="text")]), + InternalDataFrame({"text": list('abcd'), "some_skill": ['1', '0', '1', '0']}), + InternalDataFrame({"my_ground_truth": ['1', '1', '1', '1']}), + {"some_skill": "my_ground_truth"}, + # match + InternalDataFrame({"some_skill": [True, False, True, False]}), + # errors + { + "some_skill": InternalDataFrame({ + "predictions": ['0', '0'], "my_ground_truth": ['1', '1']}, index=[1, 3]) + } + ), + # test two linear skills, partial ground truth signal + ( + # skills + LinearSkillSet(skills=[ + LLMSkill(name='skill_1', input_data_field="text"), + LLMSkill(name="skill_2", input_data_field="text") + ]), + # predictions + InternalDataFrame({ + "text": list('abcd'), + "skill_1": ['1', '0', '1', '0'], + "skill_2": ['1', '0', '0', '1'] + }, index=[11, 22, 33, 44]), + # ground truths + InternalDataFrame({ + "gt_1": [NaN, '0', NaN, '1'], + "gt_2": ['1', '0', '1', NaN], + }, index=[11, 22, 33, 
44]), + {"skill_1": "gt_1", "skill_2": "gt_2"}, + # expected match + InternalDataFrame({ + "skill_1": [NaN, True, NaN, False], + "skill_2": [True, True, False, NaN] + }, index=[11, 22, 33, 44]), + # expected errors + { + "skill_1": InternalDataFrame({ + "predictions": ['0'], "gt_1": ['1']}, index=[44]), + "skill_2": InternalDataFrame({ + "predictions": ['0'], "gt_2": ['1']}, index=[33]) + } + ), + # test two linear skills, no ground truth signal for one skill, different size of dataframes + ( + # skills + LinearSkillSet(skills=[ + LLMSkill(name='skill_1', input_data_field="text"), + LLMSkill(name="skill_2", input_data_field="text") + ]), + # predictions + InternalDataFrame({ + "text": list('abcd'), + "skill_1": ['1', '0', '1', '0'], + "skill_2": ['1', '0', '0', '1'] + }, index=[11, 22, 33, 44]), + # ground truths + InternalDataFrame({ + "gt_1": [NaN, NaN], + "gt_2": ['1', '0'], + }, index=[99, 44]), + {"skill_1": "gt_1", "skill_2": "gt_2"}, + # expected match + InternalDataFrame({ + "skill_1": [NaN, NaN, NaN, NaN], + "skill_2": [NaN, NaN, NaN, False] + }, index=[11, 22, 33, 44]), + # expected errors + { + "skill_1": InternalDataFrame({ + "predictions": [], "gt_1": []}, index=[]), + "skill_2": InternalDataFrame({ + "predictions": ['1'], "gt_2": ['0']}, index=[44]) + } + ), +]) +def test_basic_env_compare_to_ground_truth(skillset, predictions, ground_truth, ground_truth_columns, expected_match, expected_errors): + + basic_env = BasicEnvironment( + ground_truth_dataset=ground_truth, + ground_truth_columns=ground_truth_columns + ) + + ground_truth_signal = basic_env.compare_to_ground_truth(skillset, predictions) + + # TODO: we should check the index type and dtype, but it's not working for empty and NaN dataframes + pd.testing.assert_frame_equal(expected_match, ground_truth_signal.match, check_index_type=False, check_dtype=False), \ + f'Expected: {expected_match}\nGot: {ground_truth_signal.match}' + + if expected_errors is not None: + for skill_name in skillset.skills: 
+ skill_errors = ground_truth_signal.errors[skill_name] + expected_skill_errors = expected_errors[skill_name] + pd.testing.assert_frame_equal(expected_skill_errors, skill_errors, check_index_type=False, check_dtype=False), \ + f'Skill {skill_name}\n\nExpected: {expected_skill_errors}\nGot: {skill_errors}' -class TestSkill(BaseSkill): - def analyze(self, *args, **kwargs): - pass - - def apply(self, *args, **kwargs): - pass - - def improve(self, *args, **kwargs): - pass - - -@pytest.fixture -def basic_env(): - ground_truth_data = InternalDataFrame({"ground_truth": [1, 0, 1, 1]}) - return BasicEnvironment(ground_truth_dataset=ground_truth_data, ground_truth_column='ground_truth') - - -@pytest.fixture -def short_term_memory(): - return ShortTermMemory(predictions=InternalDataFrame({"some_skill": [1, 0, 1, 0]})) - - -@pytest.fixture -def some_skill(): - return TestSkill(name='some_skill', input_data_field="text") - - -def test_compare_to_ground_truth(basic_env, short_term_memory, some_skill): - experience = basic_env.compare_to_ground_truth(some_skill, short_term_memory) - - assert experience is not None - assert "evaluations" in experience.model_dump() - assert experience.ground_truth_column_name == 'ground_truth' - assert experience.match_column_name == 'ground_truth__x__some_skill' - - expected_evaluations = InternalDataFrame({ - "some_skill": [1, 0, 1, 0], - "ground_truth__x__some_skill": [True, True, True, False] - }) - - assert experience.evaluations.equals(expected_evaluations) \ No newline at end of file diff --git a/tests/test_llm_skillset.py b/tests/test_llm_skillset.py index 86cb519..da03719 100644 --- a/tests/test_llm_skillset.py +++ b/tests/test_llm_skillset.py @@ -50,12 +50,12 @@ def test_llm_linear_skillset(): "Apple's latest product, the iPhone 15, was released in September 2023.", # "The Louvre Museum in Paris houses the Mona Lisa." 
], columns=["text"])) - result = skillset.apply( + predictions = skillset.apply( dataset=dataset, runtime=OpenAIRuntime(verbose=True), ) - assert result.predictions.equals(pd.DataFrame.from_records([ + pd.testing.assert_frame_equal(InternalDataFrame.from_records([ # FIRST ROW {'text': 'Barack Obama was the 44th president of the United States.', 'skill_0': '\n- Barack Obama (person)\n- 44th (ordinal number)\n- president (title)\n- United States (location)', @@ -71,4 +71,4 @@ def test_llm_linear_skillset(): # 'skill_0': '\n- The Louvre Museum (Organization)\n- Paris (Location)\n- Mona Lisa (Artwork)', # 'skill_1': "\n- Le Musée du Louvre (Organisation)\n- Paris (Lieu)\n- La Joconde (Œuvre d'art)", # 'skill_2': '\n{\n "Organisation": "Le Musée du Louvre",\n "Lieu": "Paris",\n "Œuvre d\'art": "La Joconde"\n}'} - ])) + ]), predictions) diff --git a/tests/utils.py b/tests/utils.py index 401dbc7..df2e55d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -28,7 +28,7 @@ def wrapper(*args, **kwargs): def side_effect(*args, **kwargs): if call_index[0] >= len(data): - raise AssertionError(f"Unexpected call number {call_index[0] + 1} to {target_function}") + raise AssertionError(f"Unexpected call number {call_index[0]} to {target_function}") expected_input = data[call_index[0]]['input'] expected_output = data[call_index[0]]['output'] @@ -42,15 +42,25 @@ def side_effect(*args, **kwargs): if strict: if actual_input != expected_input: raise AssertionError( - f"Expected input {expected_input}\n\nbut got {actual_input}\non call number {call_index[0] + 1} to {target_function}") + f"Expected input {expected_input}\n\n" + f"but got {actual_input}\non call number {call_index[0]}" + f" to {target_function}") else: for key, value in expected_input.items(): if key not in actual_input: raise AssertionError( - f"Expected input {expected_input}\n\nbut key '{key}' was missing on actual call number {call_index[0] + 1} to {target_function}.\n\nActual input: {actual_input}") + f"Expected input 
{expected_input}\n\n" + f"but key '{key}' was missing " + f"on actual call number {call_index[0]} " + f"to {target_function}.\n\n" + f"Actual input: {actual_input}") if actual_input[key] != value: raise AssertionError( - f"Expected input {expected_input}\n\nbut actual_input['{key}'] != expected_input['{key}']\non call number {call_index[0] + 1} to {target_function}.\n\nActual input: {actual_input}") + f"Expected input {expected_input}\n\n" + f"but actual_input['{key}'] != expected_input['{key}']\n" + f"on call number {call_index[0]} " + f"to {target_function}.\n\n" + f"Actual input: {actual_input}") call_index[0] += 1 return expected_output From b6e0076e8f024de1adab0c5ac88a86fd069af92c Mon Sep 17 00:00:00 2001 From: nik Date: Sun, 29 Oct 2023 21:56:27 +0000 Subject: [PATCH 3/5] Update quickstart example --- README.md | 14 +++++++------- adala/agents/base.py | 5 +++-- adala/runtimes/base.py | 12 ++++++------ adala/skills/base.py | 34 +++++++++++++++++++++++----------- adala/skills/skillset.py | 1 + docs/src/index.md | 16 ++++++++-------- 6 files changed, 48 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 9017402..b453a17 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ agent = Agent( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=ground_truth_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment_classification": "ground_truth"} ), # define a skill @@ -148,10 +148,10 @@ agent = Agent( default_runtime='openai', # NOTE! 
If you have access to GPT-4, you can uncomment the lines bellow for better results - # default_teacher_runtime='openai-gpt4', - # teacher_runtimes = { - # 'openai-gpt4': OpenAIRuntime(model='gpt-4') - # } +# default_teacher_runtime='openai-gpt4', +# teacher_runtimes = { +# 'openai-gpt4': OpenAIRuntime(model='gpt-4') +# } ) print(agent) @@ -160,9 +160,9 @@ print(agent.skills) agent.learn(learning_iterations=3, accuracy_threshold=0.95) print('\n=> Run tests ...') -run = agent.apply_skills(predict_dataset) +predictions = agent.run(predict_dataset) print('\n => Test results:') -print(run) +print(predictions) ``` ### 👉 Available skills diff --git a/adala/agents/base.py b/adala/agents/base.py index 7c4dcfa..d94c5db 100644 --- a/adala/agents/base.py +++ b/adala/agents/base.py @@ -229,7 +229,7 @@ def learn( # Compare predictions to ground truth -> get ground truth signal ground_truth_signal = self.environment.compare_to_ground_truth(self.skills, predictions) print_text(f'Comparing predictions to ground truth data ...') - print(ground_truth_signal) + print_dataframe(InternalDataFrameConcat([predictions, ground_truth_signal.match], axis=1)) # Use ground truth signal to find the skill to improve accuracy = ground_truth_signal.get_accuracy() @@ -251,7 +251,8 @@ def learn( teacher_runtime=teacher_runtime, memory=self.memory ) - print_text(error_analysis) + print_text(f'Error analysis for skill "{train_skill.name}":\n') + print_text(error_analysis, style='green') if self.memory and update_memory: self.memory.remember(error_analysis, self.skills) diff --git a/adala/runtimes/base.py b/adala/runtimes/base.py index 11869fc..be9c3bd 100644 --- a/adala/runtimes/base.py +++ b/adala/runtimes/base.py @@ -141,7 +141,6 @@ def _process_record( if 'text' in verified_input: verified_input['text_'] = verified_input['text'] del verified_input['text'] - verified_input.update(extra_fields) if self.verbose: print_text(str(verified_input)) @@ -183,7 +182,8 @@ def get_output_program(self, 
output_template): callable: The generated output program. """ - return guidance(output_template, llm=self._llm) + output_program = guidance(output_template, llm=self._llm) + return output_program def get_instructions_program(self, instructions): """Generates an instructions program from the provided template. @@ -195,7 +195,8 @@ def get_instructions_program(self, instructions): callable: The generated instructions program. """ - return guidance(instructions, llm=self._llm) + instructions_program = guidance(instructions, llm=self._llm) + return instructions_program def _prepare_program_and_params(self, input_template, output_template, instructions, extra_fields): extra_fields = extra_fields or {} @@ -234,7 +235,6 @@ def process_record( """ outputs = self.get_outputs(output_template) program, extra_fields = self._prepare_program_and_params(input_template, output_template, instructions, extra_fields) - output = self._process_record( record=record, program=program, @@ -247,8 +247,8 @@ def process_batch( self, batch: InternalDataFrame, input_template: str, - output_template: str, - instructions: str, + output_template: Optional[str] = None, + instructions: Optional[str] = None, extra_fields: Optional[Dict[str, Any]] = None, ) -> InternalDataFrame: """Processes a batch of records using the provided templates and instructions. 
diff --git a/adala/skills/base.py b/adala/skills/base.py index 5abd6e7..8438602 100644 --- a/adala/skills/base.py +++ b/adala/skills/base.py @@ -254,13 +254,25 @@ def analyze( errors = errors.sample(n=min(MAX_ERRORS, errors.shape[0])) # TODO: ground truth column name can be the input parameter that comes from GT signal ground_truth_column_name = errors.columns[-1] + extra_fields = self._get_extra_fields() + + # get error prepared inputs + inputs = student_runtime.process_batch( + batch=predictions.loc[errors.index], + input_template=self.input_template, + extra_fields=extra_fields + ) if not teacher_runtime: teacher_runtime = student_runtime - predictions_and_errors = pd.concat([predictions.loc[errors.index], errors[ground_truth_column_name]], axis=1) + predictions_and_errors = pd.concat([ + inputs, + predictions[self.name].loc[errors.index], + errors[ground_truth_column_name] + ], axis=1) + predictions_and_errors.columns = ['input', 'prediction', 'ground_truth'] predictions_and_errors.columns = predictions_and_errors.columns[:-1].tolist() + [ground_truth_column_name] - error_reasons = teacher_runtime.process_batch( batch=predictions_and_errors, instructions="{{#system~}}\n" @@ -272,16 +284,15 @@ def analyze( f"Instructions: {self.instructions}\n" "{{~/system}}", input_template="{{#user~}}\n" - f"{{{{>{self.input_template}}}}}\n" - f"Prediction: {{{{{self.name}}}}}\n" - f"Ground truth: {{{{{ground_truth_column_name}}}}}\n" - "Reason:\n" + "{{input}}\n" + "Prediction: {{prediction}}\n" + "Ground truth: {{ground_truth}}\n" + "Error reason:\n" "{{~/user}}", output_template="{{#assistant~}}{{gen 'reason'}}{{~/assistant}}", - extra_fields=self._get_extra_fields() + extra_fields=extra_fields ) predictions_and_errors['reason'] = error_reasons['reason'] - # build error report result = teacher_runtime.process_record( record={ @@ -291,7 +302,7 @@ def analyze( "\n{{this.input}}\n" "Prediction: {{this.prediction}}\n" "Ground truth: {{this.ground_truth}}\n" - 'Reason: 
{{this.reason}}\n' + 'Error reason: {{this.reason}}\n' "{{/each}}" ) # no specific output specified, all output is in the error report @@ -319,7 +330,8 @@ def improve( "LLM prompt was created by concatenating instructions with text input:\n\n" "Prediction = LLM(Input, Instructions)\n\n" "We expect the prediction to be equal to the ground truth.\n" - "Your task is to craft a revised concise instruction for the LLM. " + "Your task is to analyze errors made by old instructions " + "and craft new instructions for the LLM.\n" "Follow best practices for LLM prompt engineering.\n" "Include 2-3 examples at the end of your response to demonstrate how the new instruction would be applied.\n" "Use the following format for your examples:\n" @@ -327,7 +339,7 @@ def improve( "Output: ...\n\n" "{{~/system}}\n", input_template="{{#user~}}\n" - f"Old instruction: {self.instructions}\n\n" + f"Old instructions: {self.instructions}\n\n" "Errors:\n{{error_analysis}}\n" "New instruction:\n" "{{~/user}}", diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py index b5fff57..d88c5e0 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -132,6 +132,7 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki skills = {} if isinstance(v, list) and isinstance(v[0], str): # if list of strings presented, they are interpreted as skill instructions + # TODO: specify input_data_field as parameter input_data_field = 'text' for i, instructions in enumerate(v): skill_name = f"skill_{i}" diff --git a/docs/src/index.md b/docs/src/index.md index af173f7..92a6c9a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -64,7 +64,7 @@ agent = Agent( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=ground_truth_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment_classification": "ground_truth"} ), # define a skill @@ -79,15 +79,15 @@ agent = Agent( runtimes = { # You can specify your OPENAI API KEY 
here via `OpenAIRuntime(..., api_key='your-api-key')` 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'), - 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'), + 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo') }, default_runtime='openai', # NOTE! If you have access to GPT-4, you can uncomment the lines bellow for better results - # default_teacher_runtime='openai-gpt4', - # teacher_runtimes = { - # 'openai-gpt4': OpenAIRuntime(model='gpt-4') - # } +# default_teacher_runtime='openai-gpt4', +# teacher_runtimes = { +# 'openai-gpt4': OpenAIRuntime(model='gpt-4') +# } ) print(agent) @@ -96,9 +96,9 @@ print(agent.skills) agent.learn(learning_iterations=3, accuracy_threshold=0.95) print('\n=> Run tests ...') -run = agent.apply_skills(predict_dataset) +predictions = agent.run(predict_dataset) print('\n => Test results:') -print(run) +print(predictions) ``` ## Reference From 383061fa36ea779a80d43f6299bba0105c32e2fc Mon Sep 17 00:00:00 2001 From: nik Date: Mon, 30 Oct 2023 00:47:36 +0000 Subject: [PATCH 4/5] Fix tests, add example with linear skill program --- adala/environments/base.py | 15 +- adala/skills/base.py | 1 - adala/skills/skillset.py | 21 +- adala/utils/matching.py | 14 + examples/linear_skill_program.ipynb | 2753 +++++++++++++++++++++++++++ tests/test_agent_basics.py | 172 +- tests/test_classification.py | 2 + tests/test_llm_skillset.py | 8 +- 8 files changed, 2880 insertions(+), 106 deletions(-) create mode 100644 adala/utils/matching.py create mode 100644 examples/linear_skill_program.ipynb diff --git a/adala/environments/base.py b/adala/environments/base.py index def9ea1..fe3e399 100644 --- a/adala/environments/base.py +++ b/adala/environments/base.py @@ -3,6 +3,7 @@ from typing import Any, Optional, Dict, Union, Callable, Dict from adala.utils.internal_data import InternalDataFrame, InternalSeries, InternalDataFrameConcat +from adala.utils.matching import fuzzy_match from adala.skills.base import BaseSkill from adala.skills.skillset import SkillSet 
from adala.datasets import Dataset, DataFrameDataset @@ -82,6 +83,8 @@ class BasicEnvironment(Environment): ground_truth_dataset: Union[InternalDataFrame, DataFrameDataset] = Field(default_factory=DataFrameDataset) ground_truth_columns: Dict[str, str] + matching_function: str = 'exact' + matching_threshold: float = 0.8 @field_validator('ground_truth_dataset') def _validate_ground_truth_dataset(cls, v): @@ -110,9 +113,17 @@ def compare_to_ground_truth(self, skill_set: SkillSet, predictions: InternalData pred = predictions[skill.name] # from ground truth dataset, select only the rows that are in the predictions gt, pred = gt.align(pred) + nonnull_index = gt.notnull() & pred.notnull() + gt = gt[nonnull_index] + pred = pred[nonnull_index] # compare ground truth with predictions - # TODO: we can customize the matching function here beyond exact matching - gt_pred_match = (gt == pred)[gt.notnull() & pred.notnull()] + if self.matching_function == 'exact': + gt_pred_match = gt == pred + elif self.matching_function == 'fuzzy': + gt_pred_match = fuzzy_match(gt, pred, threshold=self.matching_threshold) + else: + raise NotImplementedError(f'Unknown matching function {self.matching_function}') + error_index = gt_pred_match[~gt_pred_match].index # concatenate errors - dataframe with two columns: predictions and ground truth errors[skill.name] = InternalDataFrameConcat([pred[error_index], gt[error_index]], axis=1) diff --git a/adala/skills/base.py b/adala/skills/base.py index 8438602..eca0e9f 100644 --- a/adala/skills/base.py +++ b/adala/skills/base.py @@ -272,7 +272,6 @@ def analyze( errors[ground_truth_column_name] ], axis=1) predictions_and_errors.columns = ['input', 'prediction', 'ground_truth'] - predictions_and_errors.columns = predictions_and_errors.columns[:-1].tolist() + [ground_truth_column_name] error_reasons = teacher_runtime.process_batch( batch=predictions_and_errors, instructions="{{#system~}}\n" diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py 
index d88c5e0..f71088d 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -1,6 +1,7 @@ from pydantic import BaseModel, model_validator, field_validator from abc import ABC, abstractmethod from typing import List, Union, Dict, Any, Optional, Mapping +from collections import OrderedDict from adala.datasets.base import Dataset from adala.runtimes.base import Runtime from adala.utils.logs import print_text @@ -115,6 +116,7 @@ class LinearSkillSet(SkillSet): """ skill_sequence: List[str] = None + input_data_field: Optional[str] = None @field_validator('skills', mode='before') def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]) -> Dict[str, BaseSkill]: @@ -127,13 +129,13 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki Returns: Dict[str, BaseSkill]: Dictionary mapping skill names to their corresponding BaseSkill instances. """ + skills = OrderedDict() if not v: - return {} - skills = {} + return skills + + input_data_field = None if isinstance(v, list) and isinstance(v[0], str): # if list of strings presented, they are interpreted as skill instructions - # TODO: specify input_data_field as parameter - input_data_field = 'text' for i, instructions in enumerate(v): skill_name = f"skill_{i}" skills[skill_name] = LLMSkill( @@ -145,7 +147,6 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki input_data_field = skill_name elif isinstance(v, dict) and isinstance(v[list(v.keys())[0]], str): # if dictionary of strings presented, they are interpreted as skill instructions - input_data_field = 'text' for skill_name, instructions in v.items(): skills[skill_name] = LLMSkill( name=skill_name, @@ -156,7 +157,8 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki input_data_field = skill_name elif isinstance(v, list) and isinstance(v[0], BaseSkill): # convert list of skill names to dictionary - skills = {skill.name: skill for skill in 
v} + for skill in v: + skills[skill.name] = skill elif isinstance(v, dict): skills = v else: @@ -171,10 +173,13 @@ def skill_sequence_validator(self): Returns: LinearSkillSet: The current instance with updated skill_sequence attribute. """ - if self.skill_sequence is None: # use default skill sequence defined by lexicographical order - self.skill_sequence = sorted(self.skills.keys()) + self.skill_sequence = list(self.skills.keys()) + if len(self.skill_sequence) != len(self.skills): + raise ValueError(f"skill_sequence must contain all skill names - " + f"length of skill_sequence is {len(self.skill_sequence)} " + f"while length of skills is {len(self.skills)}") return self def apply( diff --git a/adala/utils/matching.py b/adala/utils/matching.py new file mode 100644 index 0000000..d41fac2 --- /dev/null +++ b/adala/utils/matching.py @@ -0,0 +1,14 @@ +import pandas as pd +import difflib +from .internal_data import InternalSeries + + +# Function to apply fuzzy matching +def _fuzzy_match(str1, str2, match_threshold=0.95): + ratio = difflib.SequenceMatcher(None, str1.strip(), str2.strip()).ratio() + return ratio >= match_threshold + + +def fuzzy_match(x: InternalSeries, y: InternalSeries, threshold=0.8): + result = x.combine(y, lambda x, y: _fuzzy_match(x, y, threshold)) + return result diff --git a/examples/linear_skill_program.ipynb b/examples/linear_skill_program.ipynb new file mode 100644 index 0000000..ee8397c --- /dev/null +++ b/examples/linear_skill_program.ipynb @@ -0,0 +1,2753 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Learning sequence of skills" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adala agent is able to create the sequence of skills based on the provided input/output samples.\n", + "In the example below, we ask agent to build two skills from scratch with the following requirements:\n", + "\n", + "1. 
First skill gets nutrients `\"category\"` name as input and should produce the output similar to what we specify in `\"entities\"` (for example, list of common nutrients based on provided category)\n", + "\n", + "2. Second skill gets the output of the first skill (`\"entities\"`) and generates the text, using the examples provided in the ground truth.\n", + "\n", + "In other words, agent learns how to perform the data generation pipeline like `\"category\"` --> `\"entities\"` --> `\"description\"`. \n", + "You can adjust to your specific use case" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: skill_0\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 58.05it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 70.03it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n",
+       "\n",
+       "=> Iteration #0: Comparing to ground truth, analyzing and improving ...\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m0\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Comparing predictions to ground truth data ...\n",
+       "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                                                                                                                   \n",
+       "  category         entities           text               skill_0             skill_1            skill_0   skill_1  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+       "  Macronutrients   Carbohydrates,     Carbohydrates                                             False     False    \n",
+       "                   Proteins, Fats     provide quick      Macronutrients      Macronutrients                        \n",
+       "                                      energy, proteins   are essential       are crucial                           \n",
+       "                                      are essential      nutrients that      nutrients that                        \n",
+       "                                      for muscle         provide the body    supply the body                       \n",
+       "                                      repair and         with energy and     with energy and                       \n",
+       "                                      growth, and fats   support various     facilitate                            \n",
+       "                                      are vital for      bodily functions.   various bodily                        \n",
+       "                                      long-term energy   They are divided    functions. They                       \n",
+       "                                      storage and cell   into three          are categorized                       \n",
+       "                                      function.          categories:         into three                            \n",
+       "                                                         carbohydrates,      groups:                               \n",
+       "                                                         proteins, and       carbohydrates,                        \n",
+       "                                                         fats.               proteins, and                         \n",
+       "                                                                             fats.                                 \n",
+       "                                                         Carbohydrates are                                         \n",
+       "                                                         the main source     Carbohydrates                         \n",
+       "                                                         of energy for the   are the primary                       \n",
+       "                                                         body. They are      source of energy                      \n",
+       "                                                         found in foods      for the body.                         \n",
+       "                                                         such as grains,     They can be                           \n",
+       "                                                         fruits, and         found in foods                        \n",
+       "                                                         vegetables. They    like grains,                          \n",
+       "                                                         are broken down     fruits, and                           \n",
+       "                                                         into glucose,       vegetables. They                      \n",
+       "                                                         which is used by    are broken down                       \n",
+       "                                                         the body for        into glucose,                         \n",
+       "                                                         energy.             which is                              \n",
+       "                                                                             utilized by the                       \n",
+       "                                                         Proteins are        body for energy.                      \n",
+       "                                                         important for                                             \n",
+       "                                                         building and        Proteins are                          \n",
+       "                                                         repairing tissues   essential for                         \n",
+       "                                                         in the body. They   building and                          \n",
+       "                                                         are found in        repairing                             \n",
+       "                                                         foods such as       tissues in the                        \n",
+       "                                                         meat, fish, eggs,   body. They can                        \n",
+       "                                                         and beans.          be found in                           \n",
+       "                                                         Proteins are made   foods like meat,                      \n",
+       "                                                         up of amino         fish, eggs, and                       \n",
+       "                                                         acids, which are    beans. Proteins                       \n",
+       "                                                         essential for the   are composed of                       \n",
+       "                                                         body to function    amino acids,                          \n",
+       "                                                         properly.           which are                             \n",
+       "                                                                             necessary for                         \n",
+       "                                                         Fats are a          proper bodily                         \n",
+       "                                                         concentrated        function.                             \n",
+       "                                                         source of energy                                          \n",
+       "                                                         and are important   Fats are a                            \n",
+       "                                                         for insulation      concentrated                          \n",
+       "                                                         and protection of   source of energy                      \n",
+       "                                                         organs. They are    and are vital                         \n",
+       "                                                         found in foods      for insulation                        \n",
+       "                                                         such as oils,       and protection                        \n",
+       "                                                         nuts, and           of organs. They                       \n",
+       "                                                         avocados. Fats      can be found in                       \n",
+       "                                                         are also            foods like oils,                      \n",
+       "                                                         necessary for the   nuts, and                             \n",
+       "                                                         absorption of       avocados. Fats                        \n",
+       "                                                         certain vitamins    are also crucial                      \n",
+       "                                                         and minerals.       for the                               \n",
+       "                                                                             absorption of                         \n",
+       "                                                         In addition to      certain vitamins                      \n",
+       "                                                         providing energy,   and minerals.                         \n",
+       "                                                         macronutrients                                            \n",
+       "                                                         also play a role    Aside from                            \n",
+       "                                                         in maintaining a    providing                             \n",
+       "                                                         healthy immune      energy,                               \n",
+       "                                                         system,             macronutrients                        \n",
+       "                                                         regulating          also play a                           \n",
+       "                                                         hormones, and       significant role                      \n",
+       "                                                         supporting brain    in maintaining a                      \n",
+       "                                                         function. It is     strong immune                         \n",
+       "                                                         important to have   system,                               \n",
+       "                                                         a balanced intake   regulating                            \n",
+       "                                                         of all three        hormones, and                         \n",
+       "                                                         macronutrients in   supporting brain                      \n",
+       "                                                         order to maintain   function. It is                       \n",
+       "                                                         overall health      essential to                          \n",
+       "                                                         and well-being.     have a                                \n",
+       "                                                                             well-balanced                         \n",
+       "                                                                             intake of all                         \n",
+       "                                                                             three                                 \n",
+       "                                                                             macronutrients                        \n",
+       "                                                                             in order to                           \n",
+       "                                                                             promote overall                       \n",
+       "                                                                             health and                            \n",
+       "                                                                             well-being.                           \n",
+       "  Vitamins         Vitamin A,         Vitamin A is                                              False     False    \n",
+       "                   Vitamin C,         crucial for good   1. Vitamin A        1. Vitamin A                          \n",
+       "                   Vitamin D          vision and a       2. Vitamin B        2. Vitamin B                          \n",
+       "                                      healthy immune     3. Vitamin C        3. Vitamin C                          \n",
+       "                                      system, Vitamin    4. Vitamin D        4. Vitamin D                          \n",
+       "                                      C helps in the     5. Vitamin E        5. Vitamin E                          \n",
+       "                                      repair of          6. Vitamin K        6. Vitamin K                          \n",
+       "                                      tissues and the    7. Thiamine         7. Thiamine                           \n",
+       "                                      enzymatic          (Vitamin B1)        (Vitamin B1)                          \n",
+       "                                      production of      8. Riboflavin       8. Riboflavin                         \n",
+       "                                      certain            (Vitamin B2)        (Vitamin B2)                          \n",
+       "                                      neurotransmitte…   9. Niacin           9. Niacin                             \n",
+       "                                      and Vitamin D is   (Vitamin B3)        (Vitamin B3)                          \n",
+       "                                      essential for      10. Pantothenic     10. Pantothenic                       \n",
+       "                                      strong bones and   acid (Vitamin B5)   acid (Vitamin                         \n",
+       "                                      teeth as it        11. Pyridoxine      B5)                                   \n",
+       "                                      helps the body     (Vitamin B6)        11. Pyridoxine                        \n",
+       "                                      absorb calcium.    12. Biotin          (Vitamin B6)                          \n",
+       "                                                         (Vitamin B7)        12. Biotin                            \n",
+       "                                                         13. Folate          (Vitamin B7)                          \n",
+       "                                                         (Vitamin B9)        13. Folate                            \n",
+       "                                                         14. Cobalamin       (Vitamin B9)                          \n",
+       "                                                         (Vitamin B12)       14. Cobalamin                         \n",
+       "                                                         15. Choline         (Vitamin B12)                         \n",
+       "                                                         16. Inositol        15. Choline                           \n",
+       "                                                         17. Vitamin B15     16. Inositol                          \n",
+       "                                                         18. Vitamin B17     17. Vitamin B15                       \n",
+       "                                                         19. Vitamin F       18. Vitamin B17                       \n",
+       "                                                         20. Vitamin G       19. Vitamin F                         \n",
+       "                                                         21. Vitamin H       20. Vitamin G                         \n",
+       "                                                         22. Vitamin J       21. Vitamin H                         \n",
+       "                                                         23. Vitamin L       22. Vitamin J                         \n",
+       "                                                         24. Vitamin M       23. Vitamin L                         \n",
+       "                                                         25. Vitamin P       24. Vitamin M                         \n",
+       "                                                         26. Vitamin Q       25. Vitamin P                         \n",
+       "                                                         27. Vitamin R       26. Vitamin Q                         \n",
+       "                                                         28. Vitamin S       27. Vitamin R                         \n",
+       "                                                         29. Vitamin T       28. Vitamin S                         \n",
+       "                                                         30. Vitamin U       29. Vitamin T                         \n",
+       "                                                         31. Vitamin V       30. Vitamin U                         \n",
+       "                                                         32. Vitamin W       31. Vitamin V                         \n",
+       "                                                         33. Vitamin X       32. Vitamin W                         \n",
+       "                                                         34. Vitamin Y       33. Vitamin X                         \n",
+       "                                                         35. Vitamin Z       34. Vitamin Y                         \n",
+       "                                                                             35. Vitamin Z                         \n",
+       "  Minerals         Calcium, Iron,     Calcium is                                                False     False    \n",
+       "                   Magnesium          necessary for      1. Iron             1. Iron                               \n",
+       "                                      maintaining        2. Calcium          2. Calcium                            \n",
+       "                                      healthy bones      3. Magnesium        3. Magnesium                          \n",
+       "                                      and teeth, Iron    4. Potassium        4. Potassium                          \n",
+       "                                      is crucial for     5. Sodium           5. Sodium                             \n",
+       "                                      making red blood   6. Zinc             6. Zinc                               \n",
+       "                                      cells and          7. Copper           7. Copper                             \n",
+       "                                      transporting       8. Manganese        8. Manganese                          \n",
+       "                                      oxygen             9. Phosphorus       9. Phosphorus                         \n",
+       "                                      throughout the     10. Selenium        10. Selenium                          \n",
+       "                                      body, and          11. Chromium        11. Chromium                          \n",
+       "                                      Magnesium plays    12. Iodine          12. Iodine                            \n",
+       "                                      a role in over     13. Fluoride        13. Fluoride                          \n",
+       "                                      300 enzyme         14. Molybdenum      14. Molybdenum                        \n",
+       "                                      reactions in the   15. Cobalt          15. Cobalt                            \n",
+       "                                      human body,        16. Nickel          16. Nickel                            \n",
+       "                                      including the      17. Vanadium        17. Vanadium                          \n",
+       "                                      metabolism of      18. Silicon         18. Silicon                           \n",
+       "                                      food, synthesis    19. Boron           19. Boron                             \n",
+       "                                      of fatty acids     20. Chloride        20. Chloride                          \n",
+       "                                      and proteins,                                                                \n",
+       "                                      and the                                                                      \n",
+       "                                      transmission of                                                              \n",
+       "                                      nerve impulses.                                                              \n",
+       "                                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Macronutrients Carbohydrates, Carbohydrates False False \n", + " Proteins, Fats provide quick Macronutrients Macronutrients \n", + " energy, proteins are essential are crucial \n", + " are essential nutrients that nutrients that \n", + " for muscle provide the body supply the body \n", + " repair and with energy and with energy and \n", + " growth, and fats support various facilitate \n", + " are vital for bodily functions. various bodily \n", + " long-term energy They are divided functions. They \n", + " storage and cell into three are categorized \n", + " function. categories: into three \n", + " carbohydrates, groups: \n", + " proteins, and carbohydrates, \n", + " fats. proteins, and \n", + " fats. \n", + " Carbohydrates are \n", + " the main source Carbohydrates \n", + " of energy for the are the primary \n", + " body. They are source of energy \n", + " found in foods for the body. \n", + " such as grains, They can be \n", + " fruits, and found in foods \n", + " vegetables. They like grains, \n", + " are broken down fruits, and \n", + " into glucose, vegetables. They \n", + " which is used by are broken down \n", + " the body for into glucose, \n", + " energy. which is \n", + " utilized by the \n", + " Proteins are body for energy. 
\n", + " important for \n", + " building and Proteins are \n", + " repairing tissues essential for \n", + " in the body. They building and \n", + " are found in repairing \n", + " foods such as tissues in the \n", + " meat, fish, eggs, body. They can \n", + " and beans. be found in \n", + " Proteins are made foods like meat, \n", + " up of amino fish, eggs, and \n", + " acids, which are beans. Proteins \n", + " essential for the are composed of \n", + " body to function amino acids, \n", + " properly. which are \n", + " necessary for \n", + " Fats are a proper bodily \n", + " concentrated function. \n", + " source of energy \n", + " and are important Fats are a \n", + " for insulation concentrated \n", + " and protection of source of energy \n", + " organs. They are and are vital \n", + " found in foods for insulation \n", + " such as oils, and protection \n", + " nuts, and of organs. They \n", + " avocados. Fats can be found in \n", + " are also foods like oils, \n", + " necessary for the nuts, and \n", + " absorption of avocados. Fats \n", + " certain vitamins are also crucial \n", + " and minerals. for the \n", + " absorption of \n", + " In addition to certain vitamins \n", + " providing energy, and minerals. \n", + " macronutrients \n", + " also play a role Aside from \n", + " in maintaining a providing \n", + " healthy immune energy, \n", + " system, macronutrients \n", + " regulating also play a \n", + " hormones, and significant role \n", + " supporting brain in maintaining a \n", + " function. It is strong immune \n", + " important to have system, \n", + " a balanced intake regulating \n", + " of all three hormones, and \n", + " macronutrients in supporting brain \n", + " order to maintain function. It is \n", + " overall health essential to \n", + " and well-being. have a \n", + " well-balanced \n", + " intake of all \n", + " three \n", + " macronutrients \n", + " in order to \n", + " promote overall \n", + " health and \n", + " well-being. 
\n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m1. Vitamin A \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m1. Vitamin A \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m2. Vitamin B \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m2. Vitamin B \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m3. Vitamin C \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m3. Vitamin C \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m4. Vitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m4. Vitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m5. Vitamin E \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m5. 
Vitamin E \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m6. Vitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m6. Vitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m7. Thiamine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m7. Thiamine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B1) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B1) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m8. Riboflavin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m8. Riboflavin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B2) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B2) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitte…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m9. Niacin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m9. 
Niacin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B3) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B3) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m10. Pantothenic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m10. Pantothenic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2macid (Vitamin B5)\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2macid (Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m11. Pyridoxine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mB5) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B6) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m11. Pyridoxine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m12. 
Biotin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B6) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B7) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m12. Biotin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m13. Folate \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B7) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B9) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m13. Folate \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m14. Cobalamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B9) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B12) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m14. Cobalamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m15. Choline \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B12) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m16. Inositol \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m15. Choline \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m17. 
Vitamin B15 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m16. Inositol \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m18. Vitamin B17 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m17. Vitamin B15 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m19. Vitamin F \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m18. Vitamin B17 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m20. Vitamin G \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m19. Vitamin F \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m21. Vitamin H \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m20. Vitamin G \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m22. Vitamin J \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m21. Vitamin H \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m23. Vitamin L \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m22. Vitamin J \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m24. Vitamin M \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m23. 
Vitamin L \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m25. Vitamin P \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m24. Vitamin M \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m26. Vitamin Q \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m25. Vitamin P \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m27. Vitamin R \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m26. Vitamin Q \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m28. Vitamin S \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m27. Vitamin R \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m29. Vitamin T \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m28. Vitamin S \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m30. Vitamin U \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m29. Vitamin T \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m31. Vitamin V \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m30. Vitamin U \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m32. 
Vitamin W \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m31. Vitamin V \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m33. Vitamin X \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m32. Vitamin W \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m34. Vitamin Y \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m33. Vitamin X \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m35. Vitamin Z \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m34. Vitamin Y \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m35. Vitamin Z \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is False False \n", + " Magnesium necessary for 1. Iron 1. Iron \n", + " maintaining 2. Calcium 2. Calcium \n", + " healthy bones 3. Magnesium 3. Magnesium \n", + " and teeth, Iron 4. Potassium 4. Potassium \n", + " is crucial for 5. Sodium 5. Sodium \n", + " making red blood 6. Zinc 6. Zinc \n", + " cells and 7. Copper 7. Copper \n", + " transporting 8. Manganese 8. Manganese \n", + " oxygen 9. Phosphorus 9. Phosphorus \n", + " throughout the 10. Selenium 10. Selenium \n", + " body, and 11. Chromium 11. Chromium \n", + " Magnesium plays 12. Iodine 12. Iodine \n", + " a role in over 13. Fluoride 13. Fluoride \n", + " 300 enzyme 14. Molybdenum 14. Molybdenum \n", + " reactions in the 15. Cobalt 15. Cobalt \n", + " human body, 16. Nickel 16. Nickel \n", + " including the 17. Vanadium 17. Vanadium \n", + " metabolism of 18. Silicon 18. 
Silicon \n", + " food, synthesis 19. Boron 19. Boron \n", + " of fatty acids 20. Chloride 20. Chloride \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Accuracy = 0.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Analyze evaluation experience ...\n",
+       "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████| 3/3 [00:00<00:00, 185.36it/s]\n", + "100%|█████████| 3/3 [00:00<00:00, 20.22it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
Error analysis for skill \"skill_0\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Input: Vitamins\n",
+       "Prediction: \n",
+       "1. Vitamin A\n",
+       "2. Vitamin B\n",
+       "3. Vitamin C\n",
+       "4. Vitamin D\n",
+       "5. Vitamin E\n",
+       "6. Vitamin K\n",
+       "7. Thiamine (Vitamin B1)\n",
+       "8. Riboflavin (Vitamin B2)\n",
+       "9. Niacin (Vitamin B3)\n",
+       "10. Pantothenic acid (Vitamin B5)\n",
+       "11. Pyridoxine (Vitamin B6)\n",
+       "12. Biotin (Vitamin B7)\n",
+       "13. Folate (Vitamin B9)\n",
+       "14. Cobalamin (Vitamin B12)\n",
+       "15. Choline\n",
+       "16. Inositol\n",
+       "17. Vitamin B15\n",
+       "18. Vitamin B17\n",
+       "19. Vitamin F\n",
+       "20. Vitamin G\n",
+       "21. Vitamin H\n",
+       "22. Vitamin J\n",
+       "23. Vitamin L\n",
+       "24. Vitamin M\n",
+       "25. Vitamin P\n",
+       "26. Vitamin Q\n",
+       "27. Vitamin R\n",
+       "28. Vitamin S\n",
+       "29. Vitamin T\n",
+       "30. Vitamin U\n",
+       "31. Vitamin V\n",
+       "32. Vitamin W\n",
+       "33. Vitamin X\n",
+       "34. Vitamin Y\n",
+       "35. Vitamin Z\n",
+       "Ground truth: Vitamin A, Vitamin C, Vitamin D\n",
+       "Error reason: The instructions are missing in the provided data, making it impossible to determine the specific \n",
+       "error in the prediction.\n",
+       "\n",
+       "Input: Macronutrients\n",
+       "Prediction: \n",
+       "Macronutrients are essential nutrients that provide the body with energy and support various bodily functions. They\n",
+       "are divided into three categories: carbohydrates, proteins, and fats.\n",
+       "\n",
+       "Carbohydrates are the main source of energy for the body. They are found in foods such as grains, fruits, and \n",
+       "vegetables. They are broken down into glucose, which is used by the body for energy.\n",
+       "\n",
+       "Proteins are important for building and repairing tissues in the body. They are found in foods such as meat, fish, \n",
+       "eggs, and beans. Proteins are made up of amino acids, which are essential for the body to function properly.\n",
+       "\n",
+       "Fats are a concentrated source of energy and are important for insulation and protection of organs. They are found \n",
+       "in foods such as oils, nuts, and avocados. Fats are also necessary for the absorption of certain vitamins and \n",
+       "minerals.\n",
+       "\n",
+       "In addition to providing energy, macronutrients also play a role in maintaining a healthy immune system, regulating\n",
+       "hormones, and supporting brain function. It is important to have a balanced intake of all three macronutrients in \n",
+       "order to maintain overall health and well-being.\n",
+       "Ground truth: Carbohydrates, Proteins, Fats\n",
+       "Error reason: The instructions were not clear or specific about what the model should predict about macronutrients.\n",
+       "As a result, the model provided a detailed explanation about macronutrients instead of just listing them as in the \n",
+       "ground truth.\n",
+       "\n",
+       "Input: Minerals\n",
+       "Prediction: \n",
+       "1. Iron\n",
+       "2. Calcium\n",
+       "3. Magnesium\n",
+       "4. Potassium\n",
+       "5. Sodium\n",
+       "6. Zinc\n",
+       "7. Copper\n",
+       "8. Manganese\n",
+       "9. Phosphorus\n",
+       "10. Selenium\n",
+       "11. Chromium\n",
+       "12. Iodine\n",
+       "13. Fluoride\n",
+       "14. Molybdenum\n",
+       "15. Cobalt\n",
+       "16. Nickel\n",
+       "17. Vanadium\n",
+       "18. Silicon\n",
+       "19. Boron\n",
+       "20. Chloride\n",
+       "Ground truth: Calcium, Iron, Magnesium\n",
+       "Error reason: The instructions are missing in the provided context, making it impossible to determine the specific \n",
+       "error in the prediction.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamins\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[1;36m1\u001b[0m\u001b[32m. Vitamin A\u001b[0m\n", + "\u001b[1;36m2\u001b[0m\u001b[32m. Vitamin B\u001b[0m\n", + "\u001b[1;36m3\u001b[0m\u001b[32m. Vitamin C\u001b[0m\n", + "\u001b[1;36m4\u001b[0m\u001b[32m. Vitamin D\u001b[0m\n", + "\u001b[1;36m5\u001b[0m\u001b[32m. Vitamin E\u001b[0m\n", + "\u001b[1;36m6\u001b[0m\u001b[32m. Vitamin K\u001b[0m\n", + "\u001b[1;36m7\u001b[0m\u001b[32m. Thiamine \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B1\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m8\u001b[0m\u001b[32m. Riboflavin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B2\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m9\u001b[0m\u001b[32m. Niacin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B3\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m10\u001b[0m\u001b[32m. Pantothenic acid \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B5\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m11\u001b[0m\u001b[32m. Pyridoxine \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B6\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m12\u001b[0m\u001b[32m. Biotin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B7\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m13\u001b[0m\u001b[32m. Folate \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B9\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m14\u001b[0m\u001b[32m. Cobalamin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B12\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m15\u001b[0m\u001b[32m. Choline\u001b[0m\n", + "\u001b[1;36m16\u001b[0m\u001b[32m. Inositol\u001b[0m\n", + "\u001b[1;36m17\u001b[0m\u001b[32m. Vitamin B15\u001b[0m\n", + "\u001b[1;36m18\u001b[0m\u001b[32m. Vitamin B17\u001b[0m\n", + "\u001b[1;36m19\u001b[0m\u001b[32m. Vitamin F\u001b[0m\n", + "\u001b[1;36m20\u001b[0m\u001b[32m. Vitamin G\u001b[0m\n", + "\u001b[1;36m21\u001b[0m\u001b[32m. 
Vitamin H\u001b[0m\n", + "\u001b[1;36m22\u001b[0m\u001b[32m. Vitamin J\u001b[0m\n", + "\u001b[1;36m23\u001b[0m\u001b[32m. Vitamin L\u001b[0m\n", + "\u001b[1;36m24\u001b[0m\u001b[32m. Vitamin M\u001b[0m\n", + "\u001b[1;36m25\u001b[0m\u001b[32m. Vitamin P\u001b[0m\n", + "\u001b[1;36m26\u001b[0m\u001b[32m. Vitamin Q\u001b[0m\n", + "\u001b[1;36m27\u001b[0m\u001b[32m. Vitamin R\u001b[0m\n", + "\u001b[1;36m28\u001b[0m\u001b[32m. Vitamin S\u001b[0m\n", + "\u001b[1;36m29\u001b[0m\u001b[32m. Vitamin T\u001b[0m\n", + "\u001b[1;36m30\u001b[0m\u001b[32m. Vitamin U\u001b[0m\n", + "\u001b[1;36m31\u001b[0m\u001b[32m. Vitamin V\u001b[0m\n", + "\u001b[1;36m32\u001b[0m\u001b[32m. Vitamin W\u001b[0m\n", + "\u001b[1;36m33\u001b[0m\u001b[32m. Vitamin X\u001b[0m\n", + "\u001b[1;36m34\u001b[0m\u001b[32m. Vitamin Y\u001b[0m\n", + "\u001b[1;36m35\u001b[0m\u001b[32m. Vitamin Z\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mError reason: The instructions are missing in the provided data, making it impossible to determine the specific \u001b[0m\n", + "\u001b[32merror in the prediction.\u001b[0m\n", + "\n", + "\u001b[32mInput: Macronutrients\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mMacronutrients are essential nutrients that provide the body with energy and support various bodily functions. They\u001b[0m\n", + "\u001b[32mare divided into three categories: carbohydrates, proteins, and fats.\u001b[0m\n", + "\n", + "\u001b[32mCarbohydrates are the main source of energy for the body. They are found in foods such as grains, fruits, and \u001b[0m\n", + "\u001b[32mvegetables. They are broken down into glucose, which is used by the body for energy.\u001b[0m\n", + "\n", + "\u001b[32mProteins are important for building and repairing tissues in the body. They are found in foods such as meat, fish, \u001b[0m\n", + "\u001b[32meggs, and beans. 
Proteins are made up of amino acids, which are essential for the body to function properly.\u001b[0m\n", + "\n", + "\u001b[32mFats are a concentrated source of energy and are important for insulation and protection of organs. They are found \u001b[0m\n", + "\u001b[32min foods such as oils, nuts, and avocados. Fats are also necessary for the absorption of certain vitamins and \u001b[0m\n", + "\u001b[32mminerals.\u001b[0m\n", + "\n", + "\u001b[32mIn addition to providing energy, macronutrients also play a role in maintaining a healthy immune system, regulating\u001b[0m\n", + "\u001b[32mhormones, and supporting brain function. It is important to have a balanced intake of all three macronutrients in \u001b[0m\n", + "\u001b[32morder to maintain overall health and well-being.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific about what the model should predict about macronutrients.\u001b[0m\n", + "\u001b[32mAs a result, the model provided a detailed explanation about macronutrients instead of just listing them as in the \u001b[0m\n", + "\u001b[32mground truth.\u001b[0m\n", + "\n", + "\u001b[32mInput: Minerals\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[1;36m1\u001b[0m\u001b[32m. Iron\u001b[0m\n", + "\u001b[1;36m2\u001b[0m\u001b[32m. Calcium\u001b[0m\n", + "\u001b[1;36m3\u001b[0m\u001b[32m. Magnesium\u001b[0m\n", + "\u001b[1;36m4\u001b[0m\u001b[32m. Potassium\u001b[0m\n", + "\u001b[1;36m5\u001b[0m\u001b[32m. Sodium\u001b[0m\n", + "\u001b[1;36m6\u001b[0m\u001b[32m. Zinc\u001b[0m\n", + "\u001b[1;36m7\u001b[0m\u001b[32m. Copper\u001b[0m\n", + "\u001b[1;36m8\u001b[0m\u001b[32m. Manganese\u001b[0m\n", + "\u001b[1;36m9\u001b[0m\u001b[32m. Phosphorus\u001b[0m\n", + "\u001b[1;36m10\u001b[0m\u001b[32m. Selenium\u001b[0m\n", + "\u001b[1;36m11\u001b[0m\u001b[32m. Chromium\u001b[0m\n", + "\u001b[1;36m12\u001b[0m\u001b[32m. 
Iodine\u001b[0m\n", + "\u001b[1;36m13\u001b[0m\u001b[32m. Fluoride\u001b[0m\n", + "\u001b[1;36m14\u001b[0m\u001b[32m. Molybdenum\u001b[0m\n", + "\u001b[1;36m15\u001b[0m\u001b[32m. Cobalt\u001b[0m\n", + "\u001b[1;36m16\u001b[0m\u001b[32m. Nickel\u001b[0m\n", + "\u001b[1;36m17\u001b[0m\u001b[32m. Vanadium\u001b[0m\n", + "\u001b[1;36m18\u001b[0m\u001b[32m. Silicon\u001b[0m\n", + "\u001b[1;36m19\u001b[0m\u001b[32m. Boron\u001b[0m\n", + "\u001b[1;36m20\u001b[0m\u001b[32m. Chloride\u001b[0m\n", + "\u001b[32mGround truth: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mError reason: The instructions are missing in the provided context, making it impossible to determine the specific \u001b[0m\n", + "\u001b[32merror in the prediction.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"skill_0\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_0\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Updated instructions for skill \"skill_0\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Given a category of nutrients, list the most common types of nutrients in that category. Do not provide detailed \n",
+       "explanations or list all possible nutrients in the category, just list the most common ones. \n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: Vitamins\n",
+       "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n",
+       "Output: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\n",
+       "\n",
+       "Input: Macronutrients\n",
+       "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n",
+       "Output: Carbohydrates, Proteins, Fats\n",
+       "\n",
+       "Input: Minerals\n",
+       "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n",
+       "Output: Calcium, Iron, Magnesium\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mGiven a category of nutrients, list the most common types of nutrients in that category. Do not provide detailed \u001b[0m\n", + "\u001b[1;32mexplanations or list all possible nutrients in the category, just list the most common ones. \u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamins\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Macronutrients\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Minerals\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Calcium, Iron, Magnesium\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Re-apply skill_0 skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply skill_0 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_0\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:01<00:00, 2.16it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 68.86it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n",
+       "\n",
+       "=> Iteration #1: Comparing to ground truth, analyzing and improving ...\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m1\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Comparing predictions to ground truth data ...\n",
+       "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                                                                                                                   \n",
+       "  category         entities           text               skill_0             skill_1            skill_0   skill_1  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+       "  Macronutrients   Carbohydrates,     Carbohydrates       Carbohydrates,                        True      False    \n",
+       "                   Proteins, Fats     provide quick      Proteins, Fats      The recommended                       \n",
+       "                                      energy, proteins                       daily intake of                       \n",
+       "                                      are essential                          carbohydrates is                      \n",
+       "                                      for muscle                             45-65% of your                        \n",
+       "                                      repair and                             total calorie                         \n",
+       "                                      growth, and fats                       intake. This                          \n",
+       "                                      are vital for                          means that for a                      \n",
+       "                                      long-term energy                       2000 calorie                          \n",
+       "                                      storage and cell                       diet, you should                      \n",
+       "                                      function.                              aim for 225-325                       \n",
+       "                                                                             grams of                              \n",
+       "                                                                             carbohydrates                         \n",
+       "                                                                             per day.                              \n",
+       "                                                                                                                   \n",
+       "                                                                             The recommended                       \n",
+       "                                                                             daily intake of                       \n",
+       "                                                                             proteins is                           \n",
+       "                                                                             10-35% of your                        \n",
+       "                                                                             total calorie                         \n",
+       "                                                                             intake. This                          \n",
+       "                                                                             means that for a                      \n",
+       "                                                                             2000 calorie                          \n",
+       "                                                                             diet, you should                      \n",
+       "                                                                             aim for 50-175                        \n",
+       "                                                                             grams of protein                      \n",
+       "                                                                             per day.                              \n",
+       "                                                                                                                   \n",
+       "                                                                             The recommended                       \n",
+       "                                                                             daily intake of                       \n",
+       "                                                                             fats is 20-35%                        \n",
+       "                                                                             of your total                         \n",
+       "                                                                             calorie intake.                       \n",
+       "                                                                             This means that                       \n",
+       "                                                                             for a 2000                            \n",
+       "                                                                             calorie diet,                         \n",
+       "                                                                             you should aim                        \n",
+       "                                                                             for 44-78 grams                       \n",
+       "                                                                             of fat per day.                       \n",
+       "                                                                             It is important                       \n",
+       "                                                                             to choose                             \n",
+       "                                                                             healthy sources                       \n",
+       "                                                                             of fats, such as                      \n",
+       "                                                                             avocados, nuts,                       \n",
+       "                                                                             and olive oil,                        \n",
+       "                                                                             and limit                             \n",
+       "                                                                             saturated and                         \n",
+       "                                                                             trans fats.                           \n",
+       "  Vitamins         Vitamin A,         Vitamin A is       Vitamin A,                             False     False    \n",
+       "                   Vitamin C,         crucial for good   Vitamin B,          Vitamin A,                            \n",
+       "                   Vitamin D          vision and a       Vitamin C,          Vitamin B,                            \n",
+       "                                      healthy immune     Vitamin D,          Vitamin C,                            \n",
+       "                                      system, Vitamin    Vitamin E,          Vitamin D,                            \n",
+       "                                      C helps in the     Vitamin K           Vitamin E,                            \n",
+       "                                      repair of                              Vitamin K                             \n",
+       "                                      tissues and the                                                              \n",
+       "                                      enzymatic                                                                    \n",
+       "                                      production of                                                                \n",
+       "                                      certain                                                                      \n",
+       "                                      neurotransmitte…                                                             \n",
+       "                                      and Vitamin D is                                                             \n",
+       "                                      essential for                                                                \n",
+       "                                      strong bones and                                                             \n",
+       "                                      teeth as it                                                                  \n",
+       "                                      helps the body                                                               \n",
+       "                                      absorb calcium.                                                              \n",
+       "  Minerals         Calcium, Iron,     Calcium is         Calcium, Iron,                         True      False    \n",
+       "                   Magnesium          necessary for      Magnesium           Calcium: 20%                          \n",
+       "                                      maintaining                            Iron: 10%                             \n",
+       "                                      healthy bones                          Magnesium: 15%                        \n",
+       "                                      and teeth, Iron                                                              \n",
+       "                                      is crucial for                                                               \n",
+       "                                      making red blood                                                             \n",
+       "                                      cells and                                                                    \n",
+       "                                      transporting                                                                 \n",
+       "                                      oxygen                                                                       \n",
+       "                                      throughout the                                                               \n",
+       "                                      body, and                                                                    \n",
+       "                                      Magnesium plays                                                              \n",
+       "                                      a role in over                                                               \n",
+       "                                      300 enzyme                                                                   \n",
+       "                                      reactions in the                                                             \n",
+       "                                      human body,                                                                  \n",
+       "                                      including the                                                                \n",
+       "                                      metabolism of                                                                \n",
+       "                                      food, synthesis                                                              \n",
+       "                                      of fatty acids                                                               \n",
+       "                                      and proteins,                                                                \n",
+       "                                      and the                                                                      \n",
+       "                                      transmission of                                                              \n",
+       "                                      nerve impulses.                                                              \n",
+       "                                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. 
\n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. \n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin B, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin B, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin E, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin E, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitte…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones 
and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Accuracy = 66.67%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m66.67\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Analyze evaluation experience ...\n",
+       "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████| 1/1 [00:00<00:00, 174.49it/s]\n", + "100%|█████████| 1/1 [00:04<00:00, 4.15s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
Error analysis for skill \"skill_0\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Input: Vitamins\n",
+       "Prediction: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\n",
+       "Ground truth: Vitamin A, Vitamin C, Vitamin D\n",
+       "Error reason: The LLM included more vitamins than the ground truth. The instruction does not specify a number of \n",
+       "vitamins to list, so the LLM's prediction is not necessarily incorrect. The discrepancy may be due to different \n",
+       "interpretations of \"most common\" vitamins.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamins\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mError reason: The LLM included more vitamins than the ground truth. The instruction does not specify a number of \u001b[0m\n", + "\u001b[32mvitamins to list, so the LLM's prediction is not necessarily incorrect. The discrepancy may be due to different \u001b[0m\n", + "\u001b[32minterpretations of \u001b[0m\u001b[32m\"most common\"\u001b[0m\u001b[32m vitamins.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"skill_0\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_0\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Updated instructions for skill \"skill_0\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Given a category of nutrients, list the three most common types of nutrients in that category. Do not provide \n",
+       "detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: Vitamins\n",
+       "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n",
+       "Output: Vitamin A, Vitamin C, Vitamin D\n",
+       "\n",
+       "Input: Macronutrients\n",
+       "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n",
+       "Output: Carbohydrates, Proteins, Fats\n",
+       "\n",
+       "Input: Minerals\n",
+       "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n",
+       "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n",
+       "Output: Calcium, Iron, Magnesium\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mGiven a category of nutrients, list the three most common types of nutrients in that category. Do not provide \u001b[0m\n", + "\u001b[1;32mdetailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamins\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Macronutrients\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Minerals\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Calcium, Iron, Magnesium\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Re-apply skill_0 skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply skill_0 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_0\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:01<00:00, 2.11it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 60.17it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n",
+       "\n",
+       "=> Iteration #2: Comparing to ground truth, analyzing and improving ...\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m2\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Comparing predictions to ground truth data ...\n",
+       "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                                                                                                                   \n",
+       "  category         entities           text               skill_0             skill_1            skill_0   skill_1  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+       "  Macronutrients   Carbohydrates,     Carbohydrates       Carbohydrates,                        True      False    \n",
+       "                   Proteins, Fats     provide quick      Proteins, Fats      The recommended                       \n",
+       "                                      energy, proteins                       daily intake of                       \n",
+       "                                      are essential                          carbohydrates is                      \n",
+       "                                      for muscle                             45-65% of your                        \n",
+       "                                      repair and                             total calorie                         \n",
+       "                                      growth, and fats                       intake. This                          \n",
+       "                                      are vital for                          means that for a                      \n",
+       "                                      long-term energy                       2000 calorie                          \n",
+       "                                      storage and cell                       diet, you should                      \n",
+       "                                      function.                              aim for 225-325                       \n",
+       "                                                                             grams of                              \n",
+       "                                                                             carbohydrates                         \n",
+       "                                                                             per day.                              \n",
+       "                                                                                                                   \n",
+       "                                                                             The recommended                       \n",
+       "                                                                             daily intake of                       \n",
+       "                                                                             proteins is                           \n",
+       "                                                                             10-35% of your                        \n",
+       "                                                                             total calorie                         \n",
+       "                                                                             intake. This                          \n",
+       "                                                                             means that for a                      \n",
+       "                                                                             2000 calorie                          \n",
+       "                                                                             diet, you should                      \n",
+       "                                                                             aim for 50-175                        \n",
+       "                                                                             grams of protein                      \n",
+       "                                                                             per day.                              \n",
+       "                                                                                                                   \n",
+       "                                                                             The recommended                       \n",
+       "                                                                             daily intake of                       \n",
+       "                                                                             fats is 20-35%                        \n",
+       "                                                                             of your total                         \n",
+       "                                                                             calorie intake.                       \n",
+       "                                                                             This means that                       \n",
+       "                                                                             for a 2000                            \n",
+       "                                                                             calorie diet,                         \n",
+       "                                                                             you should aim                        \n",
+       "                                                                             for 44-78 grams                       \n",
+       "                                                                             of fat per day.                       \n",
+       "                                                                             It is important                       \n",
+       "                                                                             to choose                             \n",
+       "                                                                             healthy sources                       \n",
+       "                                                                             of fats, such as                      \n",
+       "                                                                             avocados, nuts,                       \n",
+       "                                                                             and olive oil,                        \n",
+       "                                                                             and limit                             \n",
+       "                                                                             saturated and                         \n",
+       "                                                                             trans fats.                           \n",
+       "  Vitamins         Vitamin A,         Vitamin A is       Vitamin A,                             True      False    \n",
+       "                   Vitamin C,         crucial for good   Vitamin C,          Vitamin A,                            \n",
+       "                   Vitamin D          vision and a       Vitamin D           Vitamin C,                            \n",
+       "                                      healthy immune                         Vitamin D                             \n",
+       "                                      system, Vitamin                                                              \n",
+       "                                      C helps in the                                                               \n",
+       "                                      repair of                                                                    \n",
+       "                                      tissues and the                                                              \n",
+       "                                      enzymatic                                                                    \n",
+       "                                      production of                                                                \n",
+       "                                      certain                                                                      \n",
+       "                                      neurotransmitte…                                                             \n",
+       "                                      and Vitamin D is                                                             \n",
+       "                                      essential for                                                                \n",
+       "                                      strong bones and                                                             \n",
+       "                                      teeth as it                                                                  \n",
+       "                                      helps the body                                                               \n",
+       "                                      absorb calcium.                                                              \n",
+       "  Minerals         Calcium, Iron,     Calcium is         Calcium, Iron,                         True      False    \n",
+       "                   Magnesium          necessary for      Magnesium           Calcium: 20%                          \n",
+       "                                      maintaining                            Iron: 10%                             \n",
+       "                                      healthy bones                          Magnesium: 15%                        \n",
+       "                                      and teeth, Iron                                                              \n",
+       "                                      is crucial for                                                               \n",
+       "                                      making red blood                                                             \n",
+       "                                      cells and                                                                    \n",
+       "                                      transporting                                                                 \n",
+       "                                      oxygen                                                                       \n",
+       "                                      throughout the                                                               \n",
+       "                                      body, and                                                                    \n",
+       "                                      Magnesium plays                                                              \n",
+       "                                      a role in over                                                               \n",
+       "                                      300 enzyme                                                                   \n",
+       "                                      reactions in the                                                             \n",
+       "                                      human body,                                                                  \n",
+       "                                      including the                                                                \n",
+       "                                      metabolism of                                                                \n",
+       "                                      food, synthesis                                                              \n",
+       "                                      of fatty acids                                                               \n",
+       "                                      and proteins,                                                                \n",
+       "                                      and the                                                                      \n",
+       "                                      transmission of                                                              \n",
+       "                                      nerve impulses.                                                              \n",
+       "                                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. 
\n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. \n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitte…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Accuracy = 0.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Analyze evaluation experience ...\n",
+       "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████| 3/3 [00:00<00:00, 243.33it/s]\n", + "100%|█████████| 3/3 [00:04<00:00, 1.55s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
Error analysis for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Prediction: \n",
+       "Vitamin A, Vitamin C, Vitamin D\n",
+       "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n",
+       "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n",
+       "teeth as it helps the body absorb calcium.\n",
+       "Error reason: The instructions were not clear or specific, leading to the model simply repeating the input instead \n",
+       "of providing detailed information about each vitamin.\n",
+       "\n",
+       "Input: Calcium, Iron, Magnesium\n",
+       "Prediction: \n",
+       "Calcium: 20%\n",
+       "Iron: 10%\n",
+       "Magnesium: 15%\n",
+       "Ground truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n",
+       "cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the \n",
+       "human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n",
+       "impulses.\n",
+       "Error reason: The model misunderstood the instructions, providing percentages instead of describing the roles of \n",
+       "Calcium, Iron, and Magnesium in the human body as per the ground truth.\n",
+       "\n",
+       "Input:  Carbohydrates, Proteins, Fats\n",
+       "Prediction: \n",
+       "The recommended daily intake of carbohydrates is 45-65% of your total calorie intake. This means that for a 2000 \n",
+       "calorie diet, you should aim for 225-325 grams of carbohydrates per day.\n",
+       "\n",
+       "The recommended daily intake of proteins is 10-35% of your total calorie intake. This means that for a 2000 calorie\n",
+       "diet, you should aim for 50-175 grams of protein per day.\n",
+       "\n",
+       "The recommended daily intake of fats is 20-35% of your total calorie intake. This means that for a 2000 calorie \n",
+       "diet, you should aim for 44-78 grams of fat per day. It is important to choose healthy sources of fats, such as \n",
+       "avocados, nuts, and olive oil, and limit saturated and trans fats.\n",
+       "Ground truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\n",
+       "vital for long-term energy storage and cell function.\n",
+       "Error reason: The instructions were not clear or specific, leading to a mismatch between the predicted output and \n",
+       "the ground truth. The model provided nutritional guidelines for the intake of carbohydrates, proteins, and fats, \n",
+       "while the ground truth was about the functions of these nutrients in the body.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mVitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific, leading to the model simply repeating the input instead \u001b[0m\n", + "\u001b[32mof providing detailed information about each vitamin.\u001b[0m\n", + "\n", + "\u001b[32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mCalcium: \u001b[0m\u001b[1;36m20\u001b[0m\u001b[32m%\u001b[0m\n", + "\u001b[32mIron: \u001b[0m\u001b[1;36m10\u001b[0m\u001b[32m%\u001b[0m\n", + "\u001b[32mMagnesium: \u001b[0m\u001b[1;36m15\u001b[0m\u001b[32m%\u001b[0m\n", + "\u001b[32mGround truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells and transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[32m enzyme reactions in the \u001b[0m\n", + "\u001b[32mhuman body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[32mimpulses.\u001b[0m\n", + "\u001b[32mError reason: The model misunderstood the instructions, providing percentages instead of describing the roles of \u001b[0m\n", + "\u001b[32mCalcium, Iron, and Magnesium in the human body as per the ground truth.\u001b[0m\n", + "\n", + "\u001b[32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mThe recommended daily intake 
of carbohydrates is \u001b[0m\u001b[1;36m45\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m65\u001b[0m\u001b[32m% of your total calorie intake. This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m \u001b[0m\n", + "\u001b[32mcalorie diet, you should aim for \u001b[0m\u001b[1;36m225\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m325\u001b[0m\u001b[32m grams of carbohydrates per day.\u001b[0m\n", + "\n", + "\u001b[32mThe recommended daily intake of proteins is \u001b[0m\u001b[1;36m10\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m35\u001b[0m\u001b[32m% of your total calorie intake. This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m calorie\u001b[0m\n", + "\u001b[32mdiet, you should aim for \u001b[0m\u001b[1;36m50\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m175\u001b[0m\u001b[32m grams of protein per day.\u001b[0m\n", + "\n", + "\u001b[32mThe recommended daily intake of fats is \u001b[0m\u001b[1;36m20\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m35\u001b[0m\u001b[32m% of your total calorie intake. This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m calorie \u001b[0m\n", + "\u001b[32mdiet, you should aim for \u001b[0m\u001b[1;36m44\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m78\u001b[0m\u001b[32m grams of fat per day. It is important to choose healthy sources of fats, such as \u001b[0m\n", + "\u001b[32mavocados, nuts, and olive oil, and limit saturated and trans fats.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\u001b[0m\n", + "\u001b[32mvital for long-term energy storage and cell function.\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific, leading to a mismatch between the predicted output and \u001b[0m\n", + "\u001b[32mthe ground truth. 
The model provided nutritional guidelines for the intake of carbohydrates, proteins, and fats, \u001b[0m\n", + "\u001b[32mwhile the ground truth was about the functions of these nutrients in the body.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"skill_1\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Updated instructions for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
For each nutrient or vitamin listed in the input, provide a brief description of its role or function in the human \n",
+       "body. Do not include any percentages or recommended daily intake values, but focus on explaining what each nutrient\n",
+       "or vitamin does for the body.\n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \n",
+       "and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\n",
+       "it helps the body absorb calcium.\n",
+       "\n",
+       "Input: Calcium, Iron, Magnesium\n",
+       "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \n",
+       "and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human \n",
+       "body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n",
+       "impulses.\n",
+       "\n",
+       "Input:  Carbohydrates, Proteins, Fats\n",
+       "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\n",
+       "for long-term energy storage and cell function.\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a brief description of its role or function in the human \u001b[0m\n", + "\u001b[1;32mbody. Do not include any percentages or recommended daily intake values, but focus on explaining what each nutrient\u001b[0m\n", + "\u001b[1;32mor vitamin does for the body.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \u001b[0m\n", + "\u001b[1;32mand the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\u001b[0m\n", + "\u001b[1;32mit helps the body absorb calcium.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \u001b[0m\n", + "\u001b[1;32mand transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[1;32m enzyme reactions in the human \u001b[0m\n", + "\u001b[1;32mbody, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[1;32mimpulses.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\u001b[0m\n", + "\u001b[1;32mfor long-term energy storage and cell function.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Re-apply skill_1 skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:03<00:00, 1.19s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n",
+       "\n",
+       "=> Iteration #3: Comparing to ground truth, analyzing and improving ...\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m3\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Comparing predictions to ground truth data ...\n",
+       "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                                                                                                                   \n",
+       "  category         entities           text               skill_0             skill_1            skill_0   skill_1  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+       "  Macronutrients   Carbohydrates,     Carbohydrates       Carbohydrates,      Carbohydrates     True      True     \n",
+       "                   Proteins, Fats     provide quick      Proteins, Fats      provide quick                         \n",
+       "                                      energy, proteins                       energy, proteins                      \n",
+       "                                      are essential                          are essential                         \n",
+       "                                      for muscle                             for muscle                            \n",
+       "                                      repair and                             repair and                            \n",
+       "                                      growth, and fats                       growth, and fats                      \n",
+       "                                      are vital for                          are vital for                         \n",
+       "                                      long-term energy                       long-term energy                      \n",
+       "                                      storage and cell                       storage and cell                      \n",
+       "                                      function.                              function.                             \n",
+       "  Vitamins         Vitamin A,         Vitamin A is       Vitamin A,           Vitamin A is      True      False    \n",
+       "                   Vitamin C,         crucial for good   Vitamin C,          crucial for good                      \n",
+       "                   Vitamin D          vision and a       Vitamin D           vision and a                          \n",
+       "                                      healthy immune                         healthy immune                        \n",
+       "                                      system, Vitamin                        system. It is                         \n",
+       "                                      C helps in the                         also important                        \n",
+       "                                      repair of                              for the growth                        \n",
+       "                                      tissues and the                        and development                       \n",
+       "                                      enzymatic                              of cells,                             \n",
+       "                                      production of                          including skin                        \n",
+       "                                      certain                                cells. Vitamin C                      \n",
+       "                                      neurotransmitte…                       is an                                 \n",
+       "                                      and Vitamin D is                       antioxidant that                      \n",
+       "                                      essential for                          helps protect                         \n",
+       "                                      strong bones and                       cells from                            \n",
+       "                                      teeth as it                            damage and is                         \n",
+       "                                      helps the body                         necessary for                         \n",
+       "                                      absorb calcium.                        the production                        \n",
+       "                                                                             of collagen, a                        \n",
+       "                                                                             protein that                          \n",
+       "                                                                             helps with wound                      \n",
+       "                                                                             healing and                           \n",
+       "                                                                             maintaining                           \n",
+       "                                                                             healthy skin,                         \n",
+       "                                                                             bones, and blood                      \n",
+       "                                                                             vessels. Vitamin                      \n",
+       "                                                                             D is essential                        \n",
+       "                                                                             for strong bones                      \n",
+       "                                                                             and teeth as it                       \n",
+       "                                                                             helps the body                        \n",
+       "                                                                             absorb calcium.                       \n",
+       "                                                                             It also plays a                       \n",
+       "                                                                             role in immune                        \n",
+       "                                                                             function and may                      \n",
+       "                                                                             help reduce the                       \n",
+       "                                                                             risk of certain                       \n",
+       "                                                                             diseases such as                      \n",
+       "                                                                             cancer and heart                      \n",
+       "                                                                             disease.                              \n",
+       "  Minerals         Calcium, Iron,     Calcium is         Calcium, Iron,       Calcium is        True      True     \n",
+       "                   Magnesium          necessary for      Magnesium           necessary for                         \n",
+       "                                      maintaining                            maintaining                           \n",
+       "                                      healthy bones                          healthy bones                         \n",
+       "                                      and teeth, Iron                        and teeth, Iron                       \n",
+       "                                      is crucial for                         is crucial for                        \n",
+       "                                      making red blood                       making red blood                      \n",
+       "                                      cells and                              cells and                             \n",
+       "                                      transporting                           transporting                          \n",
+       "                                      oxygen                                 oxygen                                \n",
+       "                                      throughout the                         throughout the                        \n",
+       "                                      body, and                              body, and                             \n",
+       "                                      Magnesium plays                        Magnesium plays                       \n",
+       "                                      a role in over                         a role in over                        \n",
+       "                                      300 enzyme                             300 enzyme                            \n",
+       "                                      reactions in the                       reactions in the                      \n",
+       "                                      human body,                            human body,                           \n",
+       "                                      including the                          including the                         \n",
+       "                                      metabolism of                          metabolism of                         \n",
+       "                                      food, synthesis                        food, synthesis                       \n",
+       "                                      of fatty acids                         of fatty acids                        \n",
+       "                                      and proteins,                          and proteins,                         \n",
+       "                                      and the                                and the                               \n",
+       "                                      transmission of                        transmission of                       \n",
+       "                                      nerve impulses.                        nerve impulses.                       \n",
+       "                                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True True \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair and \n", + " growth, and fats growth, and fats \n", + " are vital for are vital for \n", + " long-term energy long-term energy \n", + " storage and cell storage and cell \n", + " function. function. 
\n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m Vitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem. 
It is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2malso important \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfor the growth \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand development \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mof cells, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mincluding skin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcells. 
Vitamin C\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitte…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mis an \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mantioxidant that\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps protect \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcells from \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdamage and is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mnecessary for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mthe production \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mof collagen, a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mprotein that \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps with wound\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealing and \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mmaintaining \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy skin, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mbones, and blood\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvessels. 
Vitamin\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mD is essential \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfor strong bones\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand teeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mIt also plays a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrole in immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfunction and may\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelp reduce the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrisk of certain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdiseases such as\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcancer and heart\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdisease. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True True \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells and \n", + " transporting transporting \n", + " oxygen oxygen \n", + " throughout the throughout the \n", + " body, and body, and \n", + " Magnesium plays Magnesium plays \n", + " a role in over a role in over \n", + " 300 enzyme 300 enzyme \n", + " reactions in the reactions in the \n", + " human body, human body, \n", + " including the including the \n", + " metabolism of metabolism of \n", + " food, synthesis food, synthesis \n", + " of fatty acids of fatty acids \n", + " and proteins, and proteins, \n", + " and the and the \n", + " transmission of transmission of \n", + " nerve impulses. nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Accuracy = 66.67%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m66.67\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Analyze evaluation experience ...\n",
+       "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████| 1/1 [00:00<00:00, 201.00it/s]\n", + "100%|█████████| 1/1 [00:09<00:00, 9.70s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
Error analysis for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Prediction:  Vitamin A is crucial for good vision and a healthy immune system. It is also important for the growth \n",
+       "and development of cells, including skin cells. Vitamin C is an antioxidant that helps protect cells from damage \n",
+       "and is necessary for the production of collagen, a protein that helps with wound healing and maintaining healthy \n",
+       "skin, bones, and blood vessels. Vitamin D is essential for strong bones and teeth as it helps the body absorb \n",
+       "calcium. It also plays a role in immune function and may help reduce the risk of certain diseases such as cancer \n",
+       "and heart disease.\n",
+       "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n",
+       "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n",
+       "teeth as it helps the body absorb calcium.\n",
+       "Error reason: The model's prediction does not match the ground truth because it provided additional information \n",
+       "about the roles of Vitamin A, C, and D in the body that were not included in the ground truth. For example, it \n",
+       "mentioned that Vitamin A is important for the growth and development of cells, including skin cells, and that \n",
+       "Vitamin C is necessary for the production of collagen. It also mentioned that Vitamin D plays a role in immune \n",
+       "function and may help reduce the risk of certain diseases such as cancer and heart disease. These additional \n",
+       "details are not wrong, but they do not align with the simpler descriptions provided in the ground truth.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A is crucial for good vision and a healthy immune system. It is also important for the growth \u001b[0m\n", + "\u001b[32mand development of cells, including skin cells. Vitamin C is an antioxidant that helps protect cells from damage \u001b[0m\n", + "\u001b[32mand is necessary for the production of collagen, a protein that helps with wound healing and maintaining healthy \u001b[0m\n", + "\u001b[32mskin, bones, and blood vessels. Vitamin D is essential for strong bones and teeth as it helps the body absorb \u001b[0m\n", + "\u001b[32mcalcium. It also plays a role in immune function and may help reduce the risk of certain diseases such as cancer \u001b[0m\n", + "\u001b[32mand heart disease.\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The model's prediction does not match the ground truth because it provided additional information \u001b[0m\n", + "\u001b[32mabout the roles of Vitamin A, C, and D in the body that were not included in the ground truth. For example, it \u001b[0m\n", + "\u001b[32mmentioned that Vitamin A is important for the growth and development of cells, including skin cells, and that \u001b[0m\n", + "\u001b[32mVitamin C is necessary for the production of collagen. It also mentioned that Vitamin D plays a role in immune \u001b[0m\n", + "\u001b[32mfunction and may help reduce the risk of certain diseases such as cancer and heart disease. 
These additional \u001b[0m\n", + "\u001b[32mdetails are not wrong, but they do not align with the simpler descriptions provided in the ground truth.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"skill_1\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Updated instructions for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
For each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \n",
+       "the human body. Avoid including additional details or secondary functions. Do not include any percentages or \n",
+       "recommended daily intake values, but focus on explaining the main function of each nutrient or vitamin for the \n",
+       "body.\n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues,\n",
+       "and Vitamin D is essential for strong bones and teeth.\n",
+       "\n",
+       "Input: Calcium, Iron, Magnesium\n",
+       "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells, \n",
+       "and Magnesium plays a role in the metabolism of food.\n",
+       "\n",
+       "Input:  Carbohydrates, Proteins, Fats\n",
+       "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \n",
+       "long-term energy storage.\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \u001b[0m\n", + "\u001b[1;32mthe human body. Avoid including additional details or secondary functions. Do not include any percentages or \u001b[0m\n", + "\u001b[1;32mrecommended daily intake values, but focus on explaining the main function of each nutrient or vitamin for the \u001b[0m\n", + "\u001b[1;32mbody.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues,\u001b[0m\n", + "\u001b[1;32mand Vitamin D is essential for strong bones and teeth.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells, \u001b[0m\n", + "\u001b[1;32mand Magnesium plays a role in the metabolism of food.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \u001b[0m\n", + "\u001b[1;32mlong-term energy storage.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Re-apply skill_1 skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:02<00:00, 1.28it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n",
+       "\n",
+       "=> Iteration #4: Comparing to ground truth, analyzing and improving ...\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m4\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Comparing predictions to ground truth data ...\n",
+       "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                                                                                                                   \n",
+       "  category         entities           text               skill_0             skill_1            skill_0   skill_1  \n",
+       " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+       "  Macronutrients   Carbohydrates,     Carbohydrates       Carbohydrates,      Carbohydrates     True      False    \n",
+       "                   Proteins, Fats     provide quick      Proteins, Fats      provide quick                         \n",
+       "                                      energy, proteins                       energy, proteins                      \n",
+       "                                      are essential                          are essential                         \n",
+       "                                      for muscle                             for muscle                            \n",
+       "                                      repair and                             repair, and fats                      \n",
+       "                                      growth, and fats                       are vital for                         \n",
+       "                                      are vital for                          long-term energy                      \n",
+       "                                      long-term energy                       storage.                              \n",
+       "                                      storage and cell                                                             \n",
+       "                                      function.                                                                    \n",
+       "  Vitamins         Vitamin A,         Vitamin A is       Vitamin A,          Vitamin A is       True      False    \n",
+       "                   Vitamin C,         crucial for good   Vitamin C,          crucial for good                      \n",
+       "                   Vitamin D          vision and a       Vitamin D           vision and a                          \n",
+       "                                      healthy immune                         healthy immune                        \n",
+       "                                      system, Vitamin                        system, Vitamin                       \n",
+       "                                      C helps in the                         C helps in the                        \n",
+       "                                      repair of                              repair of                             \n",
+       "                                      tissues and the                        tissues, and                          \n",
+       "                                      enzymatic                              Vitamin D is                          \n",
+       "                                      production of                          essential for                         \n",
+       "                                      certain                                strong bones and                      \n",
+       "                                      neurotransmitte…                       teeth.                                \n",
+       "                                      and Vitamin D is                                                             \n",
+       "                                      essential for                                                                \n",
+       "                                      strong bones and                                                             \n",
+       "                                      teeth as it                                                                  \n",
+       "                                      helps the body                                                               \n",
+       "                                      absorb calcium.                                                              \n",
+       "  Minerals         Calcium, Iron,     Calcium is         Calcium, Iron,       Calcium is        True      False    \n",
+       "                   Magnesium          necessary for      Magnesium           necessary for                         \n",
+       "                                      maintaining                            maintaining                           \n",
+       "                                      healthy bones                          healthy bones                         \n",
+       "                                      and teeth, Iron                        and teeth, Iron                       \n",
+       "                                      is crucial for                         is crucial for                        \n",
+       "                                      making red blood                       making red blood                      \n",
+       "                                      cells and                              cells, and                            \n",
+       "                                      transporting                           Magnesium plays                       \n",
+       "                                      oxygen                                 a role in the                         \n",
+       "                                      throughout the                         metabolism of                         \n",
+       "                                      body, and                              food.                                 \n",
+       "                                      Magnesium plays                                                              \n",
+       "                                      a role in over                                                               \n",
+       "                                      300 enzyme                                                                   \n",
+       "                                      reactions in the                                                             \n",
+       "                                      human body,                                                                  \n",
+       "                                      including the                                                                \n",
+       "                                      metabolism of                                                                \n",
+       "                                      food, synthesis                                                              \n",
+       "                                      of fatty acids                                                               \n",
+       "                                      and proteins,                                                                \n",
+       "                                      and the                                                                      \n",
+       "                                      transmission of                                                              \n",
+       "                                      nerve impulses.                                                              \n",
+       "                                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True False \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair, and fats \n", + " growth, and fats are vital for \n", + " are vital for long-term energy \n", + " long-term energy storage. \n", + " storage and cell \n", + " function. 
\n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues, and \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitte…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True False \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells, and \n", + " transporting Magnesium plays \n", + " oxygen a role in the \n", + " throughout the metabolism of \n", + " body, and food. 
\n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Accuracy = 0.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Analyze evaluation experience ...\n",
+       "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████| 3/3 [00:00<00:00, 210.07it/s]\n", + "100%|█████████| 3/3 [00:12<00:00, 4.15s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
Error analysis for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Prediction: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n",
+       "tissues, and Vitamin D is essential for strong bones and teeth.\n",
+       "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n",
+       "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n",
+       "teeth as it helps the body absorb calcium.\n",
+       "Error reason: The prediction did not follow the instruction to avoid including additional details or secondary \n",
+       "functions. The ground truth includes additional functions of Vitamin C and Vitamin D, which are not present in the \n",
+       "prediction.\n",
+       "\n",
+       "Input: Calcium, Iron, Magnesium\n",
+       "Prediction:  Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n",
+       "cells, and Magnesium plays a role in the metabolism of food.\n",
+       "Ground truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n",
+       "cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the \n",
+       "human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n",
+       "impulses.\n",
+       "Error reason: The instructions asked for a concise description of the primary role or function of each nutrient in \n",
+       "the human body, without including additional details or secondary functions. The ground truth, however, provides \n",
+       "additional details about the roles of Iron and Magnesium, which goes beyond the primary function, hence not \n",
+       "following the instructions correctly.\n",
+       "\n",
+       "Input:  Carbohydrates, Proteins, Fats\n",
+       "Prediction:  Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \n",
+       "long-term energy storage.\n",
+       "Ground truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\n",
+       "vital for long-term energy storage and cell function.\n",
+       "Error reason: The original instruction was not clear about including additional functions of the nutrients. The \n",
+       "prediction missed the additional function of proteins for growth and of fats for cell function.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues, and Vitamin D is essential for strong bones and teeth.\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The prediction did not follow the instruction to avoid including additional details or secondary \u001b[0m\n", + "\u001b[32mfunctions. The ground truth includes additional functions of Vitamin C and Vitamin D, which are not present in the \u001b[0m\n", + "\u001b[32mprediction.\u001b[0m\n", + "\n", + "\u001b[32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mPrediction: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells, and Magnesium plays a role in the metabolism of food.\u001b[0m\n", + "\u001b[32mGround truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells and transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[32m enzyme reactions in the \u001b[0m\n", + "\u001b[32mhuman body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[32mimpulses.\u001b[0m\n", + "\u001b[32mError reason: The instructions asked for a concise description of the primary role or function of each nutrient in \u001b[0m\n", + "\u001b[32mthe human body, without including additional details or 
secondary functions. The ground truth, however, provides \u001b[0m\n", + "\u001b[32madditional details about the roles of Iron and Magnesium, which goes beyond the primary function, hence not \u001b[0m\n", + "\u001b[32mfollowing the instructions correctly.\u001b[0m\n", + "\n", + "\u001b[32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mPrediction: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \u001b[0m\n", + "\u001b[32mlong-term energy storage.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\u001b[0m\n", + "\u001b[32mvital for long-term energy storage and cell function.\u001b[0m\n", + "\u001b[32mError reason: The original instruction was not clear about including additional functions of the nutrients. The \u001b[0m\n", + "\u001b[32mprediction missed the additional function of proteins for growth and of fats for cell function.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"skill_1\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Updated instructions for skill \"skill_1\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
For each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \n",
+       "the human body. You can include one or two additional functions if they are commonly associated with the nutrient \n",
+       "or vitamin. Do not include any percentages or recommended daily intake values. The focus should be on explaining \n",
+       "the main function of each nutrient or vitamin for the body, along with a few other significant roles they play.\n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: Vitamin A, Vitamin C, Vitamin D\n",
+       "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \n",
+       "and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\n",
+       "it helps the body absorb calcium.\n",
+       "\n",
+       "Input: Calcium, Iron, Magnesium\n",
+       "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \n",
+       "and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human \n",
+       "body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n",
+       "impulses.\n",
+       "\n",
+       "Input:  Carbohydrates, Proteins, Fats\n",
+       "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\n",
+       "for long-term energy storage and cell function.\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \u001b[0m\n", + "\u001b[1;32mthe human body. You can include one or two additional functions if they are commonly associated with the nutrient \u001b[0m\n", + "\u001b[1;32mor vitamin. Do not include any percentages or recommended daily intake values. The focus should be on explaining \u001b[0m\n", + "\u001b[1;32mthe main function of each nutrient or vitamin for the body, along with a few other significant roles they play.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \u001b[0m\n", + "\u001b[1;32mand the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\u001b[0m\n", + "\u001b[1;32mit helps the body absorb calcium.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \u001b[0m\n", + "\u001b[1;32mand transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[1;32m enzyme reactions in the human \u001b[0m\n", + "\u001b[1;32mbody, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[1;32mimpulses.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\u001b[0m\n", + "\u001b[1;32mfor long-term energy storage and cell function.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + 
"data": { + "text/html": [ + "
Re-apply skill_1 skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:03<00:00, 1.16s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
Train is done!\n",
+       "
\n" + ], + "text/plain": [ + "Train is done!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "GroundTruthSignal(match= skill_0 skill_1\n", + "0 True False\n", + "1 True False\n", + "2 True False, errors={'skill_0': Empty DataFrame\n", + "Columns: [predictions, entities]\n", + "Index: [], 'skill_1': predictions \\\n", + "0 Carbohydrates provide quick energy, proteins ... \n", + "1 Vitamin A is crucial for good vision and a hea... \n", + "2 Calcium is necessary for maintaining healthy ... \n", + "\n", + " text \n", + "0 Carbohydrates provide quick energy, proteins a... \n", + "1 Vitamin A is crucial for good vision and a hea... \n", + "2 Calcium is necessary for maintaining healthy b... })" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "from adala.agents import Agent\n", + "from adala.skills import LinearSkillSet, TextGenerationSkill\n", + "from adala.environments import BasicEnvironment\n", + "from adala.runtimes import OpenAIRuntime\n", + "\n", + "agent = Agent(\n", + " \n", + " # Require agent to learn sequence of two skills\n", + " skills=LinearSkillSet(skills=[\n", + " TextGenerationSkill(name=\"skill_0\", instructions=\"...\", input_data_field=\"category\"),\n", + " TextGenerationSkill(name=\"skill_1\", instructions=\"...\", input_data_field=\"skill_0\")\n", + " ]),\n", + " \n", + " # provide ground truth demonstration in environment\n", + " environment=BasicEnvironment(\n", + " ground_truth_dataset=pd.DataFrame(\n", + " [{\n", + " \"category\": \"Macronutrients\",\n", + " \"entities\": \"Carbohydrates, Proteins, Fats\",\n", + " \"text\": \"Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital for long-term energy storage and cell function.\"\n", + " }, {\n", + " \"category\": \"Vitamins\",\n", + " \"entities\": \"Vitamin A, Vitamin C, Vitamin D\",\n", + " 
\"text\": \"Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as it helps the body absorb calcium.\"\n", + " }, {\n", + " \"category\": \"Minerals\",\n", + " \"entities\": \"Calcium, Iron, Magnesium\",\n", + " \"text\": \"Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve impulses.\"\n", + " }]\n", + " ),\n", + " ground_truth_columns={\n", + " 'skill_0': 'entities',\n", + " 'skill_1': 'text'\n", + " },\n", + " matching_function='fuzzy',\n", + " matching_threshold=0.9\n", + " ),\n", + ").learn(learning_iterations=5)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: skill_0\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 29.02it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
Applying skill: skill_1\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████| 3/3 [00:00<00:00, 30.00it/s]\n" + ] + } + ], + "source": [ + "predictions = agent.run(pd.DataFrame([\n", + " ['Trace Minerals'],\n", + " ['Water-Soluble Vitamins'],\n", + " ['Fatty Acids']\n", + "], columns=['category']))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryskill_0skill_1
0Trace MineralsZinc, Copper, SeleniumZinc is important for immune function, wound ...
1Water-Soluble VitaminsVitamin B, Vitamin C, FolateVitamin B is a group of essential vitamins th...
2Fatty AcidsOmega-3, Omega-6, Saturated FatOmega-3 fatty acids are important for brain f...
\n", + "
" + ], + "text/plain": [ + " category skill_0 \\\n", + "0 Trace Minerals Zinc, Copper, Selenium \n", + "1 Water-Soluble Vitamins Vitamin B, Vitamin C, Folate \n", + "2 Fatty Acids Omega-3, Omega-6, Saturated Fat \n", + "\n", + " skill_1 \n", + "0 Zinc is important for immune function, wound ... \n", + "1 Vitamin B is a group of essential vitamins th... \n", + "2 Omega-3 fatty acids are important for brain f... " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "adala", + "language": "python", + "name": "adala" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tests/test_agent_basics.py b/tests/test_agent_basics.py index 71c6793..7d2f10d 100644 --- a/tests/test_agent_basics.py +++ b/tests/test_agent_basics.py @@ -15,67 +15,67 @@ @patching( target_function=PatchedCalls.GUIDANCE.value, data=[ - # call[0]: apply first skill 0->1, first row + # call[0]: apply first skill 0->1, first row, GT = 1 5 1 {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, - # call[1]: apply first skill 0->1, second row + # call[1]: apply first skill 0->1, second row, GT = 1 1 1 -> ERROR! 
{'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, - # call[2]: analyze errors first skill 0->1 + # call[2]: prepare error inputs for first skill 0->1, second row + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': 'Input: 0 0 0'}, + # call[3]: analyze errors first skill 0->1 { 'input': { - 'input': '0 0 0', - '0->1': '1 5 1', - 'gt_0': '1 1 1' + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1' }, 'output': { 'reason': '0 transformed to 5 instead of 1' } }, - # call[3]: build error report for first skill 0->1 + # call[4]: build error report for first skill 0->1 { 'input': { 'predictions_and_errors': [{ - 'input': '0 0 0', - '0->1': '1 5 1', - 'gt_0': '1 1 1', + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1', 'reason': '0 transformed to 5 instead of 1' }]}, 'output': '''\ Input: 0 0 0 Prediction: 1 5 1 Ground Truth: 1 1 1 - Reason: 0 transformed to 5 instead of 1 + Error reason: 0 transformed to 5 instead of 1 ''', }, - # call[4]: improve first skill 0->1 + # call[5]: improve first skill 0->1 { 'input': { 'error_analysis': '''\ Input: 0 0 0 Prediction: 1 5 1 Ground Truth: 1 1 1 - Reason: 0 transformed to 5 instead of 1 + Error reason: 0 transformed to 5 instead of 1 '''}, 'output': { 'new_instruction': 'Transform 0 to 1' } }, - # call[5]: reapply skill 0->1, first row - {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, # call[6]: reapply skill 0->1, first row + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[7]: reapply skill 0->1, first row {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 1 1'}}, ] ) def test_agent_quickstart_single_skill(): from adala.agents import Agent - from adala.skills import LinearSkillSet + from adala.skills import LinearSkillSet, LLMSkill from adala.environments import BasicEnvironment agent = Agent( skills=LinearSkillSet( - skills={ - "0->1": "...", - } + skills=[LLMSkill(name="0->1", instructions="...", 
input_data_field="input")] ), environment=BasicEnvironment( ground_truth_dataset=pd.DataFrame([ @@ -113,124 +113,121 @@ def test_agent_quickstart_single_skill(): data=[ # call[0]: apply first skill 0->1, first row, GT = 1 5 1 {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, - # call[1]: apply first skill 0->1, second row, GT = 1 1 1 + # call[1]: apply first skill 0->1, second row, GT = 1 1 1 -> ERROR! {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, # call[2]: apply second skill 1->2, first row, GT = 2 5 2 {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, - # call[3]: apply second skill 1->2, second row, GT = 2 2 2 + # call[3]: apply second skill 1->2, second row, GT = 2 2 2 -> ERROR {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, - # call[4]: analyze errors first skill 0->1, error in the second row (0 0 0 -> 1 5 1) + # call[4]: prepare error inputs for first skill 0->1, second row + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': 'Input: 0 0 0'}, + # call[5]: analyze errors first skill 0->1, error in the second row (0 0 0 -> 1 5 1) { 'input': { - 'input': '0 0 0', - '0->1': '1 5 1', - 'gt_0': '1 1 1' + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1' }, 'output': { 'reason': '0 transformed to 5 instead of 1' } }, - # call[5]: build error report for first skill 0->1 + # call[6]: build error report for first skill 0->1 { 'input': { 'predictions_and_errors': [{ - 'input': '0 0 0', - '0->1': '1 5 1', - '1->2': '2 5 2', - 'gt_0': '1 1 1', - 'gt_1': '2 2 2', + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1', 'reason': '0 transformed to 5 instead of 1' }]}, 'output': '''\ Input: 0 0 0 Prediction: 1 5 1 Ground Truth: 1 1 1 - Reason: 0 transformed to 5 instead of 1 + Error reason: 0 transformed to 5 instead of 1 ''', }, - # call[6]: improve first skill 0->1 + # call[7]: improve first skill 0->1 { 'input': 
{ 'error_analysis': '''\ Input: 0 0 0 Prediction: 1 5 1 Ground Truth: 1 1 1 - Reason: 0 transformed to 5 instead of 1 + Error reason: 0 transformed to 5 instead of 1 '''}, 'output': { 'new_instruction': 'Transform 0 to 1' } }, - # call[7]: reapply first skill 0->1, first row, GT = 1 5 1 + # call[8]: reapply first skill 0->1, first row, GT = 1 5 1 {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, - # call[8]: reapply first skill 0->1, second row, GT = 1 1 1 + # call[9]: reapply first skill 0->1, second row, GT = 1 1 1 {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 1 1'}}, - # call[9]: reapply second skill 1->2, first row, GT = 2 5 2 + # call[10]: reapply second skill 1->2, first row, GT = 2 5 2 -> ERROR! {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 2 2'}}, - # call[10]: reapply second skill 1->2, second row, GT = 2 2 2 + # call[11]: reapply second skill 1->2, second row, GT = 2 2 2 {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, - # call[11]: analyze errors second skill 1->2 (first row 2 2 2 instead of 2 5 2) + # call[12]: prepare error inputs for second skill 1->2, first row + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': 'Input: 1 5 1'}, + # call[13]: analyze errors second skill 1->2 (first row 2 2 2 instead of 2 5 2) { 'input': { - 'input': '0 5 0', - '0->1': '1 5 1', - '1->2': '2 2 2', - 'gt_0': '1 5 1', - 'gt_1': '2 5 2' + 'input': 'Input: 1 5 1', + 'prediction': '2 2 2', + 'ground_truth': '2 5 2', }, 'output': { 'reason': '5 transformed to 2 instead of remaining 5' } }, - # call[12]: build error report for second skill 1->2 + # call[14]: build error report for second skill 1->2 { 'input': { 'predictions_and_errors': [{ - 'input': '0 5 0', - '0->1': '1 5 1', - '1->2': '2 2 2', - 'gt_0': '1 5 1', - 'gt_1': '2 5 2', + 'input': 'Input: 1 5 1', + 'prediction': '2 2 2', + 'ground_truth': '2 5 2', 'reason': '5 transformed to 2 instead of remaining 5' }]}, 
'output': '''\ Input: 1 5 1 Prediction: 2 2 2 Ground Truth: 2 5 2 - Reason: 5 transformed to 2 instead of remaining 5 + Error reason: 5 transformed to 2 instead of remaining 5 ''', }, - # call[13]: improve second skill 1->2 + # call[15]: improve second skill 1->2 { 'input': { 'error_analysis': '''\ Input: 1 5 1 Prediction: 2 2 2 Ground Truth: 2 5 2 - Reason: 5 transformed to 2 instead of remaining 5 + Error reason: 5 transformed to 2 instead of remaining 5 '''}, 'output': { 'new_instruction': 'Transform 1 to 2' } }, - # call[14]: reapply second skill 1->2, first row, GT = 2 5 2 + # call[16]: reapply second skill 1->2, first row, GT = 2 5 2 {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, - # call[15]: reapply second skill 1->2, second row, GT = 2 2 2 + # call[17]: reapply second skill 1->2, second row, GT = 2 2 2 {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, ] ) def test_agent_quickstart_two_skills(): from adala.agents import Agent - from adala.skills import LinearSkillSet + from adala.skills import LinearSkillSet, LLMSkill from adala.environments import BasicEnvironment agent = Agent( skills=LinearSkillSet( - skills={ - "0->1": "...", - "1->2": "..." 
- }, - skill_sequence=["0->1", "1->2"] + skills=[ + LLMSkill(name='0->1', instructions='...', input_data_field='input'), + LLMSkill(name='1->2', instructions='...', input_data_field='0->1') + ] ), environment=BasicEnvironment( ground_truth_dataset=pd.DataFrame([ @@ -274,73 +271,66 @@ def test_agent_quickstart_two_skills(): {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 4'}}, # call[3]: apply third skill 2->3, GT = 3 5 3 -> Also error, but it is due to previous error {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 4'}, 'output': {'predictions': '3 5 4'}}, - # call[4]: analyze errors for second skill 1->2 (2 5 4 instead of 2 5 2) + # call[4]: prepare error input for second skill 1->2 (2 5 4 instead of 2 5 2) + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 4', '2->3': '3 5 4'}, 'output': 'Input: 1 5 1'}, + # call[5]: analyze errors for second skill 1->2 (2 5 4 instead of 2 5 2) { 'input': { - 'input': '0 5 0', - '0->1': '1 5 1', - '1->2': '2 5 4', - '2->3': '3 5 4', - 'gt_0': '1 5 1', - 'gt_1': '2 5 2', - 'gt_2': '3 5 3', + 'input': 'Input: 1 5 1', + 'prediction': '2 5 4', + 'ground_truth': '2 5 2', }, 'output': { 'reason': '1 transformed to 4 instead of 2' } }, - # call[5]: build error report for second skill 1->2 + # call[6]: build error report for second skill 1->2 { 'input': { 'predictions_and_errors': [{ - 'input': '0 5 0', - '0->1': '1 5 1', - '1->2': '2 5 4', - '2->3': '3 5 4', - 'gt_0': '1 5 1', - 'gt_1': '2 5 2', - 'gt_2': '3 5 3', + 'input': 'Input: 1 5 1', + 'prediction': '2 5 4', + 'ground_truth': '2 5 2', 'reason': '1 transformed to 4 instead of 2' }]}, 'output': '''\ - Input: 0 5 0 + Input: 1 5 1 Prediction: 2 5 4 Ground Truth: 2 5 2 - Reason: 1 transformed to 4 instead of 2 + Error reason: 1 transformed to 4 instead of 2 ''', }, - # call[6]: improve first skill 0->1 + # call[7]: improve first skill 0->1 { 'input': { 'error_analysis': '''\ - Input: 0 5 0 + Input: 1 5 1 Prediction: 2 5 4 Ground Truth: 
2 5 2 - Reason: 1 transformed to 4 instead of 2 + Error reason: 1 transformed to 4 instead of 2 '''}, 'output': { 'new_instruction': 'Transform 1 to 2' } }, - # call[7]: apply second skill 1->2, GT = 2 5 2 + # call[8]: apply second skill 1->2, GT = 2 5 2 {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, - # call[8]: apply third skill 2->3, GT = 3 5 3 + # call[9]: apply third skill 2->3, GT = 3 5 3 {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 2'}, 'output': {'predictions': '3 5 3'}}, ] ) def test_agent_quickstart_three_skills_only_second_fail(): from adala.agents import Agent - from adala.skills import LinearSkillSet + from adala.skills import LinearSkillSet, LLMSkill from adala.environments import BasicEnvironment agent = Agent( skills=LinearSkillSet( - skills={ - "0->1": "...", - "1->2": "...", - "2->3": "..." - }, - skill_sequence=["0->1", "1->2", "2->3"] + skills=[ + LLMSkill(name="0->1", instructions="...", input_data_field="input"), + LLMSkill(name="1->2", instructions="...", input_data_field="0->1"), + LLMSkill(name="2->3", instructions="...", input_data_field="1->2"), + ] ), environment=BasicEnvironment( ground_truth_dataset=pd.DataFrame([ diff --git a/tests/test_classification.py b/tests/test_classification.py index 66b8447..e685dd9 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -19,6 +19,8 @@ def process_record_generator(*args, **kwargs): # errors if i < 2: + yield {'reason': 'Test reason'} + yield {'reason': 'Test reason'} yield {'reason': 'Test reason'} yield {'reason': 'Test reason'} yield {'': 'Test reason'} diff --git a/tests/test_llm_skillset.py b/tests/test_llm_skillset.py index da03719..44d1025 100644 --- a/tests/test_llm_skillset.py +++ b/tests/test_llm_skillset.py @@ -34,15 +34,15 @@ strict=False ) def test_llm_linear_skillset(): - from adala.skills.skillset import LinearSkillSet + from adala.skills.skillset import LinearSkillSet, LLMSkill from adala.datasets import 
DataFrameDataset, InternalDataFrame from adala.runtimes import OpenAIRuntime skillset = LinearSkillSet( skills=[ - "Extract named entities", - "Translate to French", - "Create a structured output in JSON format" + LLMSkill(name="skill_0", instructions="Extract named entities", input_data_field="text"), + LLMSkill(name="skill_1", instructions="Translate to French", input_data_field="skill_0"), + LLMSkill(name="skill_2", instructions="Create a structured output in JSON format", input_data_field="skill_1"), ] ) dataset = DataFrameDataset(df=InternalDataFrame([ From 8f37dc5e4602c01d48b024ddda73e5d23bf5a424 Mon Sep 17 00:00:00 2001 From: nik Date: Mon, 30 Oct 2023 09:24:53 +0000 Subject: [PATCH 5/5] Fix notebook examples --- adala/agents/base.py | 9 +- adala/skills/skillset.py | 3 +- examples/classification_skill.ipynb | 422 +++++----- examples/classification_skill_with_CoT.ipynb | 27 +- examples/question_answering_skill.ipynb | 27 +- examples/quickstart.ipynb | 770 +++++++++++++------ examples/summarization_skill.ipynb | 35 +- examples/text_generation_skill.ipynb | 27 +- examples/translation_skill.ipynb | 61 +- 9 files changed, 840 insertions(+), 541 deletions(-) diff --git a/adala/agents/base.py b/adala/agents/base.py index d94c5db..11608f8 100644 --- a/adala/agents/base.py +++ b/adala/agents/base.py @@ -25,6 +25,8 @@ class Agent(BaseModel, ABC): memory (LongTermMemory, optional): The agent's long-term memory. Defaults to None. runtimes (Dict[str, Runtime], optional): The runtimes available to the agent. Defaults to predefined runtimes. default_runtime (str): The default runtime used by the agent. Defaults to 'openai'. + teacher_runtimes (Dict[str, Runtime], optional): The runtimes available to the agent's teacher. Defaults to predefined runtimes. + default_teacher_runtime (str): The default runtime used by the agent's teacher. Defaults to 'openai-gpt3'. 
""" environment: Union[InternalDataFrame, Dataset, Environment] = Field(default_factory=DataFrameDataset) @@ -198,18 +200,15 @@ def learn( Args: learning_iterations (int, optional): The number of iterations for learning. Defaults to 3. accuracy_threshold (float, optional): The desired accuracy threshold to reach. Defaults to 0.9. - update_skills (bool, optional): Flag to determine if skills should be updated after learning. Defaults to True. update_memory (bool, optional): Flag to determine if memory should be updated after learning. Defaults to True. request_environment_feedback (bool, optional): Flag to determine if feedback should be requested from the environment. Defaults to True. - experience (ShortTermMemory, optional): Initial experience for the learning process. Defaults to None. runtime (str, optional): The runtime to be used for the learning process. Defaults to None. - + teacher_runtime (str, optional): The teacher runtime to be used for the learning process. Defaults to None. Returns: - ShortTermMemory: The short-term memory after the learning process. + GroundTruthSignal: The ground truth signal. """ runtime = self.get_runtime(runtime=runtime) - # TODO: support teacher runtime input, not default teacher_runtime = self.get_teacher_runtime(runtime=teacher_runtime) dataset = self.environment.as_dataset() diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py index f71088d..d727cfe 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -19,10 +19,9 @@ class SkillSet(BaseModel, ABC): cases, task decomposition can involve a graph-based approach. Args: - skills (Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]]): Provided skills + skills (Dict[str, BaseSkill]): Skills in the skill set. 
""" - # skills: Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]] skills: Dict[str, BaseSkill] @abstractmethod diff --git a/examples/classification_skill.ipynb b/examples/classification_skill.ipynb index f5acd9c..fe63102 100644 --- a/examples/classification_skill.ipynb +++ b/examples/classification_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Classification skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -78,7 +76,7 @@ "4 Natural finish for your lips. Beauty/Personal Care" ] }, - "execution_count": 11, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -97,15 +95,27 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: product_category_classification\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:00<00:00, 45.32it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 40.72it/s]\n" ] }, { @@ -142,7 +152,7 @@ "data": { "text/html": [ "
                                                                                                                   \n",
-       "  text                   category               product_category_cl…   score                  category__x__produ…  \n",
+       "  text                   category               product_category_cl…   score                  product_category_c…  \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
        "  Apple product with a   Electronics            Electronics            {'Footwear/Clothing…   True                 \n",
        "  sleek design.                                                        -7.4104013,                                 \n",
@@ -201,7 +211,7 @@
       ],
       "text/plain": [
        "                                                                                                                   \n",
-       " \u001b[1;35m \u001b[0m\u001b[1;35mtext                \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory            \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cl…\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore               \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory__x__produ…\u001b[0m\u001b[1;35m \u001b[0m \n",
+       " \u001b[1;35m \u001b[0m\u001b[1;35mtext                \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory            \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cl…\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore               \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_c…\u001b[0m\u001b[1;35m \u001b[0m \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
        "  Apple product with a   Electronics            Electronics            {'Footwear/Clothing…   True                 \n",
        "  sleek design.                                                        -7.4104013,                                 \n",
@@ -261,6 +271,19 @@
      "metadata": {},
      "output_type": "display_data"
     },
+    {
+     "data": {
+      "text/html": [
+       "
Accuracy = 80.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -278,18 +301,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████| 1/1 [00:00<00:00, 137.21it/s]\n", - "100%|█████████████████| 1/1 [00:04<00:00, 4.85s/it]\n" + "100%|███████████████████████████████| 1/1 [00:00<00:00, 140.29it/s]\n", + "100%|████████████████████████████████| 1/1 [00:02<00:00, 2.67s/it]\n" ] }, { "data": { "text/html": [ - "
Number of errors: 1\n",
+       "
Error analysis for skill \"product_category_classification\":\n",
+       "\n",
        "
\n" ], "text/plain": [ - "Number of errors: \u001b[1;36m1\u001b[0m\n" + "Error analysis for skill \u001b[32m\"product_category_classification\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -298,11 +323,27 @@ { "data": { "text/html": [ - "
Accuracy = 80.00%\n",
+       "
\n",
+       "Input: Laptop stand for the kitchen.\n",
+       "Prediction: Electronics\n",
+       "Ground truth: Furniture/Home Decor\n",
+       "Error reason: The error reason is that the original instruction does not provide clear guidelines on how to label \n",
+       "products that have multiple potential categories. In this case, the input \"Laptop stand for the kitchen\" could be \n",
+       "interpreted as both an electronic device (laptop stand) and a piece of furniture/home decor (for the kitchen). \n",
+       "Without further clarification in the instructions, it is difficult to determine the correct label.\n",
+       "\n",
        "
\n" ], "text/plain": [ - "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + "\n", + "\u001b[32mInput: Laptop stand for the kitchen.\u001b[0m\n", + "\u001b[32mPrediction: Electronics\u001b[0m\n", + "\u001b[32mGround truth: Furniture/Home Decor\u001b[0m\n", + "\u001b[32mError reason: The error reason is that the original instruction does not provide clear guidelines on how to label \u001b[0m\n", + "\u001b[32mproducts that have multiple potential categories. In this case, the input \u001b[0m\u001b[32m\"Laptop stand for the kitchen\"\u001b[0m\u001b[32m could be \u001b[0m\n", + "\u001b[32minterpreted as both an electronic device \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mlaptop stand\u001b[0m\u001b[1;32m)\u001b[0m\u001b[32m and a piece of furniture/home decor \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mfor the kitchen\u001b[0m\u001b[1;32m)\u001b[0m\u001b[32m. \u001b[0m\n", + "\u001b[32mWithout further clarification in the instructions, it is difficult to determine the correct label.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -339,37 +380,33 @@ { "data": { "text/html": [ - "
Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', \n",
-       "'Furniture/Home Decor', 'Beauty/Personal Care']. Choose the label that best represents the main category of the \n",
-       "input text.\n",
-       "\n",
-       "Examples:\n",
+       "
Label the input text with the most relevant label based on the primary function or purpose of the product. If the \n",
+       "product can be categorized into multiple labels, prioritize the label that best represents the primary function or \n",
+       "purpose. If it is still unclear, choose the label that is most commonly associated with similar products.\n",
        "\n",
        "Input: Laptop stand for the kitchen.\n",
        "Output: Furniture/Home Decor\n",
        "\n",
-       "Input: Running shoes for men.\n",
-       "Output: Footwear/Clothing\n",
+       "Input: Smartwatch with fitness tracking features.\n",
+       "Output: Electronics\n",
        "\n",
-       "Input: Organic shampoo for dry hair.\n",
-       "Output: Beauty/Personal Care\n",
+       "Input: Organic dark chocolate bar.\n",
+       "Output: Food/Beverages\n",
        "
\n" ], "text/plain": [ - "\u001b[1;32mCategorize the input text into one of the following labels: \u001b[0m\u001b[1;32m[\u001b[0m\u001b[32m'Footwear/Clothing'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Electronics'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Food/Beverages'\u001b[0m\u001b[1;32m, \u001b[0m\n", - "\u001b[32m'Furniture/Home Decor'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Beauty/Personal Care'\u001b[0m\u001b[1;32m]\u001b[0m\u001b[1;32m. Choose the label that best represents the main category of the \u001b[0m\n", - "\u001b[1;32minput text.\u001b[0m\n", - "\n", - "\u001b[1;32mExamples:\u001b[0m\n", + "\u001b[1;32mLabel the input text with the most relevant label based on the primary function or purpose of the product. If the \u001b[0m\n", + "\u001b[1;32mproduct can be categorized into multiple labels, prioritize the label that best represents the primary function or \u001b[0m\n", + "\u001b[1;32mpurpose. If it is still unclear, choose the label that is most commonly associated with similar products.\u001b[0m\n", "\n", "\u001b[1;32mInput: Laptop stand for the kitchen.\u001b[0m\n", "\u001b[1;32mOutput: Furniture/Home Decor\u001b[0m\n", "\n", - "\u001b[1;32mInput: Running shoes for men.\u001b[0m\n", - "\u001b[1;32mOutput: Footwear/Clothing\u001b[0m\n", + "\u001b[1;32mInput: Smartwatch with fitness tracking features.\u001b[0m\n", + "\u001b[1;32mOutput: Electronics\u001b[0m\n", "\n", - "\u001b[1;32mInput: Organic shampoo for dry hair.\u001b[0m\n", - "\u001b[1;32mOutput: Beauty/Personal Care\u001b[0m\n" + "\u001b[1;32mInput: Organic dark chocolate bar.\u001b[0m\n", + "\u001b[1;32mOutput: Food/Beverages\u001b[0m\n" ] }, "metadata": {}, @@ -388,11 +425,24 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
Applying skill: product_category_classification\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:03<00:00, 1.48it/s]\n" + "100%|████████████████████████████████| 5/5 [00:01<00:00, 3.32it/s]\n" ] }, { @@ -429,121 +479,121 @@ "data": { "text/html": [ "
                                                                                                                   \n",
-       "  text                   category               product_category_cl…   score                  category__x__produ…  \n",
+       "  text                   category               product_category_cl…   score                  product_category_c…  \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
        "  Apple product with a   Electronics            Electronics            {'Footwear/Clothing…   True                 \n",
-       "  sleek design.                                                        -13.669698,                                 \n",
+       "  sleek design.                                                        -23.812187,                                 \n",
        "                                                                       'Electronics':                              \n",
-       "                                                                       -4.4849444000336985…                        \n",
+       "                                                                       -1.9361265000364793…                        \n",
        "                                                                       'Food/Beverages':                           \n",
-       "                                                                       -14.937825,                                 \n",
+       "                                                                       -20.240387,                                 \n",
        "                                                                       'Furniture/Home                             \n",
-       "                                                                       Decor': -13.595754,                         \n",
+       "                                                                       Decor': -15.734467,                         \n",
        "                                                                       'Beauty/Personal                            \n",
-       "                                                                       Care': -13.327497}                          \n",
+       "                                                                       Care': -17.71083}                           \n",
        "  Laptop stand for the   Furniture/Home Decor   Furniture/Home Decor   {'Footwear/Clothing…   True                 \n",
-       "  kitchen.                                                             -9.9471035,                                 \n",
+       "  kitchen.                                                             -16.188046,                                 \n",
        "                                                                       'Electronics':                              \n",
-       "                                                                       -4.787397,                                  \n",
+       "                                                                       -3.246235,                                  \n",
        "                                                                       'Food/Beverages':                           \n",
-       "                                                                       -12.115164,                                 \n",
+       "                                                                       -11.704685,                                 \n",
        "                                                                       'Furniture/Home                             \n",
        "                                                                       Decor':                                     \n",
-       "                                                                       -0.0084281690000000…                        \n",
+       "                                                                       -0.039707113,                               \n",
        "                                                                       'Beauty/Personal                            \n",
-       "                                                                       Care': -12.145201}                          \n",
+       "                                                                       Care': -15.307133}                          \n",
        "  Chocolate leather      Footwear/Clothing      Footwear/Clothing      {'Footwear/Clothing…   True                 \n",
-       "  boots.                                                               -0.0003247375000000…                        \n",
+       "  boots.                                                               -0.0002129574700000…                        \n",
        "                                                                       'Electronics':                              \n",
-       "                                                                       -17.322811,                                 \n",
+       "                                                                       -14.297362,                                 \n",
        "                                                                       'Food/Beverages':                           \n",
-       "                                                                       -8.062444,                                  \n",
+       "                                                                       -13.440421,                                 \n",
        "                                                                       'Furniture/Home                             \n",
-       "                                                                       Decor': -12.040547,                         \n",
+       "                                                                       Decor': -10.221389,                         \n",
        "                                                                       'Beauty/Personal                            \n",
-       "                                                                       Care': -12.584134}                          \n",
+       "                                                                       Care': -8.653571}                           \n",
        "  Wooden cream for       Furniture/Home Decor   Furniture/Home Decor   {'Footwear/Clothing…   True                 \n",
-       "  surfaces.                                                            -15.480099,                                 \n",
+       "  surfaces.                                                            -15.676728,                                 \n",
        "                                                                       'Electronics':                              \n",
-       "                                                                       -17.015057,                                 \n",
+       "                                                                       -12.5098505,                                \n",
        "                                                                       'Food/Beverages':                           \n",
-       "                                                                       -13.499149,                                 \n",
+       "                                                                       -10.770715,                                 \n",
        "                                                                       'Furniture/Home                             \n",
        "                                                                       Decor':                                     \n",
-       "                                                                       -0.0001718358800000…                        \n",
+       "                                                                       -0.0001917392200000…                        \n",
        "                                                                       'Beauty/Personal                            \n",
-       "                                                                       Care': -8.679317}                           \n",
+       "                                                                       Care': -8.698747}                           \n",
        "  Natural finish for     Beauty/Personal Care   Beauty/Personal Care   {'Footwear/Clothing…   True                 \n",
-       "  your lips.                                                           -11.842119,                                 \n",
+       "  your lips.                                                           -18.403374,                                 \n",
        "                                                                       'Electronics':                              \n",
-       "                                                                       -14.539164,                                 \n",
+       "                                                                       -17.621948,                                 \n",
        "                                                                       'Food/Beverages':                           \n",
-       "                                                                       -13.285265,                                 \n",
+       "                                                                       -14.839035,                                 \n",
        "                                                                       'Furniture/Home                             \n",
-       "                                                                       Decor': -14.923815,                         \n",
+       "                                                                       Decor': -18.330505,                         \n",
        "                                                                       'Beauty/Personal                            \n",
        "                                                                       Care':                                      \n",
-       "                                                                       -9.72990600003512e-…                        \n",
+       "                                                                       -4.3201999994718403…                        \n",
        "                                                                                                                   \n",
        "
\n" ], "text/plain": [ " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cl…\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory__x__produ…\u001b[0m\u001b[1;35m \u001b[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cl…\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_c…\u001b[0m\u001b[1;35m \u001b[0m \n", " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", " Apple product with a Electronics Electronics {'Footwear/Clothing… True \n", - " sleek design. -13.669698, \n", + " sleek design. -23.812187, \n", " 'Electronics': \n", - " -4.4849444000336985… \n", + " -1.9361265000364793… \n", " 'Food/Beverages': \n", - " -14.937825, \n", + " -20.240387, \n", " 'Furniture/Home \n", - " Decor': -13.595754, \n", + " Decor': -15.734467, \n", " 'Beauty/Personal \n", - " Care': -13.327497} \n", + " Care': -17.71083} \n", " \u001b[2m \u001b[0m\u001b[2mLaptop stand for the\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Footwear/Clothing…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m\u001b[2mkitchen. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-9.9471035, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mkitchen. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-16.188046, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Electronics': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.787397, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-3.246235, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Food/Beverages': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-12.115164, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-11.704685, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Furniture/Home \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mDecor': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0084281690000000…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.039707113, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m 
\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Beauty/Personal \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -12.145201} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -15.307133} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Chocolate leather Footwear/Clothing Footwear/Clothing {'Footwear/Clothing… True \n", - " boots. -0.0003247375000000… \n", + " boots. -0.0002129574700000… \n", " 'Electronics': \n", - " -17.322811, \n", + " -14.297362, \n", " 'Food/Beverages': \n", - " -8.062444, \n", + " -13.440421, \n", " 'Furniture/Home \n", - " Decor': -12.040547, \n", + " Decor': -10.221389, \n", " 'Beauty/Personal \n", - " Care': -12.584134} \n", + " Care': -8.653571} \n", " \u001b[2m \u001b[0m\u001b[2mWooden cream for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Footwear/Clothing…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m\u001b[2msurfaces. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-15.480099, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2msurfaces. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-15.676728, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Electronics': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-17.015057, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-12.5098505, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Food/Beverages': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-13.499149, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-10.770715, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Furniture/Home \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mDecor': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0001718358800000…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0001917392200000…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Beauty/Personal \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m\u001b[2mCare': -8.679317} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -8.698747} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Natural finish for Beauty/Personal Care Beauty/Personal Care {'Footwear/Clothing… True \n", - " your lips. -11.842119, \n", + " your lips. -18.403374, \n", " 'Electronics': \n", - " -14.539164, \n", + " -17.621948, \n", " 'Food/Beverages': \n", - " -13.285265, \n", + " -14.839035, \n", " 'Furniture/Home \n", - " Decor': -14.923815, \n", + " Decor': -18.330505, \n", " 'Beauty/Personal \n", " Care': \n", - " -9.72990600003512e-… \n", + " -4.3201999994718403… \n", " \n" ] }, @@ -553,50 +603,11 @@ { "data": { "text/html": [ - "
Analyze evaluation experience ...\n",
+       "
No skill to improve found. Stopping learning process.\n",
        "
\n" ], "text/plain": [ - "Analyze evaluation experience \u001b[33m...\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Number of errors: 0\n",
-       "
\n" - ], - "text/plain": [ - "Number of errors: \u001b[1;36m0\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Accuracy = 100.00%\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m100.00\u001b[0m\u001b[1;31m%\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Accuracy threshold reached (1.0 >= 0.9)\n",
-       "
\n" - ], - "text/plain": [ - "Accuracy threshold reached \u001b[1m(\u001b[0m\u001b[1;36m1.0\u001b[0m >= \u001b[1;36m0.9\u001b[0m\u001b[1m)\u001b[0m\n" + "No skill to improve found. Stopping learning process.\n" ] }, "metadata": {}, @@ -618,62 +629,17 @@ { "data": { "text/plain": [ - "ShortTermMemory(dataset=DataFrameDataset(df= text category\n", - "0 Apple product with a sleek design. Electronics\n", - "1 Laptop stand for the kitchen. Furniture/Home Decor\n", - "2 Chocolate leather boots. Footwear/Clothing\n", - "3 Wooden cream for surfaces. Furniture/Home Decor\n", - "4 Natural finish for your lips. Beauty/Personal Care), predictions= text category \\\n", - "0 Apple product with a sleek design. Electronics \n", - "1 Laptop stand for the kitchen. Furniture/Home Decor \n", - "2 Chocolate leather boots. Footwear/Clothing \n", - "3 Wooden cream for surfaces. Furniture/Home Decor \n", - "4 Natural finish for your lips. Beauty/Personal Care \n", - "\n", - " product_category_classification \\\n", - "0 Electronics \n", - "1 Furniture/Home Decor \n", - "2 Footwear/Clothing \n", - "3 Furniture/Home Decor \n", - "4 Beauty/Personal Care \n", - "\n", - " score \n", - "0 {'Footwear/Clothing': -13.669698, 'Electronics... \n", - "1 {'Footwear/Clothing': -9.9471035, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.0003247375000000436, ... \n", - "3 {'Footwear/Clothing': -15.480099, 'Electronics... \n", - "4 {'Footwear/Clothing': -11.842119, 'Electronics... , evaluations= text category \\\n", - "0 Apple product with a sleek design. Electronics \n", - "1 Laptop stand for the kitchen. Furniture/Home Decor \n", - "2 Chocolate leather boots. Footwear/Clothing \n", - "3 Wooden cream for surfaces. Furniture/Home Decor \n", - "4 Natural finish for your lips. 
Beauty/Personal Care \n", - "\n", - " product_category_classification \\\n", - "0 Electronics \n", - "1 Furniture/Home Decor \n", - "2 Footwear/Clothing \n", - "3 Furniture/Home Decor \n", - "4 Beauty/Personal Care \n", - "\n", - " score \\\n", - "0 {'Footwear/Clothing': -13.669698, 'Electronics... \n", - "1 {'Footwear/Clothing': -9.9471035, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.0003247375000000436, ... \n", - "3 {'Footwear/Clothing': -15.480099, 'Electronics... \n", - "4 {'Footwear/Clothing': -11.842119, 'Electronics... \n", - "\n", - " category__x__product_category_classification \n", - "0 True \n", - "1 True \n", - "2 True \n", - "3 True \n", - "4 True , ground_truth_column_name='category', match_column_name='category__x__product_category_classification', errors=Empty DataFrame\n", - "Columns: [text, category, product_category_classification, score, category__x__product_category_classification]\n", - "Index: [], accuracy=1.0, initial_instructions='Label the input text with the following labels: {{labels}}', updated_instructions=\"Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', 'Furniture/Home Decor', 'Beauty/Personal Care']. 
Choose the label that best represents the main category of the input text.\\n\\nExamples:\\n\\nInput: Laptop stand for the kitchen.\\nOutput: Furniture/Home Decor\\n\\nInput: Running shoes for men.\\nOutput: Footwear/Clothing\\n\\nInput: Organic shampoo for dry hair.\\nOutput: Beauty/Personal Care\")" + "GroundTruthSignal(match= product_category_classification\n", + "0 True\n", + "1 True\n", + "2 True\n", + "3 True\n", + "4 True, errors={'product_category_classification': Empty DataFrame\n", + "Columns: [predictions, category]\n", + "Index: []})" ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -697,7 +663,7 @@ " ),\n", " environment=BasicEnvironment(\n", " ground_truth_dataset=df,\n", - " ground_truth_column='category'\n", + " ground_truth_columns={'product_category_classification': 'category'}\n", " )\n", ")\n", "\n", @@ -706,8 +672,7 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "4a876f3d", + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -716,20 +681,18 @@ "
Total Agent Skills: 1\n",
        "\n",
        "product_category_classification\n",
-       "Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', \n",
-       "'Furniture/Home Decor', 'Beauty/Personal Care']. Choose the label that best represents the main category of the \n",
-       "input text.\n",
-       "\n",
-       "Examples:\n",
+       "Label the input text with the most relevant label based on the primary function or purpose of the product. If the \n",
+       "product can be categorized into multiple labels, prioritize the label that best represents the primary function or \n",
+       "purpose. If it is still unclear, choose the label that is most commonly associated with similar products.\n",
        "\n",
        "Input: Laptop stand for the kitchen.\n",
        "Output: Furniture/Home Decor\n",
        "\n",
-       "Input: Running shoes for men.\n",
-       "Output: Footwear/Clothing\n",
+       "Input: Smartwatch with fitness tracking features.\n",
+       "Output: Electronics\n",
        "\n",
-       "Input: Organic shampoo for dry hair.\n",
-       "Output: Beauty/Personal Care\n",
+       "Input: Organic dark chocolate bar.\n",
+       "Output: Food/Beverages\n",
        "\n",
        "
\n" ], @@ -737,20 +700,18 @@ "\u001b[1;34mTotal Agent Skills: \u001b[0m\u001b[1;34m1\u001b[0m\n", "\n", "\u001b[1;4;32mproduct_category_classification\u001b[0m\n", - "\u001b[32mCategorize the input text into one of the following labels: \u001b[0m\u001b[1;32m[\u001b[0m\u001b[32m'Footwear/Clothing'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Electronics'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Food/Beverages'\u001b[0m\u001b[32m, \u001b[0m\n", - "\u001b[32m'Furniture/Home Decor'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Beauty/Personal Care'\u001b[0m\u001b[1;32m]\u001b[0m\u001b[32m. Choose the label that best represents the main category of the \u001b[0m\n", - "\u001b[32minput text.\u001b[0m\n", - "\n", - "\u001b[32mExamples:\u001b[0m\n", + "\u001b[32mLabel the input text with the most relevant label based on the primary function or purpose of the product. If the \u001b[0m\n", + "\u001b[32mproduct can be categorized into multiple labels, prioritize the label that best represents the primary function or \u001b[0m\n", + "\u001b[32mpurpose. If it is still unclear, choose the label that is most commonly associated with similar products.\u001b[0m\n", "\n", "\u001b[32mInput: Laptop stand for the kitchen.\u001b[0m\n", "\u001b[32mOutput: Furniture/Home Decor\u001b[0m\n", "\n", - "\u001b[32mInput: Running shoes for men.\u001b[0m\n", - "\u001b[32mOutput: Footwear/Clothing\u001b[0m\n", + "\u001b[32mInput: Smartwatch with fitness tracking features.\u001b[0m\n", + "\u001b[32mOutput: Electronics\u001b[0m\n", "\n", - "\u001b[32mInput: Organic shampoo for dry hair.\u001b[0m\n", - "\u001b[32mOutput: Beauty/Personal Care\u001b[0m\n", + "\u001b[32mInput: Organic dark chocolate bar.\u001b[0m\n", + "\u001b[32mOutput: Food/Beverages\u001b[0m\n", "\n" ] }, @@ -766,15 +727,27 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "ee97ee22", + "execution_count": 4, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: product_category_classification\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:02<00:00, 2.37it/s]\n" + "100%|████████████████████████████████| 5/5 [00:02<00:00, 1.78it/s]\n" ] } ], @@ -787,13 +760,12 @@ " \"Leather grain snack bar.\" # Potential categories: Footwear/Clothing or Food/Beverages\n", "], columns=['text'])\n", "\n", - "run = agent.apply_skills(test_df)" + "predictions = agent.run(test_df)" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "03cce2a7", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -827,31 +799,31 @@ " 0\n", " Stainless steel apple peeler.\n", " Food/Beverages\n", - " {'Footwear/Clothing': -5.903179, 'Electronics'...\n", + " {'Footwear/Clothing': -11.584652, 'Electronics...\n", " \n", " \n", " 1\n", " Silk finish touch screen.\n", " Electronics\n", - " {'Footwear/Clothing': -11.517515, 'Electronics...\n", + " {'Footwear/Clothing': -18.90214, 'Electronics'...\n", " \n", " \n", " 2\n", " Chocolate coated boots.\n", " Footwear/Clothing\n", - " {'Footwear/Clothing': -0.074807025, 'Electroni...\n", + " {'Footwear/Clothing': -0.20086760000000006, 'E...\n", " \n", " \n", " 3\n", " Natural wood fragrance.\n", - " Furniture/Home Decor\n", - " {'Footwear/Clothing': -15.117043, 'Electronics...\n", + " Beauty/Personal Care\n", + " {'Footwear/Clothing': -14.69353, 'Electronics'...\n", " \n", " \n", " 4\n", " Leather grain snack bar.\n", " Food/Beverages\n", - " {'Footwear/Clothing': -9.763915, 'Electronics'...\n", + " {'Footwear/Clothing': -16.15361, 'Electronics'...\n", " \n", " \n", "\n", @@ -862,32 +834,32 @@ "0 Stainless steel apple peeler. Food/Beverages \n", "1 Silk finish touch screen. Electronics \n", "2 Chocolate coated boots. Footwear/Clothing \n", - "3 Natural wood fragrance. Furniture/Home Decor \n", + "3 Natural wood fragrance. 
Beauty/Personal Care \n", "4 Leather grain snack bar. Food/Beverages \n", "\n", " score \n", - "0 {'Footwear/Clothing': -5.903179, 'Electronics'... \n", - "1 {'Footwear/Clothing': -11.517515, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.074807025, 'Electroni... \n", - "3 {'Footwear/Clothing': -15.117043, 'Electronics... \n", - "4 {'Footwear/Clothing': -9.763915, 'Electronics'... " + "0 {'Footwear/Clothing': -11.584652, 'Electronics... \n", + "1 {'Footwear/Clothing': -18.90214, 'Electronics'... \n", + "2 {'Footwear/Clothing': -0.20086760000000006, 'E... \n", + "3 {'Footwear/Clothing': -14.69353, 'Electronics'... \n", + "4 {'Footwear/Clothing': -16.15361, 'Electronics'... " ] }, - "execution_count": 20, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "run.predictions" + "predictions" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -899,7 +871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/classification_skill_with_CoT.ipynb b/examples/classification_skill_with_CoT.ipynb index d6e2a61..d79cf53 100644 --- a/examples/classification_skill_with_CoT.ipynb +++ b/examples/classification_skill_with_CoT.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Classification skill with Chain-of-Thoughts" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -98,14 +96,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: product_category_classification\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:00<00:00, 7.41it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 7.42it/s]\n" ] }, { @@ -234,16 +244,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -255,7 +264,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/question_answering_skill.ipynb b/examples/question_answering_skill.ipynb index dd51761..254016b 100644 --- a/examples/question_answering_skill.ipynb +++ b/examples/question_answering_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Question-answering skill" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -105,14 +103,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: qa_skill\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: qa_skill\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:02<00:00, 1.91it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 64.16it/s]\n" ] }, { @@ -215,16 +225,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -236,7 +245,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb index 3eab497..fd9721f 100644 --- a/examples/quickstart.ipynb +++ b/examples/quickstart.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "a6c119c3", "metadata": {}, "source": [ "# ADALA Quickstart\n", @@ -20,7 +19,6 @@ }, { "cell_type": "markdown", - "id": "55c19afc", "metadata": {}, "source": [ "## Dataset Creation\n", @@ -30,7 +28,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "5d5b37a3", "metadata": {}, "outputs": [ { @@ -118,7 +115,6 @@ }, { "cell_type": "markdown", - "id": "9ce6651b", "metadata": {}, "source": [ "We instantiate Dataset that uses this pandas dataframe as a data source. 
Dataset object takes care of input data schema and data streaming:" @@ -127,7 +123,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "93a31f60", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +133,6 @@ }, { "cell_type": "markdown", - "id": "0dc201b3", "metadata": {}, "source": [ "## Create Agent\n", @@ -152,8 +146,7 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "a1310fce", + "execution_count": 9, "metadata": { "scrolled": true }, @@ -165,17 +158,17 @@ "\n", "Environment: BasicEnvironment\n", "Skills: subjectivity_detection\n", - "Runtimes: openai, openai-gpt3, openai-gpt4\n", + "Runtimes: openai\n", "Default Runtime: openai\n", "Default Teacher Runtime: openai-gpt4\n", "
\n" ], "text/plain": [ - "\u001B[1;34mAgent Instance\u001B[0m\n", + "\u001b[1;34mAgent Instance\u001b[0m\n", "\n", "Environment: BasicEnvironment\n", "Skills: subjectivity_detection\n", - "Runtimes: openai, openai-gpt3, openai-gpt4\n", + "Runtimes: openai\n", "Default Runtime: openai\n", "Default Teacher Runtime: openai-gpt4\n" ] @@ -205,17 +198,20 @@ " # basic environment extracts ground truth signal from the input records\n", " environment=BasicEnvironment(\n", " ground_truth_dataset=dataset,\n", - " ground_truth_column='ground_truth'\n", + " ground_truth_columns={'subjectivity_detection': 'ground_truth'}\n", " ),\n", " \n", " runtimes = {\n", " # You can specify your OPENAI API KEY here via `OpenAIRuntime(..., api_key='your-api-key')`\n", " 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'),\n", - " 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'),\n", - " 'openai-gpt4': OpenAIRuntime(model='gpt-4'),\n", " },\n", " default_runtime='openai',\n", " \n", + " teacher_runtimes = {\n", + " 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'),\n", + " 'openai-gpt4': OpenAIRuntime(model='gpt-4'),\n", + " },\n", + " \n", " # NOTE! If you don't have an access to gpt4 - replace it with \"openai-gpt3\"\n", " default_teacher_runtime='openai-gpt4'\n", ")\n", @@ -225,7 +221,6 @@ }, { "cell_type": "markdown", - "id": "8340dde8", "metadata": {}, "source": [ "## Learning Agent\n", @@ -235,17 +230,29 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "666c8d0f", + "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:00<00:00, 39.18it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 45.97it/s]\n" ] }, { @@ -259,7 +266,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m0\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m0\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -272,7 +279,7 @@ "
\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -282,7 +289,7 @@ "data": { "text/html": [ "
                                                                                                                   \n",
-       "  text                     ground_truth   subjectivity_detection   score                    ground_truth__x__sub…  \n",
+       "  text                     ground_truth   subjectivity_detection   score                    subjectivity_detecti…  \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
        "  The mic is great.        Subjective     Subjective               {'Subjective':           True                   \n",
        "                                                                   -0.02697588099999997,                           \n",
@@ -309,24 +316,24 @@
       ],
       "text/plain": [
        "                                                                                                                   \n",
-       " \u001B[1;35m \u001B[0m\u001B[1;35mtext                  \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore                 \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__sub…\u001B[0m\u001B[1;35m \u001B[0m \n",
+       " \u001b[1;35m \u001b[0m\u001b[1;35mtext                  \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore                 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detecti…\u001b[0m\u001b[1;35m \u001b[0m \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
        "  The mic is great.        Subjective     Subjective               {'Subjective':           True                   \n",
        "                                                                   -0.02697588099999997,                           \n",
        "                                                                   'Objective':                                    \n",
        "                                                                   -3.6262724}                                     \n",
-       " \u001B[2m \u001B[0m\u001B[2mWill order from them  \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective  \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective            \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective':        \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue                 \u001B[0m\u001B[2m \u001B[0m \n",
-       " \u001B[2m \u001B[0m\u001B[2magain!                \u001B[0m\u001B[2m \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.11282212000000001, \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
-       " \u001B[2m                        \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective':          \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
-       " \u001B[2m                        \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.2378219999999995}  \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
+       " \u001b[2m \u001b[0m\u001b[2mWill order from them  \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective  \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective            \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective':        \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue                 \u001b[0m\u001b[2m \u001b[0m \n",
+       " \u001b[2m \u001b[0m\u001b[2magain!                \u001b[0m\u001b[2m \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.11282212000000001, \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
+       " \u001b[2m                        \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective':          \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
+       " \u001b[2m                        \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.2378219999999995}  \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
        "  Not loud enough and      Objective      Subjective               {'Subjective':           False                  \n",
        "  doesn't turn on like                                             -0.014163457000000034,                          \n",
        "  it should.                                                       'Objective':                                    \n",
        "                                                                   -4.2641635}                                     \n",
-       " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective   \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective             \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective':        \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue                 \u001B[0m\u001B[2m \u001B[0m \n",
-       " \u001B[2m \u001B[0m\u001B[2mto accept anything    \u001B[0m\u001B[2m \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.0720863,           \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
-       " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s       \u001B[0m\u001B[2m \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective':          \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
-       " \u001B[2m                        \u001B[0m \u001B[2m              \u001B[0m \u001B[2m                        \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.13458653999999995} \u001B[0m\u001B[2m \u001B[0m \u001B[2m                       \u001B[0m \n",
+       " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective   \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective             \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective':        \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue                 \u001b[0m\u001b[2m \u001b[0m \n",
+       " \u001b[2m \u001b[0m\u001b[2mto accept anything    \u001b[0m\u001b[2m \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.0720863,           \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
+       " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s       \u001b[0m\u001b[2m \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective':          \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
+       " \u001b[2m                        \u001b[0m \u001b[2m              \u001b[0m \u001b[2m                        \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.13458653999999995} \u001b[0m\u001b[2m \u001b[0m \u001b[2m                       \u001b[0m \n",
        "  All three broke within   Objective      Objective                {'Subjective':           True                   \n",
        "  two months of use.                                               -2.1821797,                                     \n",
        "                                                                   'Objective':                                    \n",
@@ -337,6 +344,19 @@
      "metadata": {},
      "output_type": "display_data"
     },
+    {
+     "data": {
+      "text/html": [
+       "
Accuracy = 80.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -344,7 +364,7 @@ "
\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -354,18 +374,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████| 1/1 [00:00<00:00, 153.47it/s]\n", - "100%|█████████████████| 1/1 [00:00<00:00, 31.21it/s]\n" + "100%|███████████████████████████████| 1/1 [00:00<00:00, 170.90it/s]\n", + "100%|████████████████████████████████| 1/1 [00:00<00:00, 29.05it/s]\n" ] }, { "data": { "text/html": [ - "
Number of errors: 1\n",
+       "
Error analysis for skill \"subjectivity_detection\":\n",
+       "\n",
        "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m1\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -374,11 +396,25 @@ { "data": { "text/html": [ - "
Accuracy = 80.00%\n",
+       "
\n",
+       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Prediction: Subjective\n",
+       "Ground truth: Objective\n",
+       "Error reason: The model might have considered the phrases \"not loud enough\" and \"doesn't turn on like it should\" as\n",
+       "personal opinions or experiences, hence it classified the review as subjective. However, these are factual \n",
+       "statements about the product's performance, making the review objective.\n",
+       "\n",
        "
\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m80.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model might have considered the phrases \u001b[0m\u001b[32m\"not loud enough\"\u001b[0m\u001b[32m and \u001b[0m\u001b[32m\"doesn't turn on like it should\"\u001b[0m\u001b[32m as\u001b[0m\n", + "\u001b[32mpersonal opinions or experiences, hence it classified the review as subjective. However, these are factual \u001b[0m\n", + "\u001b[32mstatements about the product's performance, making the review objective.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -391,7 +427,7 @@ "
\n" ], "text/plain": [ - "Improve \u001B[32m\"subjectivity_detection\"\u001B[0m skill based on analysis \u001B[33m...\u001B[0m\n" + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -405,7 +441,7 @@ "
\n" ], "text/plain": [ - "Updated instructions for skill \u001B[32m\"subjectivity_detection\"\u001B[0m:\n", + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", "\n" ] }, @@ -415,35 +451,37 @@ { "data": { "text/html": [ - "
Determine whether the given product review contains \"Subjective\" (based on personal feelings, tastes, or opinions) \n",
-       "or \"Objective\" (based on facts) statements.\n",
+       "
Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n",
+       "based on personal opinions, feelings, or tastes. An \"Objective\" statement is based on factual information about the\n",
+       "product, such as its features or performance, and is not influenced by personal feelings or opinions. \n",
        "\n",
        "Examples:\n",
        "\n",
-       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Input: The color of this phone is black.\n",
        "Output: Objective\n",
        "\n",
-       "Input: I personally think the sound quality is not up to the mark.\n",
+       "Input: I think this phone is too expensive for its features.\n",
        "Output: Subjective\n",
        "\n",
-       "Input: The phone's battery lasts for 10 hours.\n",
+       "Input: The battery life of this laptop lasts for 10 hours.\n",
        "Output: Objective\n",
        "
\n" ], "text/plain": [ - "\u001B[1;32mDetermine whether the given product review contains \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m \u001B[0m\n", - "\u001B[1;32mor \u001B[0m\u001B[32m\"Objective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on facts\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m statements.\u001B[0m\n", + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the\u001b[0m\n", + "\u001b[1;32mproduct, such as its features or performance, and is not influenced by personal feelings or opinions. 
\u001b[0m\n", "\n", - "\u001B[1;32mExamples:\u001B[0m\n", + "\u001b[1;32mExamples:\u001b[0m\n", "\n", - "\u001B[1;32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The phone's battery lasts for \u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;32m hours.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n" + "\u001b[1;32mInput: The battery life of this laptop lasts for \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;32m hours.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n" ] }, "metadata": {}, @@ -456,7 +494,20 @@ "
\n" ], "text/plain": [ - "Re-apply subjectivity_detection skill to dataset \u001B[33m...\u001B[0m\n" + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" ] }, "metadata": {}, @@ -466,7 +517,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:00<00:00, 48.32it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 26.29it/s]\n" ] }, { @@ -480,7 +531,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m1\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m1\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -493,7 +544,7 @@ "
\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -503,61 +554,74 @@ "data": { "text/html": [ "
                                                                                                                   \n",
-       "  text                     ground_truth   subjectivity_detection   score                    ground_truth__x__sub…  \n",
+       "  text                     ground_truth   subjectivity_detection   score                    subjectivity_detecti…  \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
-       "  The mic is great.        Subjective     Objective                {'Subjective':           False                  \n",
-       "                                                                   -2.2253392,                                     \n",
+       "  The mic is great.        Subjective     Subjective               {'Subjective':           True                   \n",
+       "                                                                   -0.12713461999999998,                           \n",
        "                                                                   'Objective':                                    \n",
-       "                                                                   -0.11432376000000005}                           \n",
-       "  Will order from them     Subjective     Objective                {'Subjective':           False                  \n",
-       "  again!                                                           -0.8573844400000001,                            \n",
+       "                                                                   -2.1254027}                                     \n",
+       "  Will order from them     Subjective     Subjective               {'Subjective':           True                   \n",
+       "  again!                                                           -0.007335418999999971…                          \n",
        "                                                                   'Objective':                                    \n",
-       "                                                                   -0.5521171}                                     \n",
-       "  Not loud enough and      Objective      Objective                {'Subjective':           True                   \n",
-       "  doesn't turn on like                                             -4.0895286,                                     \n",
+       "                                                                   -4.9187045}                                     \n",
+       "  Not loud enough and      Objective      Subjective               {'Subjective':           False                  \n",
+       "  doesn't turn on like                                             -0.000693016340000051…                          \n",
        "  it should.                                                       'Objective':                                    \n",
-       "                                                                   -0.01688896000000003}                           \n",
+       "                                                                   -7.2748933}                                     \n",
        "  The phone doesn't seem   Objective      Objective                {'Subjective':           True                   \n",
-       "  to accept anything                                               -2.8614092,                                     \n",
+       "  to accept anything                                               -2.4914062,                                     \n",
        "  except CBR mp3s                                                  'Objective':                                    \n",
-       "                                                                   -0.058888500000000066}                          \n",
-       "  All three broke within   Objective      Objective                {'Subjective':           True                   \n",
-       "  two months of use.                                               -4.7739024,                                     \n",
+       "                                                                   -0.086422645}                                   \n",
+       "  All three broke within   Objective      Subjective               {'Subjective':           False                  \n",
+       "  two months of use.                                               -0.08145889000000005,                           \n",
        "                                                                   'Objective':                                    \n",
-       "                                                                   -0.008483256000000052}                          \n",
+       "                                                                   -2.5481107}                                     \n",
        "                                                                                                                   \n",
        "
\n" ], "text/plain": [ " \n", - " \u001B[1;35m \u001B[0m\u001B[1;35mtext \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__sub…\u001B[0m\u001B[1;35m \u001B[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detecti…\u001b[0m\u001b[1;35m \u001b[0m \n", " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", - " The mic is great. Subjective Objective {'Subjective': False \n", - " -2.2253392, \n", + " The mic is great. Subjective Subjective {'Subjective': True \n", + " -0.12713461999999998, \n", " 'Objective': \n", - " -0.11432376000000005} \n", - " \u001B[2m \u001B[0m\u001B[2mWill order from them \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mFalse \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2magain! 
\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.8573844400000001, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.5521171} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -4.0895286, \n", + " -2.1254027} \n", + " \u001b[2m \u001b[0m\u001b[2mWill order from them \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2magain! \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.007335418999999971…\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.9187045} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Not loud enough and Objective Subjective {'Subjective': False \n", + " doesn't turn on like -0.000693016340000051… \n", " it should. 
'Objective': \n", - " -0.01688896000000003} \n", - " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mto accept anything \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.8614092, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.058888500000000066}\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " All three broke within Objective Objective {'Subjective': True \n", - " two months of use. 
-4.7739024, \n", + " -7.2748933} \n", + " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mto accept anything \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.4914062, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.086422645} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " All three broke within Objective Subjective {'Subjective': False \n", + " two months of use. -0.08145889000000005, \n", " 'Objective': \n", - " -0.008483256000000052} \n", + " -2.5481107} \n", " \n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
Accuracy = 60.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m60.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -565,7 +629,7 @@ "
\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -575,18 +639,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████| 2/2 [00:00<00:00, 229.64it/s]\n", - "100%|█████████████████| 2/2 [00:00<00:00, 24.71it/s]\n" + "100%|███████████████████████████████| 2/2 [00:00<00:00, 203.21it/s]\n", + "100%|████████████████████████████████| 2/2 [00:00<00:00, 23.67it/s]\n" ] }, { "data": { "text/html": [ - "
Number of errors: 2\n",
+       "
Error analysis for skill \"subjectivity_detection\":\n",
+       "\n",
        "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m2\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -595,11 +661,39 @@ { "data": { "text/html": [ - "
Accuracy = 60.00%\n",
+       "
\n",
+       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Prediction: Subjective\n",
+       "Ground truth: Objective\n",
+       "Error reason: The model likely classified the statement as subjective because it seems to express personal \n",
+       "dissatisfaction. However, the statement is actually objective as it describes the product's features - its volume \n",
+       "and functionality.\n",
+       "\n",
+       "Input: All three broke within two months of use.\n",
+       "Prediction: Subjective\n",
+       "Ground truth: Objective\n",
+       "Error reason: The model incorrectly classified the statement as subjective, possibly because it interpreted the \n",
+       "phrase \"broke within two months of use\" as a personal experience or opinion. However, the statement is objective as\n",
+       "it presents a factual occurrence about the product's durability.\n",
+       "\n",
        "
\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m60.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model likely classified the statement as subjective because it seems to express personal \u001b[0m\n", + "\u001b[32mdissatisfaction. However, the statement is actually objective as it describes the product's features - its volume \u001b[0m\n", + "\u001b[32mand functionality.\u001b[0m\n", + "\n", + "\u001b[32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model incorrectly classified the statement as subjective, possibly because it interpreted the \u001b[0m\n", + "\u001b[32mphrase \u001b[0m\u001b[32m\"broke within two months of use\"\u001b[0m\u001b[32m as a personal experience or opinion. However, the statement is objective as\u001b[0m\n", + "\u001b[32mit presents a factual occurrence about the product's durability.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -612,7 +706,7 @@ "
\n" ], "text/plain": [ - "Improve \u001B[32m\"subjectivity_detection\"\u001B[0m skill based on analysis \u001B[33m...\u001B[0m\n" + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -626,7 +720,7 @@ "
\n" ], "text/plain": [ - "Updated instructions for skill \u001B[32m\"subjectivity_detection\"\u001B[0m:\n", + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", "\n" ] }, @@ -636,49 +730,53 @@ { "data": { "text/html": [ - "
Identify if the provided product review is \"Subjective\" (expressing personal feelings, tastes, or opinions) or \n",
-       "\"Objective\" (based on factual information). Consider a statement as subjective if it reflects personal judgment or \n",
-       "preference, and as objective if it states verifiable facts or features.\n",
+       "
Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n",
+       "based on personal opinions, feelings, or tastes. An \"Objective\" statement is based on factual information about the\n",
+       "product, such as its features, performance, or occurrences during use, and is not influenced by personal feelings \n",
+       "or opinions. Even if the statement seems to express dissatisfaction or a negative experience, it should be \n",
+       "classified as \"Objective\" if it provides factual information about the product's characteristics or performance.\n",
        "\n",
        "Examples:\n",
        "\n",
-       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Input: The color of this phone is black.\n",
        "Output: Objective\n",
        "\n",
-       "Input: I personally think the sound quality is not up to the mark.\n",
+       "Input: I think this phone is too expensive for its features.\n",
        "Output: Subjective\n",
        "\n",
-       "Input: The phone's battery lasts for 10 hours.\n",
+       "Input: Not loud enough and doesn't turn on like it should.\n",
        "Output: Objective\n",
        "\n",
-       "Input: The mic is great.\n",
-       "Output: Subjective\n",
+       "Input: All three broke within two months of use.\n",
+       "Output: Objective\n",
        "\n",
-       "Input: Will order from them again!\n",
+       "Input: I don't like the design of this laptop.\n",
        "Output: Subjective\n",
        "
\n" ], "text/plain": [ - "\u001B[1;32mIdentify if the provided product review is \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mexpressing personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m or \u001B[0m\n", - "\u001B[32m\"Objective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on factual information\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m. Consider a statement as subjective if it reflects personal judgment or \u001B[0m\n", - "\u001B[1;32mpreference, and as objective if it states verifiable facts or features.\u001B[0m\n", + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the\u001b[0m\n", + "\u001b[1;32mproduct, such as its features, performance, or occurrences during use, and is not influenced by personal feelings \u001b[0m\n", + "\u001b[1;32mor opinions. 
Even if the statement seems to express dissatisfaction or a negative experience, it should be \u001b[0m\n", + "\u001b[1;32mclassified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m if it provides factual information about the product's characteristics or performance.\u001b[0m\n", "\n", - "\u001B[1;32mExamples:\u001B[0m\n", + "\u001b[1;32mExamples:\u001b[0m\n", "\n", - "\u001B[1;32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The phone's battery lasts for \u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;32m hours.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The mic is great.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: Will order from them again!\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n" + "\u001b[1;32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n" ] }, "metadata": {}, @@ -691,7 +789,20 @@ "
\n" ], "text/plain": [ - "Re-apply subjectivity_detection skill to dataset \u001B[33m...\u001B[0m\n" + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" ] }, "metadata": {}, @@ -701,7 +812,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 5/5 [00:00<00:00, 35.93it/s]\n" + "100%|████████████████████████████████| 5/5 [00:00<00:00, 32.77it/s]\n" ] }, { @@ -715,7 +826,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m2\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m2\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -728,7 +839,7 @@ "
\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -738,59 +849,74 @@ "data": { "text/html": [ "
                                                                                                                   \n",
-       "  text                     ground_truth   subjectivity_detection   score                    ground_truth__x__sub…  \n",
+       "  text                     ground_truth   subjectivity_detection   score                    subjectivity_detecti…  \n",
        " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
-       "  The mic is great.        Subjective     Subjective               {'Subjective':           True                   \n",
-       "                                                                   -0.022607480000000055,                          \n",
-       "                                                                   'Objective': -3.80076}                          \n",
+       "  The mic is great.        Subjective     Objective                {'Subjective':           False                  \n",
+       "                                                                   -5.5978823,                                     \n",
+       "                                                                   'Objective':                                    \n",
+       "                                                                   -0.003712546499999969}                          \n",
        "  Will order from them     Subjective     Subjective               {'Subjective':           True                   \n",
-       "  again!                                                           -0.05627503599999997,                           \n",
+       "  again!                                                           -0.6518025399999999,                            \n",
        "                                                                   'Objective':                                    \n",
-       "                                                                   -2.9055107}                                     \n",
+       "                                                                   -0.7362752}                                     \n",
        "  Not loud enough and      Objective      Objective                {'Subjective':           True                   \n",
-       "  doesn't turn on like                                             -2.897738,                                      \n",
+       "  doesn't turn on like                                             -4.672154,                                      \n",
        "  it should.                                                       'Objective':                                    \n",
-       "                                                                   -0.05672692499999995}                           \n",
+       "                                                                   -0.009396199000000013}                          \n",
        "  The phone doesn't seem   Objective      Objective                {'Subjective':           True                   \n",
-       "  to accept anything                                               -3.8168292,                                     \n",
+       "  to accept anything                                               -4.6575603,                                     \n",
        "  except CBR mp3s                                                  'Objective':                                    \n",
-       "                                                                   -0.022242965000000038}                          \n",
+       "                                                                   -0.009534958999999949}                          \n",
        "  All three broke within   Objective      Objective                {'Subjective':           True                   \n",
-       "  two months of use.                                               -4.800799,                                      \n",
+       "  two months of use.                                               -3.9477026,                                     \n",
        "                                                                   'Objective':                                    \n",
-       "                                                                   -0.008257226000000043}                          \n",
+       "                                                                   -0.019487570000000034}                          \n",
        "                                                                                                                   \n",
        "
\n" ], "text/plain": [ " \n", - " \u001B[1;35m \u001B[0m\u001B[1;35mtext \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__sub…\u001B[0m\u001B[1;35m \u001B[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detecti…\u001b[0m\u001b[1;35m \u001b[0m \n", " ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", - " The mic is great. Subjective Subjective {'Subjective': True \n", - " -0.022607480000000055, \n", - " 'Objective': -3.80076} \n", - " \u001B[2m \u001B[0m\u001B[2mWill order from them \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2magain! 
\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.05627503599999997, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.9055107} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " The mic is great. Subjective Objective {'Subjective': False \n", + " -5.5978823, \n", + " 'Objective': \n", + " -0.003712546499999969} \n", + " \u001b[2m \u001b[0m\u001b[2mWill order from them \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2magain! \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.6518025399999999, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.7362752} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -2.897738, \n", + " doesn't turn on like -4.672154, \n", " it should. 
'Objective': \n", - " -0.05672692499999995} \n", - " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mto accept anything \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-3.8168292, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.022242965000000038}\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " -0.009396199000000013} \n", + " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mto accept anything \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.6575603, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.009534958999999949}\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " All three broke 
within Objective Objective {'Subjective': True \n", - " two months of use. -4.800799, \n", + " two months of use. -3.9477026, \n", " 'Objective': \n", - " -0.008257226000000043} \n", + " -0.019487570000000034} \n", " \n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
Accuracy = 80.00%\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -798,20 +924,30 @@ "\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████| 1/1 [00:00<00:00, 221.92it/s]\n", + "100%|████████████████████████████████| 1/1 [00:00<00:00, 19.60it/s]\n" + ] + }, { "data": { "text/html": [ - "
Number of errors: 0\n",
+       "
Error analysis for skill \"subjectivity_detection\":\n",
+       "\n",
        "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m0\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -820,11 +956,38 @@ { "data": { "text/html": [ - "
Accuracy = 100.00%\n",
+       "
\n",
+       "Input: The mic is great.\n",
+       "Prediction: Objective\n",
+       "Ground truth: Subjective\n",
+       "Error reason: The model made an error because the statement \"The mic is great\" is subjective, as it expresses a \n",
+       "personal opinion or feeling about the quality of the microphone, rather than providing factual information about \n",
+       "its characteristics or performance.\n",
+       "\n",
        "
\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m100.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: The mic is great.\u001b[0m\n", + "\u001b[32mPrediction: Objective\u001b[0m\n", + "\u001b[32mGround truth: Subjective\u001b[0m\n", + "\u001b[32mError reason: The model made an error because the statement \u001b[0m\u001b[32m\"The mic is great\"\u001b[0m\u001b[32m is subjective, as it expresses a \u001b[0m\n", + "\u001b[32mpersonal opinion or feeling about the quality of the microphone, rather than providing factual information about \u001b[0m\n", + "\u001b[32mits characteristics or performance.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Improve \"subjectivity_detection\" skill based on analysis ...\n",
+       "
\n" + ], + "text/plain": [ + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -833,16 +996,116 @@ { "data": { "text/html": [ - "
Accuracy threshold reached (1.0 >= 0.95)\n",
+       "
Updated instructions for skill \"subjectivity_detection\":\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n",
+       "based on personal opinions, feelings, or tastes, and may include evaluative words such as 'great', 'good', 'bad', \n",
+       "'like', 'dislike', etc. An \"Objective\" statement is based on factual information about the product, such as its \n",
+       "features, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \n",
+       "typically includes factual descriptions or reports of product performance. Even if the statement seems to express \n",
+       "dissatisfaction or a negative experience, it should be classified as \"Objective\" if it provides factual information\n",
+       "about the product's characteristics or performance.\n",
+       "\n",
+       "Examples:\n",
+       "\n",
+       "Input: The color of this phone is black.\n",
+       "Output: Objective\n",
+       "\n",
+       "Input: I think this phone is too expensive for its features.\n",
+       "Output: Subjective\n",
+       "\n",
+       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Output: Objective\n",
+       "\n",
+       "Input: All three broke within two months of use.\n",
+       "Output: Objective\n",
+       "\n",
+       "Input: I don't like the design of this laptop.\n",
+       "Output: Subjective\n",
+       "\n",
+       "Input: The mic is great.\n",
+       "Output: Subjective\n",
        "
\n" ], "text/plain": [ - "Accuracy threshold reached \u001B[1m(\u001B[0m\u001B[1;36m1.0\u001B[0m >= \u001B[1;36m0.95\u001B[0m\u001B[1m)\u001B[0m\n" + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes, and may include evaluative words such as \u001b[0m\u001b[32m'great'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'good'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'bad'\u001b[0m\u001b[1;32m, \u001b[0m\n", + "\u001b[32m'like'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'dislike'\u001b[0m\u001b[1;32m, etc. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the product, such as its \u001b[0m\n", + "\u001b[1;32mfeatures, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \u001b[0m\n", + "\u001b[1;32mtypically includes factual descriptions or reports of product performance. 
Even if the statement seems to express \u001b[0m\n", + "\u001b[1;32mdissatisfaction or a negative experience, it should be classified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m if it provides factual information\u001b[0m\n", + "\u001b[1;32mabout the product's characteristics or performance.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: The mic is great.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
Re-apply subjectivity_detection skill to dataset ...\n",
+       "
\n" + ], + "text/plain": [ + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████| 5/5 [00:00<00:00, 24.79it/s]\n" + ] + }, { "data": { "text/html": [ @@ -858,12 +1121,11 @@ } ], "source": [ - "learning_experience = agent.learn(learning_iterations=3, accuracy_threshold=0.95)" + "ground_truth_signal = agent.learn(learning_iterations=3, accuracy_threshold=0.95)" ] }, { "cell_type": "markdown", - "id": "ee1573e3", "metadata": {}, "source": [ "Let's see the final instructions:" @@ -871,8 +1133,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "f5b67bd4", + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -881,53 +1142,67 @@ "
Total Agent Skills: 1\n",
        "\n",
        "subjectivity_detection\n",
-       "Identify if the provided product review is \"Subjective\" (expressing personal feelings, tastes, or opinions) or \n",
-       "\"Objective\" (based on factual information). Consider a statement as subjective if it reflects personal judgment or \n",
-       "preference, and as objective if it states verifiable facts or features.\n",
+       "Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n",
+       "based on personal opinions, feelings, or tastes, and may include evaluative words such as 'great', 'good', 'bad', \n",
+       "'like', 'dislike', etc. An \"Objective\" statement is based on factual information about the product, such as its \n",
+       "features, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \n",
+       "typically includes factual descriptions or reports of product performance. Even if the statement seems to express \n",
+       "dissatisfaction or a negative experience, it should be classified as \"Objective\" if it provides factual information\n",
+       "about the product's characteristics or performance.\n",
        "\n",
        "Examples:\n",
        "\n",
-       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Input: The color of this phone is black.\n",
        "Output: Objective\n",
        "\n",
-       "Input: I personally think the sound quality is not up to the mark.\n",
+       "Input: I think this phone is too expensive for its features.\n",
        "Output: Subjective\n",
        "\n",
-       "Input: The phone's battery lasts for 10 hours.\n",
+       "Input: Not loud enough and doesn't turn on like it should.\n",
+       "Output: Objective\n",
+       "\n",
+       "Input: All three broke within two months of use.\n",
        "Output: Objective\n",
        "\n",
-       "Input: The mic is great.\n",
+       "Input: I don't like the design of this laptop.\n",
        "Output: Subjective\n",
        "\n",
-       "Input: Will order from them again!\n",
+       "Input: The mic is great.\n",
        "Output: Subjective\n",
        "\n",
        "
\n" ], "text/plain": [ - "\u001B[1;34mTotal Agent Skills: \u001B[0m\u001B[1;34m1\u001B[0m\n", + "\u001b[1;34mTotal Agent Skills: \u001b[0m\u001b[1;34m1\u001b[0m\n", "\n", - "\u001B[1;4;32msubjectivity_detection\u001B[0m\n", - "\u001B[32mIdentify if the provided product review is \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[32mexpressing personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[32m or \u001B[0m\n", - "\u001B[32m\"Objective\"\u001B[0m\u001B[32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[32mbased on factual information\u001B[0m\u001B[1;32m)\u001B[0m\u001B[32m. Consider a statement as subjective if it reflects personal judgment or \u001B[0m\n", - "\u001B[32mpreference, and as objective if it states verifiable facts or features.\u001B[0m\n", + "\u001b[1;4;32msubjectivity_detection\u001b[0m\n", + "\u001b[32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[32m statement is \u001b[0m\n", + "\u001b[32mbased on personal opinions, feelings, or tastes, and may include evaluative words such as \u001b[0m\u001b[32m'great'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'good'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'bad'\u001b[0m\u001b[32m, \u001b[0m\n", + "\u001b[32m'like'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'dislike'\u001b[0m\u001b[32m, etc. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m statement is based on factual information about the product, such as its \u001b[0m\n", + "\u001b[32mfeatures, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \u001b[0m\n", + "\u001b[32mtypically includes factual descriptions or reports of product performance. 
Even if the statement seems to express \u001b[0m\n", + "\u001b[32mdissatisfaction or a negative experience, it should be classified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m if it provides factual information\u001b[0m\n", + "\u001b[32mabout the product's characteristics or performance.\u001b[0m\n", "\n", - "\u001B[32mExamples:\u001B[0m\n", + "\u001b[32mExamples:\u001b[0m\n", "\n", - "\u001B[32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[32mOutput: Objective\u001B[0m\n", + "\u001b[32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[32mInput: The phone's battery lasts for \u001B[0m\u001B[1;32m10\u001B[0m\u001B[32m hours.\u001B[0m\n", - "\u001B[32mOutput: Objective\u001B[0m\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: The mic is great.\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: Will order from them again!\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[32mInput: The mic is great.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", "\n" ] }, @@ -941,7 +1216,6 @@ }, { "cell_type": "markdown", - "id": "54ec4568", "metadata": {}, "source": [ "... 
and predictions created by the skill:" @@ -949,10 +1223,29 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "baa69db8", + "execution_count": 14, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████| 5/5 [00:00<00:00, 24.62it/s]\n" + ] + }, { "data": { "text/html": [ @@ -986,35 +1279,35 @@ " The mic is great.\n", " Subjective\n", " Subjective\n", - " {'Subjective': -0.022607480000000055, 'Objecti...\n", + " {'Subjective': -0.11115719, 'Objective': -2.25...\n", " \n", " \n", " 1\n", " Will order from them again!\n", " Subjective\n", " Subjective\n", - " {'Subjective': -0.05627503599999997, 'Objectiv...\n", + " {'Subjective': -0.44846975999999994, 'Objectiv...\n", " \n", " \n", " 2\n", " Not loud enough and doesn't turn on like it sh...\n", " Objective\n", " Objective\n", - " {'Subjective': -2.897738, 'Objective': -0.0567...\n", + " {'Subjective': -4.4792867, 'Objective': -0.011...\n", " \n", " \n", " 3\n", " The phone doesn't seem to accept anything exce...\n", " Objective\n", " Objective\n", - " {'Subjective': -3.8168292, 'Objective': -0.022...\n", + " {'Subjective': -4.990218, 'Objective': -0.0068...\n", " \n", " \n", " 4\n", " All three broke within two months of use.\n", " Objective\n", " Objective\n", - " {'Subjective': -4.800799, 'Objective': -0.0082...\n", + " {'Subjective': -4.19226, 'Objective': -0.01522...\n", " \n", " \n", "\n", @@ -1029,25 +1322,24 @@ "4 All three broke within two months of use. Objective \n", "\n", " subjectivity_detection score \n", - "0 Subjective {'Subjective': -0.022607480000000055, 'Objecti... \n", - "1 Subjective {'Subjective': -0.05627503599999997, 'Objectiv... \n", - "2 Objective {'Subjective': -2.897738, 'Objective': -0.0567... \n", - "3 Objective {'Subjective': -3.8168292, 'Objective': -0.022... \n", - "4 Objective {'Subjective': -4.800799, 'Objective': -0.0082... " + "0 Subjective {'Subjective': -0.11115719, 'Objective': -2.25... 
\n", + "1 Subjective {'Subjective': -0.44846975999999994, 'Objectiv... \n", + "2 Objective {'Subjective': -4.4792867, 'Objective': -0.011... \n", + "3 Objective {'Subjective': -4.990218, 'Objective': -0.0068... \n", + "4 Objective {'Subjective': -4.19226, 'Objective': -0.01522... " ] }, - "execution_count": 6, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "learning_experience.predictions" + "agent.run(dataset)" ] }, { "cell_type": "markdown", - "id": "b8d49385", "metadata": {}, "source": [ "## Applying learned skills to the real data\n", @@ -1057,8 +1349,7 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "60a79462", + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1114,7 +1405,7 @@ "3 VERY DISAPPOINTED." ] }, - "execution_count": 7, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1131,26 +1422,37 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "2f2bf273", + "execution_count": 16, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: subjectivity_detection\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 4/4 [00:00<00:00, 32.32it/s]\n" + "100%|████████████████████████████████| 4/4 [00:04<00:00, 1.00s/it]\n" ] } ], "source": [ - "result = agent.apply_skills(test_df)" + "predictions = agent.run(test_df)" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "e6c50ede", + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1184,25 +1486,25 @@ " 0\n", " Doesn't hold charge.\n", " Objective\n", - " {'Subjective': -4.9062243, 'Objective': -0.007...\n", + " {'Subjective': -6.241702, 'Objective': -0.0019...\n", " \n", " \n", " 1\n", " Excellent bluetooth headset\n", - " Objective\n", - " {'Subjective': -1.450324, 'Objective': -0.2672...\n", + " Subjective\n", + " {'Subjective': -0.39792176999999995, 'Objectiv...\n", " \n", " \n", " 2\n", " I love this thing!\n", " Subjective\n", - " {'Subjective': -0.0014673689999999905, 'Object...\n", + " {'Subjective': -0.0008572661999999637, 'Object...\n", " \n", " \n", " 3\n", " VERY DISAPPOINTED.\n", " Subjective\n", - " {'Subjective': -0.17851222999999997, 'Objectiv...\n", + " {'Subjective': -0.21449489999999996, 'Objectiv...\n", " \n", " \n", "\n", @@ -1211,40 +1513,32 @@ "text/plain": [ " text subjectivity_detection \\\n", "0 Doesn't hold charge. Objective \n", - "1 Excellent bluetooth headset Objective \n", + "1 Excellent bluetooth headset Subjective \n", "2 I love this thing! Subjective \n", "3 VERY DISAPPOINTED. Subjective \n", "\n", " score \n", - "0 {'Subjective': -4.9062243, 'Objective': -0.007... \n", - "1 {'Subjective': -1.450324, 'Objective': -0.2672... \n", - "2 {'Subjective': -0.0014673689999999905, 'Object... \n", - "3 {'Subjective': -0.17851222999999997, 'Objectiv... " + "0 {'Subjective': -6.241702, 'Objective': -0.0019... \n", + "1 {'Subjective': -0.39792176999999995, 'Objectiv... 
\n", + "2 {'Subjective': -0.0008572661999999637, 'Object... \n", + "3 {'Subjective': -0.21449489999999996, 'Objectiv... " ] }, - "execution_count": 9, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "result.predictions" + "predictions" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0922915b", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -1256,9 +1550,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/examples/summarization_skill.ipynb b/examples/summarization_skill.ipynb index 27c4792..609d64e 100644 --- a/examples/summarization_skill.ipynb +++ b/examples/summarization_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Summarization skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -62,7 +60,7 @@ "2 Vitamin D is a fat-soluble nutrient. It is one..." ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -79,15 +77,27 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: summarization\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: summarization\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████| 3/3 [00:05<00:00, 1.73s/it]\n" + "100%|████████████████████████████████| 3/3 [00:00<00:00, 52.08it/s]\n" ] }, { @@ -147,7 +157,7 @@ "2 \\nVitamin D is a fat-soluble nutrient that is ... " ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -163,16 +173,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -184,7 +193,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/text_generation_skill.ipynb b/examples/text_generation_skill.ipynb index 88eb374..d89b811 100644 --- a/examples/text_generation_skill.ipynb +++ b/examples/text_generation_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Text generation skill" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -154,14 +152,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: text_generation\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: text_generation\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████| 10/10 [00:24<00:00, 2.48s/it]\n" + "100%|██████████████████████████████| 10/10 [00:00<00:00, 71.35it/s]\n" ] }, { @@ -309,16 +319,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -330,7 +339,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/translation_skill.ipynb b/examples/translation_skill.ipynb index d830f36..6e87ae9 100644 --- a/examples/translation_skill.ipynb +++ b/examples/translation_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Translation skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -108,7 +106,7 @@ "9 सपने सच होते हैं Hindi" ] }, - "execution_count": 13, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -132,35 +130,29 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: translation\n",
+       "
\n" + ], + "text/plain": [ + "Applying skill: translation\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████| 10/10 [00:08<00:00, 1.13it/s]\n" + "100%|██████████████████████████████| 10/10 [00:00<00:00, 61.83it/s]\n" ] - } - ], - "source": [ - "from adala.agents import Agent\n", - "from adala.environments import BasicEnvironment\n", - "from adala.skills.generation.translation import TranslationSkill\n", - "from rich import print\n", - "\n", - "agent = Agent(skills=TranslationSkill(input_data_field='text', target_language='Swahili'))\n", - "\n", - "run = agent.apply_skills(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "ee97ee22", - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ @@ -278,21 +270,28 @@ "9 Ndoto zinatimia " ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "run.predictions" + "from adala.agents import Agent\n", + "from adala.environments import BasicEnvironment\n", + "from adala.skills.generation.translation import TranslationSkill\n", + "from rich import print\n", + "\n", + "agent = Agent(skills=TranslationSkill(input_data_field='text', target_language='Swahili'))\n", + "\n", + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -304,7 +303,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4,