diff --git a/README.md b/README.md index 9017402..b453a17 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ agent = Agent( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=ground_truth_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment_classification": "ground_truth"} ), # define a skill @@ -148,10 +148,10 @@ agent = Agent( default_runtime='openai', # NOTE! If you have access to GPT-4, you can uncomment the lines bellow for better results - # default_teacher_runtime='openai-gpt4', - # teacher_runtimes = { - # 'openai-gpt4': OpenAIRuntime(model='gpt-4') - # } +# default_teacher_runtime='openai-gpt4', +# teacher_runtimes = { +# 'openai-gpt4': OpenAIRuntime(model='gpt-4') +# } ) print(agent) @@ -160,9 +160,9 @@ print(agent.skills) agent.learn(learning_iterations=3, accuracy_threshold=0.95) print('\n=> Run tests ...') -run = agent.apply_skills(predict_dataset) +predictions = agent.run(predict_dataset) print('\n => Test results:') -print(run) +print(predictions) ``` ### π Available skills diff --git a/adala/agents/base.py b/adala/agents/base.py index c65f65c..11608f8 100644 --- a/adala/agents/base.py +++ b/adala/agents/base.py @@ -1,15 +1,17 @@ from pydantic import BaseModel, Field, SkipValidation, field_validator, model_validator from abc import ABC, abstractmethod -from typing import Any, Optional, List, Dict, Union -from adala.environments.base import Environment, BasicEnvironment +from typing import Any, Optional, List, Dict, Union, Tuple +from rich import print + +from adala.environments.base import Environment, BasicEnvironment, GroundTruthSignal from adala.datasets import Dataset, DataFrameDataset from adala.runtimes.base import Runtime, LLMRuntime, LLMRuntimeType, LLMRuntimeModelType from adala.runtimes.openai import OpenAIRuntime -from adala.memories.base import ShortTermMemory, LongTermMemory +from adala.memories.base import Memory from adala.skills.base import BaseSkill from adala.skills.skillset 
import SkillSet, LinearSkillSet from adala.utils.logs import print_dataframe, print_text, print_error -from adala.utils.internal_data import InternalDataFrame +from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat class Agent(BaseModel, ABC): @@ -23,12 +25,14 @@ class Agent(BaseModel, ABC): memory (LongTermMemory, optional): The agent's long-term memory. Defaults to None. runtimes (Dict[str, Runtime], optional): The runtimes available to the agent. Defaults to predefined runtimes. default_runtime (str): The default runtime used by the agent. Defaults to 'openai'. + teacher_runtimes (Dict[str, Runtime], optional): The runtimes available to the agent's teacher. Defaults to predefined runtimes. + default_teacher_runtime (str): The default runtime used by the agent's teacher. Defaults to 'openai-gpt3'. """ environment: Union[InternalDataFrame, Dataset, Environment] = Field(default_factory=DataFrameDataset) - skills: Union[SkillSet, BaseSkill, List[BaseSkill], Dict[str, BaseSkill]] + skills: SkillSet - memory: LongTermMemory = Field(default=None) + memory: Memory = Field(default=None) runtimes: Optional[Dict[str, Runtime]] = Field( default_factory=lambda: { 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'), @@ -90,7 +94,7 @@ def environment_validator(cls, v): v = BasicEnvironment(dataset=v) return v - @field_validator('skills') + @field_validator('skills', mode='before') def skills_validator(cls, v): """ Validates and possibly transforms the skills attribute. 
@@ -103,14 +107,11 @@ def skills_validator(cls, v): """ if isinstance(v, SkillSet): - pass + return v elif isinstance(v, BaseSkill): - v = LinearSkillSet(skills={'skill_0': v}) - elif isinstance(v, list): - v = LinearSkillSet(skills={f'skill_{i}': skill for i, skill in enumerate(v)}) - elif isinstance(v, dict): - v = LinearSkillSet(skills=v) - return v + return LinearSkillSet(skills={v.name: v}) + else: + return LinearSkillSet(skills=v) @model_validator(mode='after') def verify_input_parameters(self): @@ -169,116 +170,104 @@ def get_teacher_runtime(self, runtime: Optional[str] = None) -> Runtime: raise ValueError(f'Teacher Runtime "{runtime}" not found.') return self.teacher_runtimes[runtime] - def apply_skills( - self, - dataset: Union[Dataset, InternalDataFrame], - runtime: Optional[Union[str, Runtime]] = None, - experience: Optional[ShortTermMemory] = None, - ) -> ShortTermMemory: + def run(self, dataset: Union[Dataset, InternalDataFrame], runtime: Optional[str] = None) -> InternalDataFrame: """ - Applies the agent's skills to a given dataset using the specified runtime. + Runs the agent on the specified dataset. Args: - dataset (Dataset): The dataset to apply skills on. - runtime (str, optional): The runtime to use. Defaults to None. - experience (ShortTermMemory, optional): The agent's short-term memory. Defaults to None. + dataset (Union[Dataset, InternalDataFrame]): The dataset to run the agent on. + runtime (str, optional): The name of the runtime to use. Defaults to None, use the default runtime. Returns: - ShortTermMemory: The short-term memory resulting from the application of skills. + InternalDataFrame: The dataset with the agent's predictions. 
""" - runtime = runtime or self.default_runtime - if isinstance(dataset, InternalDataFrame): - dataset = DataFrameDataset(df=dataset) - if isinstance(runtime, str): - runtime = self.get_runtime(runtime=runtime) - return self.skills.apply(dataset=dataset, runtime=runtime, experience=experience) + runtime = self.get_runtime(runtime=runtime) + predictions = self.skills.apply(dataset, runtime=runtime) + return predictions def learn( self, learning_iterations: int = 3, accuracy_threshold: float = 0.9, - update_skills: bool = True, update_memory: bool = True, request_environment_feedback: bool = True, - experience: Optional[ShortTermMemory] = None, runtime: Optional[str] = None, - ) -> ShortTermMemory: + teacher_runtime: Optional[str] = None, + ) -> GroundTruthSignal: """ Enables the agent to learn and improve its skills based on interactions with its environment. Args: learning_iterations (int, optional): The number of iterations for learning. Defaults to 3. accuracy_threshold (float, optional): The desired accuracy threshold to reach. Defaults to 0.9. - update_skills (bool, optional): Flag to determine if skills should be updated after learning. Defaults to True. update_memory (bool, optional): Flag to determine if memory should be updated after learning. Defaults to True. request_environment_feedback (bool, optional): Flag to determine if feedback should be requested from the environment. Defaults to True. - experience (ShortTermMemory, optional): Initial experience for the learning process. Defaults to None. runtime (str, optional): The runtime to be used for the learning process. Defaults to None. - + teacher_runtime (str, optional): The teacher runtime to be used for the learning process. Defaults to None. Returns: - ShortTermMemory: The short-term memory after the learning process. + GroundTruthSignal: The ground truth signal. 
""" runtime = self.get_runtime(runtime=runtime) - # TODO: support teacher runtime input, not default - teacher_runtime = self.get_teacher_runtime(runtime=self.default_teacher_runtime) + teacher_runtime = self.get_teacher_runtime(runtime=teacher_runtime) - skills = self.skills.model_copy(deep=True) dataset = self.environment.as_dataset() # Apply agent skills to dataset and get experience with predictions - experience = self.apply_skills(dataset=dataset, runtime=runtime, experience=experience) - - # Agent select one skill to improve - learned_skill = skills.select_skill_to_improve(experience) + predictions = self.skills.apply(dataset, runtime=runtime) - # Request feedback from environment is necessary - if request_environment_feedback: - self.environment.request_feedback(learned_skill, experience) + ground_truth_signal = None for iteration in range(learning_iterations): print_text(f'\n\n=> Iteration #{iteration}: Comparing to ground truth, analyzing and improving ...') - # 1. EVALUATION PHASE: Compare predictions to ground truth - experience = self.environment.compare_to_ground_truth(learned_skill, experience) + # Request feedback from environment is necessary + if request_environment_feedback: + self.environment.request_feedback(self.skills, predictions) + + # Compare predictions to ground truth -> get ground truth signal + ground_truth_signal = self.environment.compare_to_ground_truth(self.skills, predictions) print_text(f'Comparing predictions to ground truth data ...') - print_dataframe(experience.evaluations) + print_dataframe(InternalDataFrameConcat([predictions, ground_truth_signal.match], axis=1)) + + # Use ground truth signal to find the skill to improve + accuracy = ground_truth_signal.get_accuracy() + train_skill = self.skills.select_skill_to_improve(accuracy, accuracy_threshold) + if not train_skill: + print_text(f'No skill to improve found. 
Stopping learning process.') + break + # select the worst performing skill + print_text(f'Accuracy = {accuracy[train_skill.name] * 100:0.2f}%', style='bold red') + + skill_errors = ground_truth_signal.get_errors(train_skill.name) # 2. ANALYSIS PHASE: Analyze evaluation experience, optionally use long term memory print_text(f'Analyze evaluation experience ...') - experience = learned_skill.analyze( - experience=experience, + error_analysis = train_skill.analyze( + predictions=predictions, + errors=skill_errors, student_runtime=runtime, teacher_runtime=teacher_runtime, memory=self.memory ) - print_text(f'Number of errors: {len(experience.errors)}') - - print_text(f'Accuracy = {experience.accuracy*100:0.2f}%', style='bold red') - if experience.accuracy >= accuracy_threshold: - print_text(f'Accuracy threshold reached ({experience.accuracy} >= {accuracy_threshold})') - break + print_text(f'Error analysis for skill "{train_skill.name}":\n') + print_text(error_analysis, style='green') + if self.memory and update_memory: + self.memory.remember(error_analysis, self.skills) # 3. IMPROVEMENT PHASE: Improve skills based on analysis - print_text(f"Improve \"{learned_skill.name}\" skill based on analysis ...") - experience = learned_skill.improve( - experience=experience, + print_text(f"Improve \"{train_skill.name}\" skill based on analysis ...") + train_skill.improve( + error_analysis=error_analysis, runtime=teacher_runtime, - update_instructions=True ) - print_text(f'Updated instructions for skill "{learned_skill.name}":\n') - print_text(learned_skill.instructions, style='bold green') + print_text(f'Updated instructions for skill "{train_skill.name}":\n') + print_text(train_skill.instructions, style='bold green') # 4. 
RE-APPLY PHASE: Re-apply skills to dataset - print_text(f"Re-apply {learned_skill.name} skill to dataset ...") - experience = learned_skill.apply(dataset, runtime, experience=experience) - - # Update skills and memory based on experience - if update_skills: - self.skills = skills - - if self.memory and update_memory: - self.memory.remember(experience, self.skills) + print_text(f"Re-apply {train_skill.name} skill to dataset ...") + self.skills[train_skill.name] = train_skill + predictions = self.skills.apply(predictions, runtime=runtime, improved_skill=train_skill.name) print_text('Train is done!') - return experience + return ground_truth_signal diff --git a/adala/environments/base.py b/adala/environments/base.py index 4f4a3f0..fe3e399 100644 --- a/adala/environments/base.py +++ b/adala/environments/base.py @@ -1,13 +1,38 @@ -from pydantic import BaseModel, dataclasses, Field, field_validator +from pydantic import BaseModel, Field, field_validator from abc import ABC, abstractmethod -from typing import Any, Optional, Dict, Union, Callable +from typing import Any, Optional, Dict, Union, Callable, Dict -from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat +from adala.utils.internal_data import InternalDataFrame, InternalSeries, InternalDataFrameConcat +from adala.utils.matching import fuzzy_match from adala.skills.base import BaseSkill -from adala.memories.base import ShortTermMemory +from adala.skills.skillset import SkillSet from adala.datasets import Dataset, DataFrameDataset +class GroundTruthSignal(BaseModel): + match: InternalDataFrame + errors: Optional[Dict[str, InternalDataFrame]] = None + + def get_accuracy(self) -> InternalSeries: + return self.match.mean() + + def get_errors(self, skill_name: str) -> InternalDataFrame: + errors = self.errors[skill_name] + assert len(errors.columns) == 2 # ["predictions", "ground_truth name"] + return errors + + def __rich__(self): + text = '[bold blue]Ground Truth Signal:[/bold blue]\n\n' + 
text += f'\n[bold]Match[/bold]\n{self.match}' + if self.errors is not None: + for skill_name, errors in self.errors.items(): + text += f'\n[bold]Errors for {skill_name}[/bold]\n{errors}' + return text + + class Config: + arbitrary_types_allowed = True + + class Environment(BaseModel, ABC): """Abstract base class for environments. @@ -19,11 +44,11 @@ class Environment(BaseModel, ABC): """ @abstractmethod - def request_feedback(self, skill: BaseSkill, experience: ShortTermMemory): + def request_feedback(self, skill_set: SkillSet, predictions: InternalDataFrame): """Request user feedback using predictions and update internal ground truth set.""" @abstractmethod - def compare_to_ground_truth(self, skill: BaseSkill, experience: ShortTermMemory) -> ShortTermMemory: + def compare_to_ground_truth(self, skill_set: SkillSet, predictions: InternalDataFrame) -> GroundTruthSignal: """Compare predictions with ground truth and return the results.""" @abstractmethod @@ -53,14 +78,13 @@ class BasicEnvironment(Environment): Defaults to an empty DataFrameDataset. ground_truth_column (str): Name of the column containing ground truth in the dataset. Defaults to 'ground_truth'. - _prediction_column (str): Name of the column containing predictions. 
""" ground_truth_dataset: Union[InternalDataFrame, DataFrameDataset] = Field(default_factory=DataFrameDataset) - ground_truth_column: str = 'ground_truth' - - _prediction_column: str + ground_truth_columns: Dict[str, str] + matching_function: str = 'exact' + matching_threshold: float = 0.8 @field_validator('ground_truth_dataset') def _validate_ground_truth_dataset(cls, v): @@ -68,43 +92,54 @@ def _validate_ground_truth_dataset(cls, v): return DataFrameDataset(df=v) return v - def request_feedback(self, skill: BaseSkill, experience: ShortTermMemory): + def request_feedback(self, skill: BaseSkill, predictions: InternalDataFrame): """In the BasicEnvironment, ground truth is already provided with the input data.""" - def compare_to_ground_truth(self, skill: BaseSkill, experience: ShortTermMemory) -> ShortTermMemory: + def compare_to_ground_truth(self, skill_set: SkillSet, predictions: InternalDataFrame) -> GroundTruthSignal: """Compare the predictions with the ground truth using exact matching. Args: - skill (BaseSkill): The skill being evaluated. - experience (ShortTermMemory): The experience memory containing predictions. - + skill_set (SkillSet): The skill set being evaluated. + predictions (InternalDataFrame): The predictions to compare with ground truth. Returns: - ShortTermMemory: Updated memory containing evaluation results against ground truth. + GroundTruthSignal: The ground truth signal. 
""" - experience = experience.model_copy() - - gt = self.ground_truth_dataset.df[self.ground_truth_column] - pred = experience.predictions - # select - gt = gt[gt.index.isin(pred.index)] - if gt.empty: - # return empty memory - return experience - - gt = gt.to_frame(self.ground_truth_column) - - # compare ground truth with predictions using exact matching - match_column_name = f'{self.ground_truth_column}__x__{skill.name}' - evaluations = InternalDataFrameConcat([ - pred, - (gt[self.ground_truth_column] == pred[skill.name]).rename(match_column_name) - ], axis=1) - experience.evaluations = evaluations - # remember the last column names used in evaluations - experience.ground_truth_column_name = self.ground_truth_column - experience.match_column_name = match_column_name - return experience + ground_truth_match = InternalDataFrame() + errors = {} + for skill_id, skill in skill_set.skills.items(): + gt_column = self.ground_truth_columns[skill.name] + gt = self.ground_truth_dataset.df[gt_column] + pred = predictions[skill.name] + # from ground truth dataset, select only the rows that are in the predictions + gt, pred = gt.align(pred) + nonnull_index = gt.notnull() & pred.notnull() + gt = gt[nonnull_index] + pred = pred[nonnull_index] + # compare ground truth with predictions + if self.matching_function == 'exact': + gt_pred_match = gt == pred + elif self.matching_function == 'fuzzy': + gt_pred_match = fuzzy_match(gt, pred, threshold=self.matching_threshold) + else: + raise NotImplementedError(f'Unknown matching function {self.matching_function}') + + error_index = gt_pred_match[~gt_pred_match].index + # concatenate errors - dataframe with two columns: predictions and ground truth + errors[skill.name] = InternalDataFrameConcat([pred[error_index], gt[error_index]], axis=1) + errors[skill.name].columns = ["predictions", gt_column] + # concatenate matching columns + ground_truth_match = InternalDataFrameConcat([ + # previous skills' ground truth matches + 
ground_truth_match, + # current skill's ground truth match + gt_pred_match.rename(skill.name), + ], axis=1) + + return GroundTruthSignal( + match=ground_truth_match.reindex(predictions.index), + errors=errors + ) def as_dataset(self) -> Dataset: """Return the ground truth dataset. diff --git a/adala/memories/__init__.py b/adala/memories/__init__.py index 8f218bf..cf4e934 100644 --- a/adala/memories/__init__.py +++ b/adala/memories/__init__.py @@ -1,2 +1,2 @@ from .file_memory import FileMemory -from .base import ShortTermMemory, LongTermMemory \ No newline at end of file +from .base import Memory \ No newline at end of file diff --git a/adala/memories/base.py b/adala/memories/base.py index 822f9dd..db6376c 100644 --- a/adala/memories/base.py +++ b/adala/memories/base.py @@ -1,61 +1,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, Optional, TYPE_CHECKING, Dict -from pydantic import BaseModel +from pydantic import BaseModel, Field from adala.datasets.base import Dataset, InternalDataFrame from rich import print -if TYPE_CHECKING: - from adala.skills.skillset import SkillSet - -class ShortTermMemory(BaseModel): - """ - Base class for short term memory storage - """ - dataset: Dataset = None - predictions: InternalDataFrame = None - evaluations: InternalDataFrame = None - ground_truth_column_name: str = None - match_column_name: str = None - errors: InternalDataFrame = None - accuracy: float = None - initial_instructions: str = None - updated_instructions: str = None - - class Config: - arbitrary_types_allowed = True - - def reset(self): - self.predictions = None - self.evaluations = None - self.errors = None - self.accuracy = None - self.initial_instructions = None - self.updated_instructions = None - - def __rich__(self): - text = '[bold blue]Agent Experience:[/bold blue]\n\n' - if self.predictions is not None: - text += 
f'\n[bold]Predictions[/bold]\n{self.predictions}' - if self.evaluations is not None: - text += f'\n[bold]Evaluations[/bold]\n{self.evaluations}' - if self.errors is not None: - text += f'\n[bold]Errors[/bold]\n{self.errors}' - if self.accuracy is not None: - text += f'\n[bold]Accuracy[/bold]\n{self.accuracy}' - if self.initial_instructions is not None: - text += f'\n[bold]Initial Instructions[/bold]\n{self.initial_instructions}' - if self.updated_instructions is not None: - text += f'\n[bold]Updated Instructions[/bold]\n{self.updated_instructions}' - return text - - def display(self): - print(self) - - -class LongTermMemory(BaseModel, ABC): +class Memory(BaseModel, ABC): """ Base class for long-term memories. @@ -63,13 +15,13 @@ class LongTermMemory(BaseModel, ABC): """ @abstractmethod - def remember(self, experience: ShortTermMemory, skills: SkillSet): + def remember(self, observation: str, experience: Any): """ Base method for remembering experiences in long term memory. """ @abstractmethod - def retrieve(self, observations: ShortTermMemory) -> ShortTermMemory: + def retrieve(self, observation: str) -> Any: """ Base method for retrieving past experiences from long term memory, based on current observations """ diff --git a/adala/memories/file_memory.py b/adala/memories/file_memory.py index f297266..7707006 100644 --- a/adala/memories/file_memory.py +++ b/adala/memories/file_memory.py @@ -1,21 +1,26 @@ -from .base import LongTermMemory, ShortTermMemory +import json +from .base import Memory from typing import Any -class FileMemory(LongTermMemory): +class FileMemory(Memory): filepath: str - def remember(self, experience: ShortTermMemory): + def remember(self, observation: str, experience: Any): """ Serialize experience in JSON and append to file """ - experience_json = experience.model_dump_json() - with open(self.filepath, 'a') as f: - f.write(experience_json + '\n') + with open(self.filepath) as f: + memory = json.load(f) + memory[observation] = experience + with 
open(self.filepath, 'w') as f: + json.dump(memory, f, indent=2) - def retrieve(self, observations: ShortTermMemory) -> ShortTermMemory: + def retrieve(self, observation: str) -> Any: """ Retrieve experience from file """ - raise NotImplementedError + with open(self.filepath) as f: + memory = json.load(f) + return memory[observation] diff --git a/adala/runtimes/base.py b/adala/runtimes/base.py index c77cb4b..be9c3bd 100644 --- a/adala/runtimes/base.py +++ b/adala/runtimes/base.py @@ -96,7 +96,7 @@ def init_runtime(self): self._create_program() return self - def get_outputs(self, output_template: str) -> List[str]: + def get_outputs(self, output_template: Optional[str] = None) -> List[str]: """Extracts output fields from the output template. Args: @@ -107,6 +107,8 @@ def get_outputs(self, output_template: str) -> List[str]: """ # search for all occurrences of {{...'output'...}} # TODO: this is a very naive regex implementation - likely to fail in many cases + if output_template is None: + return [] outputs = re.findall(r'\'(.*?)\'', output_template) return outputs @@ -116,7 +118,8 @@ def _process_record( program, extra_fields, outputs=None - ): + ) -> Dict[str, Any]: + """Processes a single record using the guidance program. Args: @@ -138,15 +141,14 @@ def _process_record( if 'text' in verified_input: verified_input['text_'] = verified_input['text'] del verified_input['text'] - verified_input.update(extra_fields) if self.verbose: - print_text(verified_input) + print_text(str(verified_input)) result = program( silent=not self.verbose, **verified_input ) - if outputs is None: + if not outputs: verified_output = {'': str(result)} else: verified_output = {field: result[field] for field in outputs} @@ -180,7 +182,8 @@ def get_output_program(self, output_template): callable: The generated output program. 
""" - return guidance(output_template, llm=self._llm) + output_program = guidance(output_template, llm=self._llm) + return output_program def get_instructions_program(self, instructions): """Generates an instructions program from the provided template. @@ -192,14 +195,30 @@ def get_instructions_program(self, instructions): callable: The generated instructions program. """ - return guidance(instructions, llm=self._llm) + instructions_program = guidance(instructions, llm=self._llm) + return instructions_program + + def _prepare_program_and_params(self, input_template, output_template, instructions, extra_fields): + extra_fields = extra_fields or {} + extra_fields = extra_fields.copy() + # if only one program template is provided, use it as a program + if output_template is None and instructions is None: + program = self.get_input_program(input_template) + else: + program = self._program + extra_fields.update({ + 'input_program': self.get_input_program(input_template), + 'output_program': self.get_output_program(output_template), + 'instructions_program': self.get_instructions_program(instructions), + }) + return program, extra_fields def process_record( self, record: Dict[str, Any], input_template: str, - output_template: str, - instructions: str, + output_template: Optional[str] = None, + instructions: Optional[str] = None, extra_fields: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """Processes a record using the provided templates and instructions. @@ -214,18 +233,11 @@ def process_record( Returns: Dict[str, Any]: The processed record. 
""" - - outputs = re.findall(r'\'(.*?)\'', output_template) - - input = record.copy() - input.update({ - 'input_program': self.get_input_program(input_template), - 'output_program': self.get_output_program(output_template), - 'instructions_program': self.get_instructions_program(instructions), - }) + outputs = self.get_outputs(output_template) + program, extra_fields = self._prepare_program_and_params(input_template, output_template, instructions, extra_fields) output = self._process_record( - record=input, - program=self._program, + record=record, + program=program, outputs=outputs, extra_fields=extra_fields ) @@ -235,8 +247,8 @@ def process_batch( self, batch: InternalDataFrame, input_template: str, - output_template: str, - instructions: str, + output_template: Optional[str] = None, + instructions: Optional[str] = None, extra_fields: Optional[Dict[str, Any]] = None, ) -> InternalDataFrame: """Processes a batch of records using the provided templates and instructions. @@ -253,52 +265,17 @@ def process_batch( """ outputs = self.get_outputs(output_template) - - extra_fields = extra_fields or {} - # copy extra fields to avoid modification of the original dict - extra_fields = extra_fields.copy() - # TODO: it's not efficient way to initialize the program here - should be done once - extra_fields.update({ - 'input_program': self.get_input_program(input_template), - 'output_program': self.get_output_program(output_template), - 'instructions_program': self.get_instructions_program(instructions), - }) + program, extra_fields = self._prepare_program_and_params(input_template, output_template, instructions, extra_fields) output = batch.progress_apply( self._process_record, axis=1, result_type='expand', - program=self._program, + program=program, outputs=outputs, extra_fields=extra_fields ) return output - def process_batch_inputs( - self, - batch: InternalDataFrame, - input_template: str, - extra_fields: Optional[Dict[str, Any]] = None, - ) -> InternalDataFrame: - 
"""Processes inputs for a batch of records using the provided input template. - - Args: - batch (InternalDataFrame): The batch of records for input processing. - input_template (str): The template for input processing. - extra_fields (Dict[str, Any], optional): Additional fields to include during input processing. - - Returns: - InternalDataFrame: The processed inputs for the batch of records. - """ - - output = batch.progress_apply( - self._process_record, - axis=1, - result_type='expand', - program=self.get_input_program(input_template), - extra_fields=extra_fields or {} - ) - return output - class CodeRuntime(Runtime): """Base class representing a runtime designed for executing code.""" diff --git a/adala/skills/base.py b/adala/skills/base.py index 09e3b3c..eca0e9f 100644 --- a/adala/skills/base.py +++ b/adala/skills/base.py @@ -11,7 +11,7 @@ from adala.runtimes.base import LLMRuntime from adala.datasets import Dataset, DataFrameDataset from adala.runtimes.base import Runtime -from adala.memories.base import ShortTermMemory, LongTermMemory +from adala.memories.base import Memory from adala.utils.internal_data import InternalDataFrame, InternalDataFrameConcat from adala.utils.logs import print_error @@ -114,7 +114,10 @@ def __call__(self, input: InternalDataFrame, runtime: Runtime, dataset: Dataset) instructions=self.instructions, extra_fields=self._get_extra_fields() ) - return InternalDataFrameConcat((input, runtime_predictions), axis=1) + runtime_predictions.rename(columns={self.prediction_field: self.name}, inplace=True) + output = input.copy() + output[runtime_predictions.columns] = runtime_predictions[runtime_predictions.columns] + return output def _get_extra_fields(self): """ @@ -135,27 +138,27 @@ def _get_extra_fields(self): def apply( self, dataset: Dataset, runtime: Runtime, - experience: ShortTermMemory - ) -> ShortTermMemory: + ) -> InternalDataFrame: """ Applies the skill to a dataset and returns the results. 
Args: dataset (Dataset): The dataset on which the skill is to be applied. runtime (Runtime): The runtime instance to be used for processing. - experience (ShortTermMemory): Previous experiences or results. - + Returns: ShortTermMemory: The updated experience after applying the skill. """ @abstractmethod def analyze( - self, experience: ShortTermMemory, + self, + predictions: InternalDataFrame, + errors: InternalDataFrame, student_runtime: Runtime, teacher_runtime: Optional[Runtime] = None, - memory: Optional[LongTermMemory] = None, - ) -> ShortTermMemory: + memory: Optional[Memory] = None, + ) -> str: """ Analyzes the results to derive new experiences. @@ -172,18 +175,16 @@ def analyze( @abstractmethod def improve( self, - experience: ShortTermMemory, - runtime: Runtime, - update_instructions: bool = True, - ) -> ShortTermMemory: + error_analysis: str, + runtime: Runtime + ): """ Refines the current state of the skill based on its experiences. Args: experience (ShortTermMemory): The current experience. runtime (Runtime): The runtime instance to be used for processing. - update_instructions (bool, optional): Flag to decide if instructions should be updated. Defaults to True. - + Returns: ShortTermMemory: The updated experience after improvements. """ @@ -200,8 +201,7 @@ def apply( self, dataset: Union[Dataset, InternalDataFrame], runtime: LLMRuntime, - experience: ShortTermMemory - ) -> ShortTermMemory: + ) -> InternalDataFrame: """ Applies the LLM skill on a dataset and returns the results. @@ -213,8 +213,6 @@ def apply( Returns: ShortTermMemory: The updated experience after applying the skill. 
""" - - experience = experience.model_copy() predictions = [] if isinstance(dataset, InternalDataFrame): @@ -224,21 +222,19 @@ def apply( runtime_predictions = self(batch, runtime, dataset) predictions.append(runtime_predictions) - if not predictions: - predictions = InternalDataFrame() - else: - predictions = InternalDataFrameConcat(predictions, copy=False) - predictions.rename(columns={self.prediction_field: self.name}, inplace=True) + if predictions: + return InternalDataFrameConcat(predictions, copy=False) - experience.predictions = predictions - return experience + return InternalDataFrame(columns=dataset.df.columns.tolist() + [self.name]) def analyze( - self, experience: ShortTermMemory, + self, + predictions: InternalDataFrame, + errors: InternalDataFrame, student_runtime: Runtime, teacher_runtime: Optional[Runtime] = None, - memory: Optional[LongTermMemory] = None - ) -> ShortTermMemory: + memory: Optional[Memory] = None + ) -> str: """ Analyzes the results to identify any discrepancies and returns the observed experience. @@ -251,41 +247,33 @@ def analyze( Returns: ShortTermMemory: The updated experience after analysis. 
""" - - experience = experience.model_copy() - - # TODO: can be multiple prediction validation fields - match = experience.match_column_name - errors = experience.evaluations[~experience.evaluations[match]] - experience.accuracy = experience.evaluations[match].mean() - if errors.empty: - # No errors - nothing to analyze - experience.errors = errors - return experience # collect errors and create error report # first sample errors - make it uniform, but more sophisticated sampling can be implemented - errors = errors.sample(n=min(3, errors.shape[0])) - - # collect error inputs from runtime + MAX_ERRORS = 3 + errors = errors.sample(n=min(MAX_ERRORS, errors.shape[0])) + # TODO: ground truth column name can be the input parameter that comes from GT signal + ground_truth_column_name = errors.columns[-1] extra_fields = self._get_extra_fields() - inputs = student_runtime.process_batch_inputs( - batch=errors, + + # get error prepared inputs + inputs = student_runtime.process_batch( + batch=predictions.loc[errors.index], input_template=self.input_template, extra_fields=extra_fields ) - # construct error report - errors = pd.concat([ - inputs, - errors[[self.name, experience.ground_truth_column_name]] - ], axis=1) - errors.columns = ['input', 'prediction', 'ground_truth'] if not teacher_runtime: teacher_runtime = student_runtime + predictions_and_errors = pd.concat([ + inputs, + predictions[self.name].loc[errors.index], + errors[ground_truth_column_name] + ], axis=1) + predictions_and_errors.columns = ['input', 'prediction', 'ground_truth'] error_reasons = teacher_runtime.process_batch( - errors, + batch=predictions_and_errors, instructions="{{#system~}}\n" "LLM prompt was created by concatenating instructions with text input:\n\n" "Prediction = LLM(Input, Instructions)\n\n" @@ -298,46 +286,51 @@ def analyze( "{{input}}\n" "Prediction: {{prediction}}\n" "Ground truth: {{ground_truth}}\n" - "Explanation:\n" + "Error reason:\n" "{{~/user}}", 
output_template="{{#assistant~}}{{gen 'reason'}}{{~/assistant}}", extra_fields=extra_fields ) - errors['reason'] = error_reasons['reason'] - - experience.errors = errors - return experience + predictions_and_errors['reason'] = error_reasons['reason'] + # build error report + result = teacher_runtime.process_record( + record={ + 'predictions_and_errors': predictions_and_errors.to_dict(orient='records'), + }, + input_template="{{#each predictions_and_errors}}" + "\n{{this.input}}\n" + "Prediction: {{this.prediction}}\n" + "Ground truth: {{this.ground_truth}}\n" + 'Error reason: {{this.reason}}\n' + "{{/each}}" + ) + # no specific output specified, all output is in the error report + error_report = result[''] + return error_report def improve( self, - experience: ShortTermMemory, + error_analysis: str, runtime: Runtime, - update_instructions: bool = True, - ) -> ShortTermMemory: + ): """ Refines the LLM skill based on its recent experiences. Args: experience (ShortTermMemory): The current experience. runtime (Runtime): The runtime instance to be used for processing. - update_instructions (bool, optional): Flag to decide if instructions should be updated. Defaults to True. - - Returns: - ShortTermMemory: The updated experience after improvements. """ - - experience = experience.model_copy() - errors = experience.errors.to_dict(orient='records') result = runtime.process_record( record={ - 'errors': errors + 'error_analysis': error_analysis }, instructions="{{#system~}}\n" "LLM prompt was created by concatenating instructions with text input:\n\n" "Prediction = LLM(Input, Instructions)\n\n" "We expect the prediction to be equal to the ground truth.\n" - "Your task is to craft a revised concise instruction for the LLM. 
" + "Your task is to analyze errors made by old instructions " + "and craft new instructions for the LLM.\n" "Follow best practices for LLM prompt engineering.\n" "Include 2-3 examples at the end of your response to demonstrate how the new instruction would be applied.\n" "Use the following format for your examples:\n" @@ -345,23 +338,11 @@ def improve( "Output: ...\n\n" "{{~/system}}\n", input_template="{{#user~}}\n" - f"Old instruction: {self.instructions}\n\n" - "Errors:\n{{#each errors}}" - "\n{{this.input}}\n" - "Prediction: {{this.prediction}}\n" - "Ground truth: {{this.ground_truth}}\n" - "{{/each}}\n" + f"Old instructions: {self.instructions}\n\n" + "Errors:\n{{error_analysis}}\n" "New instruction:\n" "{{~/user}}", output_template="{{#assistant~}}{{gen 'new_instruction'}}{{~/assistant}}", extra_fields=self._get_extra_fields() ) - new_instruction = result['new_instruction'] - - experience.initial_instructions = self.instructions - experience.updated_instructions = new_instruction - - if update_instructions: - self.instructions = new_instruction - - return experience + self.instructions = result['new_instruction'] diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py index 4704642..d727cfe 100644 --- a/adala/skills/skillset.py +++ b/adala/skills/skillset.py @@ -1,9 +1,11 @@ from pydantic import BaseModel, model_validator, field_validator from abc import ABC, abstractmethod -from typing import List, Union, Dict, Any, Optional +from typing import List, Union, Dict, Any, Optional, Mapping +from collections import OrderedDict from adala.datasets.base import Dataset from adala.runtimes.base import Runtime -from adala.memories.base import ShortTermMemory +from adala.utils.logs import print_text +from adala.utils.internal_data import InternalDataFrame, InternalSeries, InternalDataFrameConcat from .base import BaseSkill, LLMSkill @@ -17,36 +19,71 @@ class SkillSet(BaseModel, ABC): cases, task decomposition can involve a graph-based approach. 
Args: - skills (Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]]): Provided skills + skills (Dict[str, BaseSkill]): Skills in the skill set. """ - skills: Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]] + skills: Dict[str, BaseSkill] @abstractmethod - def apply(self, dataset: Dataset, runtime: Runtime, experience: Optional[ShortTermMemory] = None) -> ShortTermMemory: + def apply( + self, + dataset: Union[Dataset, InternalDataFrame], + runtime: Runtime, + improved_skill: Optional[str] = None + ) -> InternalDataFrame: """ Apply the skill set to a dataset using a specified runtime. Args: - dataset (Dataset): The dataset to apply the skill set to. + dataset (Union[Dataset, InternalDataFrame]): The dataset to apply the skill set to. runtime (Runtime): The runtime environment in which to apply the skills. - experience (Optional[ShortTermMemory], optional): Existing experience data. Defaults to None. - + improved_skill (Optional[str], optional): Name of the skill to start from (to optimize calculations). Defaults to None. Returns: - ShortTermMemory: Updated experience after applying the skill set. + InternalDataFrame: Skill predictions. """ @abstractmethod - def select_skill_to_improve(self, experience: ShortTermMemory) -> BaseSkill: + def select_skill_to_improve(self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0) -> Optional[BaseSkill]: """ - Select the next skill to enhance based on the current experience. - + Select skill to improve based on accuracy. + Args: - experience (ShortTermMemory): Current experience data. - + accuracy (Mapping): Skills accuracies. + accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 1.0. Returns: - BaseSkill: Skill selected for improvement. + Optional[BaseSkill]: Skill to improve. None if no skill to improve. + """ + + def __getitem__(self, skill_name) -> BaseSkill: """ + Select skill by name. + + Args: + skill_name (str): Name of the skill to select. 
+ + Returns: + BaseSkill: Skill + """ + return self.skills[skill_name] + + def __setitem__(self, skill_name, skill: BaseSkill): + """ + Set skill by name. + + Args: + skill_name (str): Name of the skill to set. + skill (BaseSkill): Skill to set. + """ + self.skills[skill_name] = skill + + def get_skill_names(self) -> List[str]: + """ + Get list of skill names. + + Returns: + List[str]: List of skill names. + """ + return list(self.skills.keys()) class LinearSkillSet(SkillSet): @@ -78,8 +115,9 @@ class LinearSkillSet(SkillSet): """ skill_sequence: List[str] = None + input_data_field: Optional[str] = None - @field_validator('skills') + @field_validator('skills', mode='before') def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]) -> Dict[str, BaseSkill]: """ Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances. @@ -90,12 +128,13 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki Returns: Dict[str, BaseSkill]: Dictionary mapping skill names to their corresponding BaseSkill instances. 
""" + skills = OrderedDict() if not v: - return {} - skills = {} + return skills + + input_data_field = None if isinstance(v, list) and isinstance(v[0], str): # if list of strings presented, they are interpreted as skill instructions - input_data_field = 'text' for i, instructions in enumerate(v): skill_name = f"skill_{i}" skills[skill_name] = LLMSkill( @@ -107,7 +146,6 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki input_data_field = skill_name elif isinstance(v, dict) and isinstance(v[list(v.keys())[0]], str): # if dictionary of strings presented, they are interpreted as skill instructions - input_data_field = 'text' for skill_name, instructions in v.items(): skills[skill_name] = LLMSkill( name=skill_name, @@ -118,7 +156,8 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki input_data_field = skill_name elif isinstance(v, list) and isinstance(v[0], BaseSkill): # convert list of skill names to dictionary - skills = {skill.name: skill for skill in v} + for skill in v: + skills[skill.name] = skill elif isinstance(v, dict): skills = v else: @@ -133,54 +172,64 @@ def skill_sequence_validator(self): Returns: LinearSkillSet: The current instance with updated skill_sequence attribute. 
""" - if self.skill_sequence is None: # use default skill sequence defined by lexicographical order - self.skill_sequence = sorted(self.skills.keys()) + self.skill_sequence = list(self.skills.keys()) + if len(self.skill_sequence) != len(self.skills): + raise ValueError(f"skill_sequence must contain all skill names - " + f"length of skill_sequence is {len(self.skill_sequence)} " + f"while length of skills is {len(self.skills)}") return self def apply( - self, dataset: Dataset, + self, + dataset: Union[Dataset, InternalDataFrame], runtime: Runtime, - experience: Optional[ShortTermMemory] = None - ) -> ShortTermMemory: + improved_skill: Optional[str] = None, + ) -> InternalDataFrame: """ Sequentially applies each skill on the dataset, enhancing the agent's experience. Args: dataset (Dataset): The dataset to apply the skills on. runtime (Runtime): The runtime environment in which to apply the skills. - experience (Optional[ShortTermMemory], optional): Existing experience data. Defaults to None. - + improved_skill (Optional[str], optional): Name of the skill to improve. Defaults to None. Returns: - ShortTermMemory: Updated experience after sequentially applying the skills. + InternalDataFrame: Skill predictions. 
""" - if experience is None: - experience = ShortTermMemory(dataset=dataset) - else: - experience = experience.model_copy() - for i, skill_name in enumerate(self.skill_sequence): + predictions = None + if improved_skill: + # start from the specified skill, assuming previous skills have already been applied + skill_sequence = self.skill_sequence[self.skill_sequence.index(improved_skill):] + else: + skill_sequence = self.skill_sequence + for i, skill_name in enumerate(skill_sequence): skill = self.skills[skill_name] # use input dataset for the first node in the pipeline - input_dataset = dataset if i == 0 else experience.predictions - experience = skill.apply(input_dataset, runtime, experience) + input_dataset = dataset if i == 0 else predictions + print_text(f"Applying skill: {skill_name}") + predictions = skill.apply(input_dataset, runtime) - return experience + return predictions - def select_skill_to_improve(self, experience: ShortTermMemory) -> BaseSkill: + def select_skill_to_improve( + self, + accuracy: Mapping, + accuracy_threshold: Optional[float] = 1.0 + ) -> Optional[BaseSkill]: """ - Picks the next skill for improvement in the sequence. - + Selects the skill with the lowest accuracy to improve. + Args: - experience (ShortTermMemory): Current experience data. - + accuracy (Mapping): Accuracy of each skill. + accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 1.0. Returns: - BaseSkill: The next skill selected for improvement. + Optional[BaseSkill]: Skill to improve. None if no skill to improve. 
""" - - # TODO: implement real logic for skill selection - return self.skills[self.skill_sequence[-1]] + for skill_name in self.skill_sequence: + if accuracy[skill_name] < accuracy_threshold: + return self.skills[skill_name] def __rich__(self): """Returns a rich representation of the skill.""" diff --git a/adala/utils/internal_data.py b/adala/utils/internal_data.py index 8c8f669..bec57e4 100644 --- a/adala/utils/internal_data.py +++ b/adala/utils/internal_data.py @@ -6,6 +6,7 @@ # Internal data tables representation. Replace this with Dask or Polars in the future. InternalDataFrame = pd.DataFrame +InternalSeries = pd.Series def InternalDataFrame_encoder(df: InternalDataFrame) -> List: diff --git a/adala/utils/matching.py b/adala/utils/matching.py new file mode 100644 index 0000000..d41fac2 --- /dev/null +++ b/adala/utils/matching.py @@ -0,0 +1,14 @@ +import pandas as pd +import difflib +from .internal_data import InternalSeries + + +# Function to apply fuzzy matching +def _fuzzy_match(str1, str2, match_threshold=0.95): + ratio = difflib.SequenceMatcher(None, str1.strip(), str2.strip()).ratio() + return ratio >= match_threshold + + +def fuzzy_match(x: InternalSeries, y: InternalSeries, threshold=0.8): + result = x.combine(y, lambda x, y: _fuzzy_match(x, y, threshold)) + return result diff --git a/docs/src/index.md b/docs/src/index.md index af173f7..92a6c9a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -64,7 +64,7 @@ agent = Agent( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=ground_truth_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment_classification": "ground_truth"} ), # define a skill @@ -79,15 +79,15 @@ agent = Agent( runtimes = { # You can specify your OPENAI API KEY here via `OpenAIRuntime(..., api_key='your-api-key')` 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'), - 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'), + 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo') }, 
default_runtime='openai', # NOTE! If you have access to GPT-4, you can uncomment the lines bellow for better results - # default_teacher_runtime='openai-gpt4', - # teacher_runtimes = { - # 'openai-gpt4': OpenAIRuntime(model='gpt-4') - # } +# default_teacher_runtime='openai-gpt4', +# teacher_runtimes = { +# 'openai-gpt4': OpenAIRuntime(model='gpt-4') +# } ) print(agent) @@ -96,9 +96,9 @@ print(agent.skills) agent.learn(learning_iterations=3, accuracy_threshold=0.95) print('\n=> Run tests ...') -run = agent.apply_skills(predict_dataset) +predictions = agent.run(predict_dataset) print('\n => Test results:') -print(run) +print(predictions) ``` ## Reference diff --git a/examples/classification_skill.ipynb b/examples/classification_skill.ipynb index f5acd9c..fe63102 100644 --- a/examples/classification_skill.ipynb +++ b/examples/classification_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Classification skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -78,7 +76,7 @@ "4 Natural finish for your lips. Beauty/Personal Care" ] }, - "execution_count": 11, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -97,15 +95,27 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
Applying skill: product_category_classification\n", + "\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:00<00:00, 45.32it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 40.72it/s]\n" ] }, { @@ -142,7 +152,7 @@ "data": { "text/html": [ "
\n", - " text category product_category_clβ¦ score category__x__produβ¦ \n", + " text category product_category_clβ¦ score product_category_cβ¦ \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " Apple product with a Electronics Electronics {'Footwear/Clothingβ¦ True \n", " sleek design. -7.4104013, \n", @@ -201,7 +211,7 @@ ], "text/plain": [ " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_clβ¦\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory__x__produβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_clβ¦\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " Apple product with a Electronics Electronics {'Footwear/Clothingβ¦ True \n", " sleek design. 
-7.4104013, \n", @@ -261,6 +271,19 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -738,59 +849,74 @@ "data": { "text/html": [ "Accuracy = 80.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -278,18 +301,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|ββββββββββββββββ| 1/1 [00:00<00:00, 137.21it/s]\n", - "100%|βββββββββββββββββ| 1/1 [00:04<00:00, 4.85s/it]\n" + "100%|βββββββββββββββββββββββββββββββ| 1/1 [00:00<00:00, 140.29it/s]\n", + "100%|ββββββββββββββββββββββββββββββββ| 1/1 [00:02<00:00, 2.67s/it]\n" ] }, { "data": { "text/html": [ - "Number of errors: 1\n", + "\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -575,18 +639,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|ββββββββββββββββ| 2/2 [00:00<00:00, 229.64it/s]\n", - "100%|βββββββββββββββββ| 2/2 [00:00<00:00, 24.71it/s]\n" + "100%|βββββββββββββββββββββββββββββββ| 2/2 [00:00<00:00, 203.21it/s]\n", + "100%|ββββββββββββββββββββββββββββββββ| 2/2 [00:00<00:00, 23.67it/s]\n" ] }, { "data": { "text/html": [ - "Error analysis for skill \"product_category_classification\":\n", + "\n", "
\n" ], "text/plain": [ - "Number of errors: \u001b[1;36m1\u001b[0m\n" + "Error analysis for skill \u001b[32m\"product_category_classification\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -298,11 +323,27 @@ { "data": { "text/html": [ - "Accuracy = 80.00%\n", + "\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -503,61 +554,74 @@ "data": { "text/html": [ "\n", + "Input: Laptop stand for the kitchen.\n", + "Prediction: Electronics\n", + "Ground truth: Furniture/Home Decor\n", + "Error reason: The error reason is that the original instruction does not provide clear guidelines on how to label \n", + "products that have multiple potential categories. In this case, the input \"Laptop stand for the kitchen\" could be \n", + "interpreted as both an electronic device (laptop stand) and a piece of furniture/home decor (for the kitchen). \n", + "Without further clarification in the instructions, it is difficult to determine the correct label.\n", + "\n", "\n" ], "text/plain": [ - "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + "\n", + "\u001b[32mInput: Laptop stand for the kitchen.\u001b[0m\n", + "\u001b[32mPrediction: Electronics\u001b[0m\n", + "\u001b[32mGround truth: Furniture/Home Decor\u001b[0m\n", + "\u001b[32mError reason: The error reason is that the original instruction does not provide clear guidelines on how to label \u001b[0m\n", + "\u001b[32mproducts that have multiple potential categories. In this case, the input \u001b[0m\u001b[32m\"Laptop stand for the kitchen\"\u001b[0m\u001b[32m could be \u001b[0m\n", + "\u001b[32minterpreted as both an electronic device \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mlaptop stand\u001b[0m\u001b[1;32m)\u001b[0m\u001b[32m and a piece of furniture/home decor \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mfor the kitchen\u001b[0m\u001b[1;32m)\u001b[0m\u001b[32m. 
\u001b[0m\n", + "\u001b[32mWithout further clarification in the instructions, it is difficult to determine the correct label.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -339,37 +380,33 @@ { "data": { "text/html": [ - "Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', \n", - "'Furniture/Home Decor', 'Beauty/Personal Care']. Choose the label that best represents the main category of the \n", - "input text.\n", - "\n", - "Examples:\n", + "\n" ], "text/plain": [ - "Comparing predictions to ground truth data \u001B[33m...\u001B[0m\n" + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -282,7 +289,7 @@ "data": { "text/html": [ "Label the input text with the most relevant label based on the primary function or purpose of the product. If the \n", + "product can be categorized into multiple labels, prioritize the label that best represents the primary function or \n", + "purpose. If it is still unclear, choose the label that is most commonly associated with similar products.\n", "\n", "Input: Laptop stand for the kitchen.\n", "Output: Furniture/Home Decor\n", "\n", - "Input: Running shoes for men.\n", - "Output: Footwear/Clothing\n", + "Input: Smartwatch with fitness tracking features.\n", + "Output: Electronics\n", "\n", - "Input: Organic shampoo for dry hair.\n", - "Output: Beauty/Personal Care\n", + "Input: Organic dark chocolate bar.\n", + "Output: Food/Beverages\n", "\n" ], "text/plain": [ - "\u001b[1;32mCategorize the input text into one of the following labels: \u001b[0m\u001b[1;32m[\u001b[0m\u001b[32m'Footwear/Clothing'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Electronics'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Food/Beverages'\u001b[0m\u001b[1;32m, \u001b[0m\n", - "\u001b[32m'Furniture/Home Decor'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'Beauty/Personal Care'\u001b[0m\u001b[1;32m]\u001b[0m\u001b[1;32m. 
Choose the label that best represents the main category of the \u001b[0m\n", - "\u001b[1;32minput text.\u001b[0m\n", - "\n", - "\u001b[1;32mExamples:\u001b[0m\n", + "\u001b[1;32mLabel the input text with the most relevant label based on the primary function or purpose of the product. If the \u001b[0m\n", + "\u001b[1;32mproduct can be categorized into multiple labels, prioritize the label that best represents the primary function or \u001b[0m\n", + "\u001b[1;32mpurpose. If it is still unclear, choose the label that is most commonly associated with similar products.\u001b[0m\n", "\n", "\u001b[1;32mInput: Laptop stand for the kitchen.\u001b[0m\n", "\u001b[1;32mOutput: Furniture/Home Decor\u001b[0m\n", "\n", - "\u001b[1;32mInput: Running shoes for men.\u001b[0m\n", - "\u001b[1;32mOutput: Footwear/Clothing\u001b[0m\n", + "\u001b[1;32mInput: Smartwatch with fitness tracking features.\u001b[0m\n", + "\u001b[1;32mOutput: Electronics\u001b[0m\n", "\n", - "\u001b[1;32mInput: Organic shampoo for dry hair.\u001b[0m\n", - "\u001b[1;32mOutput: Beauty/Personal Care\u001b[0m\n" + "\u001b[1;32mInput: Organic dark chocolate bar.\u001b[0m\n", + "\u001b[1;32mOutput: Food/Beverages\u001b[0m\n" ] }, "metadata": {}, @@ -388,11 +425,24 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "Applying skill: product_category_classification\n", + "\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:03<00:00, 1.48it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:01<00:00, 3.32it/s]\n" ] }, { @@ -429,121 +479,121 @@ "data": { "text/html": [ "\n", - " text category product_category_clβ¦ score category__x__produβ¦ \n", + " text category product_category_clβ¦ score product_category_cβ¦ \n", " 
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " Apple product with a Electronics Electronics {'Footwear/Clothingβ¦ True \n", - " sleek design. -13.669698, \n", + " sleek design. -23.812187, \n", " 'Electronics': \n", - " -4.4849444000336985β¦ \n", + " -1.9361265000364793β¦ \n", " 'Food/Beverages': \n", - " -14.937825, \n", + " -20.240387, \n", " 'Furniture/Home \n", - " Decor': -13.595754, \n", + " Decor': -15.734467, \n", " 'Beauty/Personal \n", - " Care': -13.327497} \n", + " Care': -17.71083} \n", " Laptop stand for the Furniture/Home Decor Furniture/Home Decor {'Footwear/Clothingβ¦ True \n", - " kitchen. -9.9471035, \n", + " kitchen. -16.188046, \n", " 'Electronics': \n", - " -4.787397, \n", + " -3.246235, \n", " 'Food/Beverages': \n", - " -12.115164, \n", + " -11.704685, \n", " 'Furniture/Home \n", " Decor': \n", - " -0.0084281690000000β¦ \n", + " -0.039707113, \n", " 'Beauty/Personal \n", - " Care': -12.145201} \n", + " Care': -15.307133} \n", " Chocolate leather Footwear/Clothing Footwear/Clothing {'Footwear/Clothingβ¦ True \n", - " boots. -0.0003247375000000β¦ \n", + " boots. -0.0002129574700000β¦ \n", " 'Electronics': \n", - " -17.322811, \n", + " -14.297362, \n", " 'Food/Beverages': \n", - " -8.062444, \n", + " -13.440421, \n", " 'Furniture/Home \n", - " Decor': -12.040547, \n", + " Decor': -10.221389, \n", " 'Beauty/Personal \n", - " Care': -12.584134} \n", + " Care': -8.653571} \n", " Wooden cream for Furniture/Home Decor Furniture/Home Decor {'Footwear/Clothingβ¦ True \n", - " surfaces. -15.480099, \n", + " surfaces. 
-15.676728, \n", " 'Electronics': \n", - " -17.015057, \n", + " -12.5098505, \n", " 'Food/Beverages': \n", - " -13.499149, \n", + " -10.770715, \n", " 'Furniture/Home \n", " Decor': \n", - " -0.0001718358800000β¦ \n", + " -0.0001917392200000β¦ \n", " 'Beauty/Personal \n", - " Care': -8.679317} \n", + " Care': -8.698747} \n", " Natural finish for Beauty/Personal Care Beauty/Personal Care {'Footwear/Clothingβ¦ True \n", - " your lips. -11.842119, \n", + " your lips. -18.403374, \n", " 'Electronics': \n", - " -14.539164, \n", + " -17.621948, \n", " 'Food/Beverages': \n", - " -13.285265, \n", + " -14.839035, \n", " 'Furniture/Home \n", - " Decor': -14.923815, \n", + " Decor': -18.330505, \n", " 'Beauty/Personal \n", " Care': \n", - " -9.72990600003512e-β¦ \n", + " -4.3201999994718403β¦ \n", " \n", "\n" ], "text/plain": [ " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_clβ¦\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory__x__produβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_clβ¦\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mproduct_category_cβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " Apple product with a Electronics Electronics {'Footwear/Clothingβ¦ True \n", - " sleek design. -13.669698, \n", + " sleek design. 
-23.812187, \n", " 'Electronics': \n", - " -4.4849444000336985β¦ \n", + " -1.9361265000364793β¦ \n", " 'Food/Beverages': \n", - " -14.937825, \n", + " -20.240387, \n", " 'Furniture/Home \n", - " Decor': -13.595754, \n", + " Decor': -15.734467, \n", " 'Beauty/Personal \n", - " Care': -13.327497} \n", + " Care': -17.71083} \n", " \u001b[2m \u001b[0m\u001b[2mLaptop stand for the\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Footwear/Clothingβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m\u001b[2mkitchen. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-9.9471035, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mkitchen. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-16.188046, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Electronics': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.787397, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-3.246235, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Food/Beverages': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-12.115164, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m\u001b[2m-11.704685, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Furniture/Home \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mDecor': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0084281690000000β¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.039707113, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Beauty/Personal \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -12.145201} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -15.307133} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Chocolate leather Footwear/Clothing Footwear/Clothing {'Footwear/Clothingβ¦ True \n", - " boots. -0.0003247375000000β¦ \n", + " boots. 
-0.0002129574700000β¦ \n", " 'Electronics': \n", - " -17.322811, \n", + " -14.297362, \n", " 'Food/Beverages': \n", - " -8.062444, \n", + " -13.440421, \n", " 'Furniture/Home \n", - " Decor': -12.040547, \n", + " Decor': -10.221389, \n", " 'Beauty/Personal \n", - " Care': -12.584134} \n", + " Care': -8.653571} \n", " \u001b[2m \u001b[0m\u001b[2mWooden cream for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFurniture/Home Decor\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Footwear/Clothingβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m\u001b[2msurfaces. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-15.480099, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2msurfaces. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-15.676728, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Electronics': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-17.015057, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-12.5098505, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Food/Beverages': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-13.499149, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m\u001b[2m-10.770715, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Furniture/Home \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mDecor': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0001718358800000β¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.0001917392200000β¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Beauty/Personal \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", - " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -8.679317} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mCare': -8.698747} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Natural finish for Beauty/Personal Care Beauty/Personal Care {'Footwear/Clothingβ¦ True \n", - " your lips. -11.842119, \n", + " your lips. 
-18.403374, \n", " 'Electronics': \n", - " -14.539164, \n", + " -17.621948, \n", " 'Food/Beverages': \n", - " -13.285265, \n", + " -14.839035, \n", " 'Furniture/Home \n", - " Decor': -14.923815, \n", + " Decor': -18.330505, \n", " 'Beauty/Personal \n", " Care': \n", - " -9.72990600003512e-β¦ \n", + " -4.3201999994718403β¦ \n", " \n" ] }, @@ -553,50 +603,11 @@ { "data": { "text/html": [ - "Analyze evaluation experience ...\n", + "\n" ], "text/plain": [ - "\u001B[1;34mAgent Instance\u001B[0m\n", + "\u001b[1;34mAgent Instance\u001b[0m\n", "\n", "Environment: BasicEnvironment\n", "Skills: subjectivity_detection\n", - "Runtimes: openai, openai-gpt3, openai-gpt4\n", + "Runtimes: openai\n", "Default Runtime: openai\n", "Default Teacher Runtime: openai-gpt4\n" ] @@ -205,17 +198,20 @@ " # basic environment extracts ground truth signal from the input records\n", " environment=BasicEnvironment(\n", " ground_truth_dataset=dataset,\n", - " ground_truth_column='ground_truth'\n", + " ground_truth_columns={'subjectivity_detection': 'ground_truth'}\n", " ),\n", " \n", " runtimes = {\n", " # You can specify your OPENAI API KEY here via `OpenAIRuntime(..., api_key='your-api-key')`\n", " 'openai': OpenAIRuntime(model='gpt-3.5-turbo-instruct'),\n", - " 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'),\n", - " 'openai-gpt4': OpenAIRuntime(model='gpt-4'),\n", " },\n", " default_runtime='openai',\n", " \n", + " teacher_runtimes = {\n", + " 'openai-gpt3': OpenAIRuntime(model='gpt-3.5-turbo'),\n", + " 'openai-gpt4': OpenAIRuntime(model='gpt-4'),\n", + " },\n", + " \n", " # NOTE! 
If you don't have an access to gpt4 - replace it with \"openai-gpt3\"\n", " default_teacher_runtime='openai-gpt4'\n", ")\n", @@ -225,7 +221,6 @@ }, { "cell_type": "markdown", - "id": "8340dde8", "metadata": {}, "source": [ "## Learning Agent\n", @@ -235,17 +230,29 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "666c8d0f", + "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ + { + "data": { + "text/html": [ + "No skill to improve found. Stopping learning process.\n", "\n" ], "text/plain": [ - "Analyze evaluation experience \u001b[33m...\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Number of errors: 0\n", - "
\n" - ], - "text/plain": [ - "Number of errors: \u001b[1;36m0\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Accuracy = 100.00%\n", - "\n" - ], - "text/plain": [ - "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m100.00\u001b[0m\u001b[1;31m%\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Accuracy threshold reached (1.0 >= 0.9)\n", - "\n" - ], - "text/plain": [ - "Accuracy threshold reached \u001b[1m(\u001b[0m\u001b[1;36m1.0\u001b[0m >= \u001b[1;36m0.9\u001b[0m\u001b[1m)\u001b[0m\n" + "No skill to improve found. Stopping learning process.\n" ] }, "metadata": {}, @@ -618,62 +629,17 @@ { "data": { "text/plain": [ - "ShortTermMemory(dataset=DataFrameDataset(df= text category\n", - "0 Apple product with a sleek design. Electronics\n", - "1 Laptop stand for the kitchen. Furniture/Home Decor\n", - "2 Chocolate leather boots. Footwear/Clothing\n", - "3 Wooden cream for surfaces. Furniture/Home Decor\n", - "4 Natural finish for your lips. Beauty/Personal Care), predictions= text category \\\n", - "0 Apple product with a sleek design. Electronics \n", - "1 Laptop stand for the kitchen. Furniture/Home Decor \n", - "2 Chocolate leather boots. Footwear/Clothing \n", - "3 Wooden cream for surfaces. Furniture/Home Decor \n", - "4 Natural finish for your lips. Beauty/Personal Care \n", - "\n", - " product_category_classification \\\n", - "0 Electronics \n", - "1 Furniture/Home Decor \n", - "2 Footwear/Clothing \n", - "3 Furniture/Home Decor \n", - "4 Beauty/Personal Care \n", - "\n", - " score \n", - "0 {'Footwear/Clothing': -13.669698, 'Electronics... \n", - "1 {'Footwear/Clothing': -9.9471035, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.0003247375000000436, ... \n", - "3 {'Footwear/Clothing': -15.480099, 'Electronics... \n", - "4 {'Footwear/Clothing': -11.842119, 'Electronics... 
, evaluations= text category \\\n", - "0 Apple product with a sleek design. Electronics \n", - "1 Laptop stand for the kitchen. Furniture/Home Decor \n", - "2 Chocolate leather boots. Footwear/Clothing \n", - "3 Wooden cream for surfaces. Furniture/Home Decor \n", - "4 Natural finish for your lips. Beauty/Personal Care \n", - "\n", - " product_category_classification \\\n", - "0 Electronics \n", - "1 Furniture/Home Decor \n", - "2 Footwear/Clothing \n", - "3 Furniture/Home Decor \n", - "4 Beauty/Personal Care \n", - "\n", - " score \\\n", - "0 {'Footwear/Clothing': -13.669698, 'Electronics... \n", - "1 {'Footwear/Clothing': -9.9471035, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.0003247375000000436, ... \n", - "3 {'Footwear/Clothing': -15.480099, 'Electronics... \n", - "4 {'Footwear/Clothing': -11.842119, 'Electronics... \n", - "\n", - " category__x__product_category_classification \n", - "0 True \n", - "1 True \n", - "2 True \n", - "3 True \n", - "4 True , ground_truth_column_name='category', match_column_name='category__x__product_category_classification', errors=Empty DataFrame\n", - "Columns: [text, category, product_category_classification, score, category__x__product_category_classification]\n", - "Index: [], accuracy=1.0, initial_instructions='Label the input text with the following labels: {{labels}}', updated_instructions=\"Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', 'Furniture/Home Decor', 'Beauty/Personal Care']. 
Choose the label that best represents the main category of the input text.\\n\\nExamples:\\n\\nInput: Laptop stand for the kitchen.\\nOutput: Furniture/Home Decor\\n\\nInput: Running shoes for men.\\nOutput: Footwear/Clothing\\n\\nInput: Organic shampoo for dry hair.\\nOutput: Beauty/Personal Care\")" + "GroundTruthSignal(match= product_category_classification\n", + "0 True\n", + "1 True\n", + "2 True\n", + "3 True\n", + "4 True, errors={'product_category_classification': Empty DataFrame\n", + "Columns: [predictions, category]\n", + "Index: []})" ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -697,7 +663,7 @@ " ),\n", " environment=BasicEnvironment(\n", " ground_truth_dataset=df,\n", - " ground_truth_column='category'\n", + " ground_truth_columns={'product_category_classification': 'category'}\n", " )\n", ")\n", "\n", @@ -706,8 +672,7 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "4a876f3d", + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -716,20 +681,18 @@ "Total Agent Skills: 1\n", "\n", "product_category_classification\n", - "Categorize the input text into one of the following labels: ['Footwear/Clothing', 'Electronics', 'Food/Beverages', \n", - "'Furniture/Home Decor', 'Beauty/Personal Care']. Choose the label that best represents the main category of the \n", - "input text.\n", - "\n", - "Examples:\n", + "Label the input text with the most relevant label based on the primary function or purpose of the product. If the \n", + "product can be categorized into multiple labels, prioritize the label that best represents the primary function or \n", + "purpose. 
If it is still unclear, choose the label that is most commonly associated with similar products.\n", "\n", "Input: Laptop stand for the kitchen.\n", "Output: Furniture/Home Decor\n", "\n", - "Input: Running shoes for men.\n", - "Output: Footwear/Clothing\n", + "Input: Smartwatch with fitness tracking features.\n", + "Output: Electronics\n", "\n", - "Input: Organic shampoo for dry hair.\n", - "Output: Beauty/Personal Care\n", + "Input: Organic dark chocolate bar.\n", + "Output: Food/Beverages\n", "\n", "\n" ], @@ -737,20 +700,18 @@ "\u001b[1;34mTotal Agent Skills: \u001b[0m\u001b[1;34m1\u001b[0m\n", "\n", "\u001b[1;4;32mproduct_category_classification\u001b[0m\n", - "\u001b[32mCategorize the input text into one of the following labels: \u001b[0m\u001b[1;32m[\u001b[0m\u001b[32m'Footwear/Clothing'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Electronics'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Food/Beverages'\u001b[0m\u001b[32m, \u001b[0m\n", - "\u001b[32m'Furniture/Home Decor'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'Beauty/Personal Care'\u001b[0m\u001b[1;32m]\u001b[0m\u001b[32m. Choose the label that best represents the main category of the \u001b[0m\n", - "\u001b[32minput text.\u001b[0m\n", - "\n", - "\u001b[32mExamples:\u001b[0m\n", + "\u001b[32mLabel the input text with the most relevant label based on the primary function or purpose of the product. If the \u001b[0m\n", + "\u001b[32mproduct can be categorized into multiple labels, prioritize the label that best represents the primary function or \u001b[0m\n", + "\u001b[32mpurpose. 
If it is still unclear, choose the label that is most commonly associated with similar products.\u001b[0m\n", "\n", "\u001b[32mInput: Laptop stand for the kitchen.\u001b[0m\n", "\u001b[32mOutput: Furniture/Home Decor\u001b[0m\n", "\n", - "\u001b[32mInput: Running shoes for men.\u001b[0m\n", - "\u001b[32mOutput: Footwear/Clothing\u001b[0m\n", + "\u001b[32mInput: Smartwatch with fitness tracking features.\u001b[0m\n", + "\u001b[32mOutput: Electronics\u001b[0m\n", "\n", - "\u001b[32mInput: Organic shampoo for dry hair.\u001b[0m\n", - "\u001b[32mOutput: Beauty/Personal Care\u001b[0m\n", + "\u001b[32mInput: Organic dark chocolate bar.\u001b[0m\n", + "\u001b[32mOutput: Food/Beverages\u001b[0m\n", "\n" ] }, @@ -766,15 +727,27 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "ee97ee22", + "execution_count": 4, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: product_category_classification\n", + "\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:02<00:00, 2.37it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:02<00:00, 1.78it/s]\n" ] } ], @@ -787,13 +760,12 @@ " \"Leather grain snack bar.\" # Potential categories: Footwear/Clothing or Food/Beverages\n", "], columns=['text'])\n", "\n", - "run = agent.apply_skills(test_df)" + "predictions = agent.run(test_df)" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "03cce2a7", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -827,31 +799,31 @@ "0 \n", "Stainless steel apple peeler. \n", "Food/Beverages \n", - "{'Footwear/Clothing': -5.903179, 'Electronics'... \n", + "{'Footwear/Clothing': -11.584652, 'Electronics... \n", " \n", "\n", " \n", "1 \n", "Silk finish touch screen. \n", "Electronics \n", - "{'Footwear/Clothing': -11.517515, 'Electronics... 
\n", + "{'Footwear/Clothing': -18.90214, 'Electronics'... \n", "\n", " \n", "2 \n", "Chocolate coated boots. \n", "Footwear/Clothing \n", - "{'Footwear/Clothing': -0.074807025, 'Electroni... \n", + "{'Footwear/Clothing': -0.20086760000000006, 'E... \n", "\n", " \n", "3 \n", "Natural wood fragrance. \n", - "Furniture/Home Decor \n", - "{'Footwear/Clothing': -15.117043, 'Electronics... \n", + "Beauty/Personal Care \n", + "{'Footwear/Clothing': -14.69353, 'Electronics'... \n", "\n", " \n", " \n", "\n", @@ -862,32 +834,32 @@ "0 Stainless steel apple peeler. Food/Beverages \n", "1 Silk finish touch screen. Electronics \n", "2 Chocolate coated boots. Footwear/Clothing \n", - "3 Natural wood fragrance. Furniture/Home Decor \n", + "3 Natural wood fragrance. Beauty/Personal Care \n", "4 Leather grain snack bar. Food/Beverages \n", "\n", " score \n", - "0 {'Footwear/Clothing': -5.903179, 'Electronics'... \n", - "1 {'Footwear/Clothing': -11.517515, 'Electronics... \n", - "2 {'Footwear/Clothing': -0.074807025, 'Electroni... \n", - "3 {'Footwear/Clothing': -15.117043, 'Electronics... \n", - "4 {'Footwear/Clothing': -9.763915, 'Electronics'... " + "0 {'Footwear/Clothing': -11.584652, 'Electronics... \n", + "1 {'Footwear/Clothing': -18.90214, 'Electronics'... \n", + "2 {'Footwear/Clothing': -0.20086760000000006, 'E... \n", + "3 {'Footwear/Clothing': -14.69353, 'Electronics'... \n", + "4 {'Footwear/Clothing': -16.15361, 'Electronics'... 
" ] }, - "execution_count": 20, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "run.predictions" + "predictions" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -899,7 +871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/classification_skill_with_CoT.ipynb b/examples/classification_skill_with_CoT.ipynb index d6e2a61..d79cf53 100644 --- a/examples/classification_skill_with_CoT.ipynb +++ b/examples/classification_skill_with_CoT.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Classification skill with Chain-of-Thoughts" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -98,14 +96,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "4 \n", "Leather grain snack bar. \n", "Food/Beverages \n", - "{'Footwear/Clothing': -9.763915, 'Electronics'... \n", + "{'Footwear/Clothing': -16.15361, 'Electronics'... 
\n", "Applying skill: product_category_classification\n", + "\n" + ], + "text/plain": [ + "Applying skill: product_category_classification\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:00<00:00, 7.41it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 7.42it/s]\n" ] }, { @@ -234,16 +244,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -255,7 +264,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/linear_skill_program.ipynb b/examples/linear_skill_program.ipynb new file mode 100644 index 0000000..ee8397c --- /dev/null +++ b/examples/linear_skill_program.ipynb @@ -0,0 +1,2753 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Learning sequence of skills" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adala agent is able to create the sequence of skills based on the provided input/output samples.\n", + "In the example below, we ask agent to build two skills from scratch with the following requirements:\n", + "\n", + "1. First skill get's nutrients `\"category\"` name as input and should produce the output similar to what we specify in `\"entities\"` (for example, list of common nutrients based on provided category)\n", + "\n", + "2. 
Second skill gets the output of the first skill (`\"entities\"`) and generate the text, using the examples provided in the ground truth.\n", + "\n", + "In other words, agent learns how to perform the data generation pipeline like `\"category\"` --> `\"entities\"` --> `\"description\"`. \n", + "You can adjust to your specific use case" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: skill_0\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 58.05it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 70.03it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "=> Iteration #0: Comparing to ground truth, analyzing and improving ...\n", + "\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m0\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Comparing predictions to ground truth data ...\n", + "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " category entities text skill_0 skill_1 skill_0 skill_1 \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates False False \n", + " Proteins, Fats provide quick Macronutrients Macronutrients \n", + " energy, proteins are essential are crucial \n", + " are essential nutrients that nutrients that \n", + " for muscle provide the body supply the body \n", + " repair and with energy and with energy and \n", + " growth, and fats support various facilitate \n", + " are vital for bodily functions. various bodily \n", + " long-term energy They are divided functions. They \n", + " storage and cell into three are categorized \n", + " function. categories: into three \n", + " carbohydrates, groups: \n", + " proteins, and carbohydrates, \n", + " fats. proteins, and \n", + " fats. \n", + " Carbohydrates are \n", + " the main source Carbohydrates \n", + " of energy for the are the primary \n", + " body. They are source of energy \n", + " found in foods for the body. \n", + " such as grains, They can be \n", + " fruits, and found in foods \n", + " vegetables. They like grains, \n", + " are broken down fruits, and \n", + " into glucose, vegetables. They \n", + " which is used by are broken down \n", + " the body for into glucose, \n", + " energy. which is \n", + " utilized by the \n", + " Proteins are body for energy. \n", + " important for \n", + " building and Proteins are \n", + " repairing tissues essential for \n", + " in the body. They building and \n", + " are found in repairing \n", + " foods such as tissues in the \n", + " meat, fish, eggs, body. They can \n", + " and beans. 
be found in \n", + " Proteins are made foods like meat, \n", + " up of amino fish, eggs, and \n", + " acids, which are beans. Proteins \n", + " essential for the are composed of \n", + " body to function amino acids, \n", + " properly. which are \n", + " necessary for \n", + " Fats are a proper bodily \n", + " concentrated function. \n", + " source of energy \n", + " and are important Fats are a \n", + " for insulation concentrated \n", + " and protection of source of energy \n", + " organs. They are and are vital \n", + " found in foods for insulation \n", + " such as oils, and protection \n", + " nuts, and of organs. They \n", + " avocados. Fats can be found in \n", + " are also foods like oils, \n", + " necessary for the nuts, and \n", + " absorption of avocados. Fats \n", + " certain vitamins are also crucial \n", + " and minerals. for the \n", + " absorption of \n", + " In addition to certain vitamins \n", + " providing energy, and minerals. \n", + " macronutrients \n", + " also play a role Aside from \n", + " in maintaining a providing \n", + " healthy immune energy, \n", + " system, macronutrients \n", + " regulating also play a \n", + " hormones, and significant role \n", + " supporting brain in maintaining a \n", + " function. It is strong immune \n", + " important to have system, \n", + " a balanced intake regulating \n", + " of all three hormones, and \n", + " macronutrients in supporting brain \n", + " order to maintain function. It is \n", + " overall health essential to \n", + " and well-being. have a \n", + " well-balanced \n", + " intake of all \n", + " three \n", + " macronutrients \n", + " in order to \n", + " promote overall \n", + " health and \n", + " well-being. \n", + " Vitamins Vitamin A, Vitamin A is False False \n", + " Vitamin C, crucial for good 1. Vitamin A 1. Vitamin A \n", + " Vitamin D vision and a 2. Vitamin B 2. Vitamin B \n", + " healthy immune 3. Vitamin C 3. Vitamin C \n", + " system, Vitamin 4. Vitamin D 4. 
Vitamin D \n", + " C helps in the 5. Vitamin E 5. Vitamin E \n", + " repair of 6. Vitamin K 6. Vitamin K \n", + " tissues and the 7. Thiamine 7. Thiamine \n", + " enzymatic (Vitamin B1) (Vitamin B1) \n", + " production of 8. Riboflavin 8. Riboflavin \n", + " certain (Vitamin B2) (Vitamin B2) \n", + " neurotransmitteβ¦ 9. Niacin 9. Niacin \n", + " and Vitamin D is (Vitamin B3) (Vitamin B3) \n", + " essential for 10. Pantothenic 10. Pantothenic \n", + " strong bones and acid (Vitamin B5) acid (Vitamin \n", + " teeth as it 11. Pyridoxine B5) \n", + " helps the body (Vitamin B6) 11. Pyridoxine \n", + " absorb calcium. 12. Biotin (Vitamin B6) \n", + " (Vitamin B7) 12. Biotin \n", + " 13. Folate (Vitamin B7) \n", + " (Vitamin B9) 13. Folate \n", + " 14. Cobalamin (Vitamin B9) \n", + " (Vitamin B12) 14. Cobalamin \n", + " 15. Choline (Vitamin B12) \n", + " 16. Inositol 15. Choline \n", + " 17. Vitamin B15 16. Inositol \n", + " 18. Vitamin B17 17. Vitamin B15 \n", + " 19. Vitamin F 18. Vitamin B17 \n", + " 20. Vitamin G 19. Vitamin F \n", + " 21. Vitamin H 20. Vitamin G \n", + " 22. Vitamin J 21. Vitamin H \n", + " 23. Vitamin L 22. Vitamin J \n", + " 24. Vitamin M 23. Vitamin L \n", + " 25. Vitamin P 24. Vitamin M \n", + " 26. Vitamin Q 25. Vitamin P \n", + " 27. Vitamin R 26. Vitamin Q \n", + " 28. Vitamin S 27. Vitamin R \n", + " 29. Vitamin T 28. Vitamin S \n", + " 30. Vitamin U 29. Vitamin T \n", + " 31. Vitamin V 30. Vitamin U \n", + " 32. Vitamin W 31. Vitamin V \n", + " 33. Vitamin X 32. Vitamin W \n", + " 34. Vitamin Y 33. Vitamin X \n", + " 35. Vitamin Z 34. Vitamin Y \n", + " 35. Vitamin Z \n", + " Minerals Calcium, Iron, Calcium is False False \n", + " Magnesium necessary for 1. Iron 1. Iron \n", + " maintaining 2. Calcium 2. Calcium \n", + " healthy bones 3. Magnesium 3. Magnesium \n", + " and teeth, Iron 4. Potassium 4. Potassium \n", + " is crucial for 5. Sodium 5. Sodium \n", + " making red blood 6. Zinc 6. Zinc \n", + " cells and 7. Copper 7. 
Copper \n", + " transporting 8. Manganese 8. Manganese \n", + " oxygen 9. Phosphorus 9. Phosphorus \n", + " throughout the 10. Selenium 10. Selenium \n", + " body, and 11. Chromium 11. Chromium \n", + " Magnesium plays 12. Iodine 12. Iodine \n", + " a role in over 13. Fluoride 13. Fluoride \n", + " 300 enzyme 14. Molybdenum 14. Molybdenum \n", + " reactions in the 15. Cobalt 15. Cobalt \n", + " human body, 16. Nickel 16. Nickel \n", + " including the 17. Vanadium 17. Vanadium \n", + " metabolism of 18. Silicon 18. Silicon \n", + " food, synthesis 19. Boron 19. Boron \n", + " of fatty acids 20. Chloride 20. Chloride \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n", + "\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates False False \n", + " Proteins, Fats provide quick Macronutrients Macronutrients \n", + " energy, proteins are essential are crucial \n", + " are essential nutrients that nutrients that \n", + " for muscle provide the body supply the body \n", + " repair and with energy and with energy and \n", + " growth, and fats support various facilitate \n", + " are vital for bodily functions. various bodily \n", + " long-term energy They are divided functions. They \n", + " storage and cell into three are categorized \n", + " function. 
categories: into three \n", + " carbohydrates, groups: \n", + " proteins, and carbohydrates, \n", + " fats. proteins, and \n", + " fats. \n", + " Carbohydrates are \n", + " the main source Carbohydrates \n", + " of energy for the are the primary \n", + " body. They are source of energy \n", + " found in foods for the body. \n", + " such as grains, They can be \n", + " fruits, and found in foods \n", + " vegetables. They like grains, \n", + " are broken down fruits, and \n", + " into glucose, vegetables. They \n", + " which is used by are broken down \n", + " the body for into glucose, \n", + " energy. which is \n", + " utilized by the \n", + " Proteins are body for energy. \n", + " important for \n", + " building and Proteins are \n", + " repairing tissues essential for \n", + " in the body. They building and \n", + " are found in repairing \n", + " foods such as tissues in the \n", + " meat, fish, eggs, body. They can \n", + " and beans. be found in \n", + " Proteins are made foods like meat, \n", + " up of amino fish, eggs, and \n", + " acids, which are beans. Proteins \n", + " essential for the are composed of \n", + " body to function amino acids, \n", + " properly. which are \n", + " necessary for \n", + " Fats are a proper bodily \n", + " concentrated function. \n", + " source of energy \n", + " and are important Fats are a \n", + " for insulation concentrated \n", + " and protection of source of energy \n", + " organs. They are and are vital \n", + " found in foods for insulation \n", + " such as oils, and protection \n", + " nuts, and of organs. They \n", + " avocados. Fats can be found in \n", + " are also foods like oils, \n", + " necessary for the nuts, and \n", + " absorption of avocados. Fats \n", + " certain vitamins are also crucial \n", + " and minerals. for the \n", + " absorption of \n", + " In addition to certain vitamins \n", + " providing energy, and minerals. 
\n", + " macronutrients \n", + " also play a role Aside from \n", + " in maintaining a providing \n", + " healthy immune energy, \n", + " system, macronutrients \n", + " regulating also play a \n", + " hormones, and significant role \n", + " supporting brain in maintaining a \n", + " function. It is strong immune \n", + " important to have system, \n", + " a balanced intake regulating \n", + " of all three hormones, and \n", + " macronutrients in supporting brain \n", + " order to maintain function. It is \n", + " overall health essential to \n", + " and well-being. have a \n", + " well-balanced \n", + " intake of all \n", + " three \n", + " macronutrients \n", + " in order to \n", + " promote overall \n", + " health and \n", + " well-being. \n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m1. Vitamin A \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m1. Vitamin A \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m2. Vitamin B \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m2. 
Vitamin B \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m3. Vitamin C \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m3. Vitamin C \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m4. Vitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m4. Vitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m5. Vitamin E \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m5. Vitamin E \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m6. Vitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m6. Vitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m7. Thiamine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m7. 
Thiamine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B1) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B1) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m8. Riboflavin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m8. Riboflavin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B2) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B2) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitteβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m9. Niacin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m9. Niacin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B3) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B3) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m10. Pantothenic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m10. 
Pantothenic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2macid (Vitamin B5)\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2macid (Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m11. Pyridoxine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mB5) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B6) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m11. Pyridoxine \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m12. Biotin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B6) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B7) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m12. Biotin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m13. Folate \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B7) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B9) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m13. 
Folate \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m14. Cobalamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B9) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B12) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m14. Cobalamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m15. Choline \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m(Vitamin B12) \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m16. Inositol \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m15. Choline \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m17. Vitamin B15 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m16. Inositol \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m18. Vitamin B17 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m17. Vitamin B15 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m19. Vitamin F \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m18. Vitamin B17 \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m20. 
Vitamin G \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m19. Vitamin F \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m21. Vitamin H \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m20. Vitamin G \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m22. Vitamin J \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m21. Vitamin H \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m23. Vitamin L \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m22. Vitamin J \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m24. Vitamin M \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m23. Vitamin L \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m25. Vitamin P \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m24. Vitamin M \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m26. Vitamin Q \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m25. Vitamin P \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m27. Vitamin R \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m26. 
Vitamin Q \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m28. Vitamin S \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m27. Vitamin R \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m29. Vitamin T \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m28. Vitamin S \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m30. Vitamin U \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m29. Vitamin T \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m31. Vitamin V \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m30. Vitamin U \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m32. Vitamin W \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m31. Vitamin V \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m33. Vitamin X \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m32. Vitamin W \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m34. Vitamin Y \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m33. Vitamin X \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m35. 
Vitamin Z \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m34. Vitamin Y \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m35. Vitamin Z \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is False False \n", + " Magnesium necessary for 1. Iron 1. Iron \n", + " maintaining 2. Calcium 2. Calcium \n", + " healthy bones 3. Magnesium 3. Magnesium \n", + " and teeth, Iron 4. Potassium 4. Potassium \n", + " is crucial for 5. Sodium 5. Sodium \n", + " making red blood 6. Zinc 6. Zinc \n", + " cells and 7. Copper 7. Copper \n", + " transporting 8. Manganese 8. Manganese \n", + " oxygen 9. Phosphorus 9. Phosphorus \n", + " throughout the 10. Selenium 10. Selenium \n", + " body, and 11. Chromium 11. Chromium \n", + " Magnesium plays 12. Iodine 12. Iodine \n", + " a role in over 13. Fluoride 13. Fluoride \n", + " 300 enzyme 14. Molybdenum 14. Molybdenum \n", + " reactions in the 15. Cobalt 15. Cobalt \n", + " human body, 16. Nickel 16. Nickel \n", + " including the 17. Vanadium 17. Vanadium \n", + " metabolism of 18. Silicon 18. Silicon \n", + " food, synthesis 19. Boron 19. Boron \n", + " of fatty acids 20. Chloride 20. Chloride \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Accuracy = 0.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Analyze evaluation experience ...\n", + "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββ| 3/3 [00:00<00:00, 185.36it/s]\n", + "100%|βββββββββ| 3/3 [00:00<00:00, 20.22it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "Error analysis for skill \"skill_0\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "Input: Vitamins\n", + "Prediction: \n", + "1. Vitamin A\n", + "2. Vitamin B\n", + "3. Vitamin C\n", + "4. Vitamin D\n", + "5. Vitamin E\n", + "6. Vitamin K\n", + "7. Thiamine (Vitamin B1)\n", + "8. Riboflavin (Vitamin B2)\n", + "9. Niacin (Vitamin B3)\n", + "10. Pantothenic acid (Vitamin B5)\n", + "11. Pyridoxine (Vitamin B6)\n", + "12. Biotin (Vitamin B7)\n", + "13. Folate (Vitamin B9)\n", + "14. Cobalamin (Vitamin B12)\n", + "15. Choline\n", + "16. Inositol\n", + "17. Vitamin B15\n", + "18. Vitamin B17\n", + "19. Vitamin F\n", + "20. Vitamin G\n", + "21. Vitamin H\n", + "22. Vitamin J\n", + "23. Vitamin L\n", + "24. Vitamin M\n", + "25. Vitamin P\n", + "26. Vitamin Q\n", + "27. Vitamin R\n", + "28. Vitamin S\n", + "29. Vitamin T\n", + "30. Vitamin U\n", + "31. Vitamin V\n", + "32. Vitamin W\n", + "33. Vitamin X\n", + "34. Vitamin Y\n", + "35. Vitamin Z\n", + "Ground truth: Vitamin A, Vitamin C, Vitamin D\n", + "Error reason: The instructions are missing in the provided data, making it impossible to determine the specific \n", + "error in the prediction.\n", + "\n", + "Input: Macronutrients\n", + "Prediction: \n", + "Macronutrients are essential nutrients that provide the body with energy and support various bodily functions. They\n", + "are divided into three categories: carbohydrates, proteins, and fats.\n", + "\n", + "Carbohydrates are the main source of energy for the body. They are found in foods such as grains, fruits, and \n", + "vegetables. They are broken down into glucose, which is used by the body for energy.\n", + "\n", + "Proteins are important for building and repairing tissues in the body. They are found in foods such as meat, fish, \n", + "eggs, and beans. 
Proteins are made up of amino acids, which are essential for the body to function properly.\n", + "\n", + "Fats are a concentrated source of energy and are important for insulation and protection of organs. They are found \n", + "in foods such as oils, nuts, and avocados. Fats are also necessary for the absorption of certain vitamins and \n", + "minerals.\n", + "\n", + "In addition to providing energy, macronutrients also play a role in maintaining a healthy immune system, regulating\n", + "hormones, and supporting brain function. It is important to have a balanced intake of all three macronutrients in \n", + "order to maintain overall health and well-being.\n", + "Ground truth: Carbohydrates, Proteins, Fats\n", + "Error reason: The instructions were not clear or specific about what the model should predict about macronutrients.\n", + "As a result, the model provided a detailed explanation about macronutrients instead of just listing them as in the \n", + "ground truth.\n", + "\n", + "Input: Minerals\n", + "Prediction: \n", + "1. Iron\n", + "2. Calcium\n", + "3. Magnesium\n", + "4. Potassium\n", + "5. Sodium\n", + "6. Zinc\n", + "7. Copper\n", + "8. Manganese\n", + "9. Phosphorus\n", + "10. Selenium\n", + "11. Chromium\n", + "12. Iodine\n", + "13. Fluoride\n", + "14. Molybdenum\n", + "15. Cobalt\n", + "16. Nickel\n", + "17. Vanadium\n", + "18. Silicon\n", + "19. Boron\n", + "20. Chloride\n", + "Ground truth: Calcium, Iron, Magnesium\n", + "Error reason: The instructions are missing in the provided context, making it impossible to determine the specific \n", + "error in the prediction.\n", + "\n", + "\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamins\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[1;36m1\u001b[0m\u001b[32m. Vitamin A\u001b[0m\n", + "\u001b[1;36m2\u001b[0m\u001b[32m. Vitamin B\u001b[0m\n", + "\u001b[1;36m3\u001b[0m\u001b[32m. Vitamin C\u001b[0m\n", + "\u001b[1;36m4\u001b[0m\u001b[32m. 
Vitamin D\u001b[0m\n", + "\u001b[1;36m5\u001b[0m\u001b[32m. Vitamin E\u001b[0m\n", + "\u001b[1;36m6\u001b[0m\u001b[32m. Vitamin K\u001b[0m\n", + "\u001b[1;36m7\u001b[0m\u001b[32m. Thiamine \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B1\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m8\u001b[0m\u001b[32m. Riboflavin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B2\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m9\u001b[0m\u001b[32m. Niacin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B3\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m10\u001b[0m\u001b[32m. Pantothenic acid \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B5\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m11\u001b[0m\u001b[32m. Pyridoxine \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B6\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m12\u001b[0m\u001b[32m. Biotin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B7\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m13\u001b[0m\u001b[32m. Folate \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B9\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m14\u001b[0m\u001b[32m. Cobalamin \u001b[0m\u001b[1;32m(\u001b[0m\u001b[32mVitamin B12\u001b[0m\u001b[1;32m)\u001b[0m\n", + "\u001b[1;36m15\u001b[0m\u001b[32m. Choline\u001b[0m\n", + "\u001b[1;36m16\u001b[0m\u001b[32m. Inositol\u001b[0m\n", + "\u001b[1;36m17\u001b[0m\u001b[32m. Vitamin B15\u001b[0m\n", + "\u001b[1;36m18\u001b[0m\u001b[32m. Vitamin B17\u001b[0m\n", + "\u001b[1;36m19\u001b[0m\u001b[32m. Vitamin F\u001b[0m\n", + "\u001b[1;36m20\u001b[0m\u001b[32m. Vitamin G\u001b[0m\n", + "\u001b[1;36m21\u001b[0m\u001b[32m. Vitamin H\u001b[0m\n", + "\u001b[1;36m22\u001b[0m\u001b[32m. Vitamin J\u001b[0m\n", + "\u001b[1;36m23\u001b[0m\u001b[32m. Vitamin L\u001b[0m\n", + "\u001b[1;36m24\u001b[0m\u001b[32m. Vitamin M\u001b[0m\n", + "\u001b[1;36m25\u001b[0m\u001b[32m. Vitamin P\u001b[0m\n", + "\u001b[1;36m26\u001b[0m\u001b[32m. Vitamin Q\u001b[0m\n", + "\u001b[1;36m27\u001b[0m\u001b[32m. 
Vitamin R\u001b[0m\n", + "\u001b[1;36m28\u001b[0m\u001b[32m. Vitamin S\u001b[0m\n", + "\u001b[1;36m29\u001b[0m\u001b[32m. Vitamin T\u001b[0m\n", + "\u001b[1;36m30\u001b[0m\u001b[32m. Vitamin U\u001b[0m\n", + "\u001b[1;36m31\u001b[0m\u001b[32m. Vitamin V\u001b[0m\n", + "\u001b[1;36m32\u001b[0m\u001b[32m. Vitamin W\u001b[0m\n", + "\u001b[1;36m33\u001b[0m\u001b[32m. Vitamin X\u001b[0m\n", + "\u001b[1;36m34\u001b[0m\u001b[32m. Vitamin Y\u001b[0m\n", + "\u001b[1;36m35\u001b[0m\u001b[32m. Vitamin Z\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mError reason: The instructions are missing in the provided data, making it impossible to determine the specific \u001b[0m\n", + "\u001b[32merror in the prediction.\u001b[0m\n", + "\n", + "\u001b[32mInput: Macronutrients\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mMacronutrients are essential nutrients that provide the body with energy and support various bodily functions. They\u001b[0m\n", + "\u001b[32mare divided into three categories: carbohydrates, proteins, and fats.\u001b[0m\n", + "\n", + "\u001b[32mCarbohydrates are the main source of energy for the body. They are found in foods such as grains, fruits, and \u001b[0m\n", + "\u001b[32mvegetables. They are broken down into glucose, which is used by the body for energy.\u001b[0m\n", + "\n", + "\u001b[32mProteins are important for building and repairing tissues in the body. They are found in foods such as meat, fish, \u001b[0m\n", + "\u001b[32meggs, and beans. Proteins are made up of amino acids, which are essential for the body to function properly.\u001b[0m\n", + "\n", + "\u001b[32mFats are a concentrated source of energy and are important for insulation and protection of organs. They are found \u001b[0m\n", + "\u001b[32min foods such as oils, nuts, and avocados. 
Fats are also necessary for the absorption of certain vitamins and \u001b[0m\n", + "\u001b[32mminerals.\u001b[0m\n", + "\n", + "\u001b[32mIn addition to providing energy, macronutrients also play a role in maintaining a healthy immune system, regulating\u001b[0m\n", + "\u001b[32mhormones, and supporting brain function. It is important to have a balanced intake of all three macronutrients in \u001b[0m\n", + "\u001b[32morder to maintain overall health and well-being.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific about what the model should predict about macronutrients.\u001b[0m\n", + "\u001b[32mAs a result, the model provided a detailed explanation about macronutrients instead of just listing them as in the \u001b[0m\n", + "\u001b[32mground truth.\u001b[0m\n", + "\n", + "\u001b[32mInput: Minerals\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[1;36m1\u001b[0m\u001b[32m. Iron\u001b[0m\n", + "\u001b[1;36m2\u001b[0m\u001b[32m. Calcium\u001b[0m\n", + "\u001b[1;36m3\u001b[0m\u001b[32m. Magnesium\u001b[0m\n", + "\u001b[1;36m4\u001b[0m\u001b[32m. Potassium\u001b[0m\n", + "\u001b[1;36m5\u001b[0m\u001b[32m. Sodium\u001b[0m\n", + "\u001b[1;36m6\u001b[0m\u001b[32m. Zinc\u001b[0m\n", + "\u001b[1;36m7\u001b[0m\u001b[32m. Copper\u001b[0m\n", + "\u001b[1;36m8\u001b[0m\u001b[32m. Manganese\u001b[0m\n", + "\u001b[1;36m9\u001b[0m\u001b[32m. Phosphorus\u001b[0m\n", + "\u001b[1;36m10\u001b[0m\u001b[32m. Selenium\u001b[0m\n", + "\u001b[1;36m11\u001b[0m\u001b[32m. Chromium\u001b[0m\n", + "\u001b[1;36m12\u001b[0m\u001b[32m. Iodine\u001b[0m\n", + "\u001b[1;36m13\u001b[0m\u001b[32m. Fluoride\u001b[0m\n", + "\u001b[1;36m14\u001b[0m\u001b[32m. Molybdenum\u001b[0m\n", + "\u001b[1;36m15\u001b[0m\u001b[32m. Cobalt\u001b[0m\n", + "\u001b[1;36m16\u001b[0m\u001b[32m. Nickel\u001b[0m\n", + "\u001b[1;36m17\u001b[0m\u001b[32m. 
Vanadium\u001b[0m\n", + "\u001b[1;36m18\u001b[0m\u001b[32m. Silicon\u001b[0m\n", + "\u001b[1;36m19\u001b[0m\u001b[32m. Boron\u001b[0m\n", + "\u001b[1;36m20\u001b[0m\u001b[32m. Chloride\u001b[0m\n", + "\u001b[32mGround truth: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mError reason: The instructions are missing in the provided context, making it impossible to determine the specific \u001b[0m\n", + "\u001b[32merror in the prediction.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"skill_0\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_0\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Updated instructions for skill \"skill_0\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Given a category of nutrients, list the most common types of nutrients in that category. Do not provide detailed \n", + "explanations or list all possible nutrients in the category, just list the most common ones. \n", + "\n", + "Examples:\n", + "\n", + "Input: Vitamins\n", + "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n", + "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n", + "Output: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\n", + "\n", + "Input: Macronutrients\n", + "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n", + "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n", + "Output: Carbohydrates, Proteins, Fats\n", + "\n", + "Input: Minerals\n", + "Instructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \n", + "provide detailed explanations or list all possible nutrients in the category, just list the most common ones.\n", + "Output: Calcium, Iron, Magnesium\n", + "\n" + ], + "text/plain": [ + "\u001b[1;32mGiven a category of nutrients, list the most common types of nutrients in that category. Do not provide detailed \u001b[0m\n", + "\u001b[1;32mexplanations or list all possible nutrients in the category, just list the most common ones. \u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamins\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. 
Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Macronutrients\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Minerals\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the most common types of nutrients in that category. Do not \u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Calcium, Iron, Magnesium\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Re-apply skill_0 skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply skill_0 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: skill_0\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:01<00:00, 2.16it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 68.86it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "=> Iteration #1: Comparing to ground truth, analyzing and improving ...\n", + "\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m1\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Comparing predictions to ground truth data ...\n", + "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " category entities text skill_0 skill_1 skill_0 skill_1 \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. \n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. 
\n", + " Vitamins Vitamin A, Vitamin A is Vitamin A, False False \n", + " Vitamin C, crucial for good Vitamin B, Vitamin A, \n", + " Vitamin D vision and a Vitamin C, Vitamin B, \n", + " healthy immune Vitamin D, Vitamin C, \n", + " system, Vitamin Vitamin E, Vitamin D, \n", + " C helps in the Vitamin K Vitamin E, \n", + " repair of Vitamin K \n", + " tissues and the \n", + " enzymatic \n", + " production of \n", + " certain \n", + " neurotransmitteβ¦ \n", + " and Vitamin D is \n", + " essential for \n", + " strong bones and \n", + " teeth as it \n", + " helps the body \n", + " absorb calcium. \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. 
\n", + " \n", + "\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. 
\n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. \n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin B, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin B, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin E, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin E, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin K \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitteβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones 
and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Accuracy = 66.67%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m66.67\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Analyze evaluation experience ...\n", + "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββ| 1/1 [00:00<00:00, 174.49it/s]\n", + "100%|βββββββββ| 1/1 [00:04<00:00, 4.15s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "Error analysis for skill \"skill_0\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "Input: Vitamins\n", + "Prediction: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\n", + "Ground truth: Vitamin A, Vitamin C, Vitamin D\n", + "Error reason: The LLM included more vitamins than the ground truth. The instruction does not specify a number of \n", + "vitamins to list, so the LLM's prediction is not necessarily incorrect. The discrepancy may be due to different \n", + "interpretations of \"most common\" vitamins.\n", + "\n", + "\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamins\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A, Vitamin B, Vitamin C, Vitamin D, Vitamin E, Vitamin K\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mError reason: The LLM included more vitamins than the ground truth. The instruction does not specify a number of \u001b[0m\n", + "\u001b[32mvitamins to list, so the LLM's prediction is not necessarily incorrect. The discrepancy may be due to different \u001b[0m\n", + "\u001b[32minterpretations of \u001b[0m\u001b[32m\"most common\"\u001b[0m\u001b[32m vitamins.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"skill_0\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_0\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Updated instructions for skill \"skill_0\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_0\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Given a category of nutrients, list the three most common types of nutrients in that category. Do not provide \n", + "detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n", + "\n", + "Examples:\n", + "\n", + "Input: Vitamins\n", + "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n", + "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n", + "Output: Vitamin A, Vitamin C, Vitamin D\n", + "\n", + "Input: Macronutrients\n", + "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n", + "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n", + "Output: Carbohydrates, Proteins, Fats\n", + "\n", + "Input: Minerals\n", + "Instructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\n", + "provide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\n", + "Output: Calcium, Iron, Magnesium\n", + "\n" + ], + "text/plain": [ + "\u001b[1;32mGiven a category of nutrients, list the three most common types of nutrients in that category. Do not provide \u001b[0m\n", + "\u001b[1;32mdetailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamins\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. 
Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Macronutrients\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Minerals\u001b[0m\n", + "\u001b[1;32mInstructions: Given a category of nutrients, list the three most common types of nutrients in that category. Do not\u001b[0m\n", + "\u001b[1;32mprovide detailed explanations or list all possible nutrients in the category, just list the three most common ones.\u001b[0m\n", + "\u001b[1;32mOutput: Calcium, Iron, Magnesium\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Re-apply skill_0 skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply skill_0 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: skill_0\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:01<00:00, 2.11it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 60.17it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "=> Iteration #2: Comparing to ground truth, analyzing and improving ...\n", + "\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m2\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Comparing predictions to ground truth data ...\n", + "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " category entities text skill_0 skill_1 skill_0 skill_1 \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. \n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. 
\n", + " Vitamins Vitamin A, Vitamin A is Vitamin A, True False \n", + " Vitamin C, crucial for good Vitamin C, Vitamin A, \n", + " Vitamin D vision and a Vitamin D Vitamin C, \n", + " healthy immune Vitamin D \n", + " system, Vitamin \n", + " C helps in the \n", + " repair of \n", + " tissues and the \n", + " enzymatic \n", + " production of \n", + " certain \n", + " neurotransmitteβ¦ \n", + " and Vitamin D is \n", + " essential for \n", + " strong bones and \n", + " teeth as it \n", + " helps the body \n", + " absorb calcium. \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. 
\n", + " \n", + "\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, True False \n", + " Proteins, Fats provide quick Proteins, Fats The recommended \n", + " energy, proteins daily intake of \n", + " are essential carbohydrates is \n", + " for muscle 45-65% of your \n", + " repair and total calorie \n", + " growth, and fats intake. This \n", + " are vital for means that for a \n", + " long-term energy 2000 calorie \n", + " storage and cell diet, you should \n", + " function. aim for 225-325 \n", + " grams of \n", + " carbohydrates \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " proteins is \n", + " 10-35% of your \n", + " total calorie \n", + " intake. This \n", + " means that for a \n", + " 2000 calorie \n", + " diet, you should \n", + " aim for 50-175 \n", + " grams of protein \n", + " per day. \n", + " \n", + " The recommended \n", + " daily intake of \n", + " fats is 20-35% \n", + " of your total \n", + " calorie intake. \n", + " This means that \n", + " for a 2000 \n", + " calorie diet, \n", + " you should aim \n", + " for 44-78 grams \n", + " of fat per day. 
\n", + " It is important \n", + " to choose \n", + " healthy sources \n", + " of fats, such as \n", + " avocados, nuts, \n", + " and olive oil, \n", + " and limit \n", + " saturated and \n", + " trans fats. \n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitteβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, True False \n", + " Magnesium necessary for Magnesium Calcium: 20% \n", + " maintaining Iron: 10% \n", + " healthy bones Magnesium: 15% \n", + " and teeth, Iron \n", + " is crucial for \n", + " making red blood \n", + " cells and \n", + " transporting \n", + " oxygen \n", + " throughout the \n", + " body, and \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Accuracy = 0.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Analyze evaluation experience ...\n", + "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββ| 3/3 [00:00<00:00, 243.33it/s]\n", + "100%|βββββββββ| 3/3 [00:04<00:00, 1.55s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "Error analysis for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Prediction: \n", + "Vitamin A, Vitamin C, Vitamin D\n", + "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n", + "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n", + "teeth as it helps the body absorb calcium.\n", + "Error reason: The instructions were not clear or specific, leading to the model simply repeating the input instead \n", + "of providing detailed information about each vitamin.\n", + "\n", + "Input: Calcium, Iron, Magnesium\n", + "Prediction: \n", + "Calcium: 20%\n", + "Iron: 10%\n", + "Magnesium: 15%\n", + "Ground truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n", + "cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the \n", + "human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n", + "impulses.\n", + "Error reason: The model misunderstood the instructions, providing percentages instead of describing the roles of \n", + "Calcium, Iron, and Magnesium in the human body as per the ground truth.\n", + "\n", + "Input: Carbohydrates, Proteins, Fats\n", + "Prediction: \n", + "The recommended daily intake of carbohydrates is 45-65% of your total calorie intake. This means that for a 2000 \n", + "calorie diet, you should aim for 225-325 grams of carbohydrates per day.\n", + "\n", + "The recommended daily intake of proteins is 10-35% of your total calorie intake. 
This means that for a 2000 calorie\n", + "diet, you should aim for 50-175 grams of protein per day.\n", + "\n", + "The recommended daily intake of fats is 20-35% of your total calorie intake. This means that for a 2000 calorie \n", + "diet, you should aim for 44-78 grams of fat per day. It is important to choose healthy sources of fats, such as \n", + "avocados, nuts, and olive oil, and limit saturated and trans fats.\n", + "Ground truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\n", + "vital for long-term energy storage and cell function.\n", + "Error reason: The instructions were not clear or specific, leading to a mismatch between the predicted output and \n", + "the ground truth. The model provided nutritional guidelines for the intake of carbohydrates, proteins, and fats, \n", + "while the ground truth was about the functions of these nutrients in the body.\n", + "\n", + "\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mVitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific, leading to the model simply repeating the input instead \u001b[0m\n", + "\u001b[32mof providing detailed information about each vitamin.\u001b[0m\n", + "\n", + "\u001b[32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mCalcium: \u001b[0m\u001b[1;36m20\u001b[0m\u001b[32m%\u001b[0m\n", + "\u001b[32mIron: \u001b[0m\u001b[1;36m10\u001b[0m\u001b[32m%\u001b[0m\n", + 
"\u001b[32mMagnesium: \u001b[0m\u001b[1;36m15\u001b[0m\u001b[32m%\u001b[0m\n", + "\u001b[32mGround truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells and transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[32m enzyme reactions in the \u001b[0m\n", + "\u001b[32mhuman body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[32mimpulses.\u001b[0m\n", + "\u001b[32mError reason: The model misunderstood the instructions, providing percentages instead of describing the roles of \u001b[0m\n", + "\u001b[32mCalcium, Iron, and Magnesium in the human body as per the ground truth.\u001b[0m\n", + "\n", + "\u001b[32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mPrediction: \u001b[0m\n", + "\u001b[32mThe recommended daily intake of carbohydrates is \u001b[0m\u001b[1;36m45\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m65\u001b[0m\u001b[32m% of your total calorie intake. This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m \u001b[0m\n", + "\u001b[32mcalorie diet, you should aim for \u001b[0m\u001b[1;36m225\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m325\u001b[0m\u001b[32m grams of carbohydrates per day.\u001b[0m\n", + "\n", + "\u001b[32mThe recommended daily intake of proteins is \u001b[0m\u001b[1;36m10\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m35\u001b[0m\u001b[32m% of your total calorie intake. This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m calorie\u001b[0m\n", + "\u001b[32mdiet, you should aim for \u001b[0m\u001b[1;36m50\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m175\u001b[0m\u001b[32m grams of protein per day.\u001b[0m\n", + "\n", + "\u001b[32mThe recommended daily intake of fats is \u001b[0m\u001b[1;36m20\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m35\u001b[0m\u001b[32m% of your total calorie intake. 
This means that for a \u001b[0m\u001b[1;36m2000\u001b[0m\u001b[32m calorie \u001b[0m\n", + "\u001b[32mdiet, you should aim for \u001b[0m\u001b[1;36m44\u001b[0m\u001b[32m-\u001b[0m\u001b[1;36m78\u001b[0m\u001b[32m grams of fat per day. It is important to choose healthy sources of fats, such as \u001b[0m\n", + "\u001b[32mavocados, nuts, and olive oil, and limit saturated and trans fats.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\u001b[0m\n", + "\u001b[32mvital for long-term energy storage and cell function.\u001b[0m\n", + "\u001b[32mError reason: The instructions were not clear or specific, leading to a mismatch between the predicted output and \u001b[0m\n", + "\u001b[32mthe ground truth. The model provided nutritional guidelines for the intake of carbohydrates, proteins, and fats, \u001b[0m\n", + "\u001b[32mwhile the ground truth was about the functions of these nutrients in the body.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"skill_1\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Updated instructions for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "For each nutrient or vitamin listed in the input, provide a brief description of its role or function in the human \n", + "body. Do not include any percentages or recommended daily intake values, but focus on explaining what each nutrient\n", + "or vitamin does for the body.\n", + "\n", + "Examples:\n", + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \n", + "and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\n", + "it helps the body absorb calcium.\n", + "\n", + "Input: Calcium, Iron, Magnesium\n", + "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \n", + "and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human \n", + "body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n", + "impulses.\n", + "\n", + "Input: Carbohydrates, Proteins, Fats\n", + "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\n", + "for long-term energy storage and cell function.\n", + "\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a brief description of its role or function in the human \u001b[0m\n", + "\u001b[1;32mbody. 
Do not include any percentages or recommended daily intake values, but focus on explaining what each nutrient\u001b[0m\n", + "\u001b[1;32mor vitamin does for the body.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \u001b[0m\n", + "\u001b[1;32mand the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\u001b[0m\n", + "\u001b[1;32mit helps the body absorb calcium.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \u001b[0m\n", + "\u001b[1;32mand transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[1;32m enzyme reactions in the human \u001b[0m\n", + "\u001b[1;32mbody, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[1;32mimpulses.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\u001b[0m\n", + "\u001b[1;32mfor long-term energy storage and cell function.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Re-apply skill_1 skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:03<00:00, 1.19s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "=> Iteration #3: Comparing to ground truth, analyzing and improving ...\n", + "\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m3\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Comparing predictions to ground truth data ...\n", + "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " category entities text skill_0 skill_1 skill_0 skill_1 \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True True \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair and \n", + " growth, and fats growth, and fats \n", + " are vital for are vital for \n", + " long-term energy long-term energy \n", + " storage and cell storage and cell \n", + " function. function. \n", + " Vitamins Vitamin A, Vitamin A is Vitamin A, Vitamin A is True False \n", + " Vitamin C, crucial for good Vitamin C, crucial for good \n", + " Vitamin D vision and a Vitamin D vision and a \n", + " healthy immune healthy immune \n", + " system, Vitamin system. It is \n", + " C helps in the also important \n", + " repair of for the growth \n", + " tissues and the and development \n", + " enzymatic of cells, \n", + " production of including skin \n", + " certain cells. Vitamin C \n", + " neurotransmitteβ¦ is an \n", + " and Vitamin D is antioxidant that \n", + " essential for helps protect \n", + " strong bones and cells from \n", + " teeth as it damage and is \n", + " helps the body necessary for \n", + " absorb calcium. the production \n", + " of collagen, a \n", + " protein that \n", + " helps with wound \n", + " healing and \n", + " maintaining \n", + " healthy skin, \n", + " bones, and blood \n", + " vessels. Vitamin \n", + " D is essential \n", + " for strong bones \n", + " and teeth as it \n", + " helps the body \n", + " absorb calcium. 
\n", + " It also plays a \n", + " role in immune \n", + " function and may \n", + " help reduce the \n", + " risk of certain \n", + " diseases such as \n", + " cancer and heart \n", + " disease. \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True True \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells and \n", + " transporting transporting \n", + " oxygen oxygen \n", + " throughout the throughout the \n", + " body, and body, and \n", + " Magnesium plays Magnesium plays \n", + " a role in over a role in over \n", + " 300 enzyme 300 enzyme \n", + " reactions in the reactions in the \n", + " human body, human body, \n", + " including the including the \n", + " metabolism of metabolism of \n", + " food, synthesis food, synthesis \n", + " of fatty acids of fatty acids \n", + " and proteins, and proteins, \n", + " and the and the \n", + " transmission of transmission of \n", + " nerve impulses. nerve impulses. 
\n", + " \n", + "\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True True \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair and \n", + " growth, and fats growth, and fats \n", + " are vital for are vital for \n", + " long-term energy long-term energy \n", + " storage and cell storage and cell \n", + " function. function. 
\n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m Vitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem. 
It is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2malso important \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfor the growth \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand development \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mof cells, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mincluding skin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcells. 
Vitamin C\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitteβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mis an \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mantioxidant that\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps protect \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcells from \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdamage and is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mnecessary for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mthe production \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mof collagen, a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mprotein that \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps with wound\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealing and \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mmaintaining \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy skin, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mbones, and blood\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvessels. 
Vitamin\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mD is essential \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfor strong bones\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand teeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mIt also plays a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrole in immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mfunction and may\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelp reduce the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrisk of certain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdiseases such as\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcancer and heart\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mdisease. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True True \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells and \n", + " transporting transporting \n", + " oxygen oxygen \n", + " throughout the throughout the \n", + " body, and body, and \n", + " Magnesium plays Magnesium plays \n", + " a role in over a role in over \n", + " 300 enzyme 300 enzyme \n", + " reactions in the reactions in the \n", + " human body, human body, \n", + " including the including the \n", + " metabolism of metabolism of \n", + " food, synthesis food, synthesis \n", + " of fatty acids of fatty acids \n", + " and proteins, and proteins, \n", + " and the and the \n", + " transmission of transmission of \n", + " nerve impulses. nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Accuracy = 66.67%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m66.67\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Analyze evaluation experience ...\n", + "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββ| 1/1 [00:00<00:00, 201.00it/s]\n", + "100%|βββββββββ| 1/1 [00:09<00:00, 9.70s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "Error analysis for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Prediction: Vitamin A is crucial for good vision and a healthy immune system. It is also important for the growth \n", + "and development of cells, including skin cells. Vitamin C is an antioxidant that helps protect cells from damage \n", + "and is necessary for the production of collagen, a protein that helps with wound healing and maintaining healthy \n", + "skin, bones, and blood vessels. Vitamin D is essential for strong bones and teeth as it helps the body absorb \n", + "calcium. It also plays a role in immune function and may help reduce the risk of certain diseases such as cancer \n", + "and heart disease.\n", + "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n", + "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n", + "teeth as it helps the body absorb calcium.\n", + "Error reason: The model's prediction does not match the ground truth because it provided additional information \n", + "about the roles of Vitamin A, C, and D in the body that were not included in the ground truth. For example, it \n", + "mentioned that Vitamin A is important for the growth and development of cells, including skin cells, and that \n", + "Vitamin C is necessary for the production of collagen. It also mentioned that Vitamin D plays a role in immune \n", + "function and may help reduce the risk of certain diseases such as cancer and heart disease. 
These additional \n", + "details are not wrong, but they do not align with the simpler descriptions provided in the ground truth.\n", + "\n", + "\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A is crucial for good vision and a healthy immune system. It is also important for the growth \u001b[0m\n", + "\u001b[32mand development of cells, including skin cells. Vitamin C is an antioxidant that helps protect cells from damage \u001b[0m\n", + "\u001b[32mand is necessary for the production of collagen, a protein that helps with wound healing and maintaining healthy \u001b[0m\n", + "\u001b[32mskin, bones, and blood vessels. Vitamin D is essential for strong bones and teeth as it helps the body absorb \u001b[0m\n", + "\u001b[32mcalcium. It also plays a role in immune function and may help reduce the risk of certain diseases such as cancer \u001b[0m\n", + "\u001b[32mand heart disease.\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The model's prediction does not match the ground truth because it provided additional information \u001b[0m\n", + "\u001b[32mabout the roles of Vitamin A, C, and D in the body that were not included in the ground truth. For example, it \u001b[0m\n", + "\u001b[32mmentioned that Vitamin A is important for the growth and development of cells, including skin cells, and that \u001b[0m\n", + "\u001b[32mVitamin C is necessary for the production of collagen. It also mentioned that Vitamin D plays a role in immune \u001b[0m\n", + "\u001b[32mfunction and may help reduce the risk of certain diseases such as cancer and heart disease. 
These additional \u001b[0m\n", + "\u001b[32mdetails are not wrong, but they do not align with the simpler descriptions provided in the ground truth.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"skill_1\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Updated instructions for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "For each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \n", + "the human body. Avoid including additional details or secondary functions. Do not include any percentages or \n", + "recommended daily intake values, but focus on explaining the main function of each nutrient or vitamin for the \n", + "body.\n", + "\n", + "Examples:\n", + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues,\n", + "and Vitamin D is essential for strong bones and teeth.\n", + "\n", + "Input: Calcium, Iron, Magnesium\n", + "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells, \n", + "and Magnesium plays a role in the metabolism of food.\n", + "\n", + "Input: Carbohydrates, Proteins, Fats\n", + "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \n", + "long-term energy storage.\n", + "\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \u001b[0m\n", + "\u001b[1;32mthe human body. Avoid including additional details or secondary functions. 
Do not include any percentages or \u001b[0m\n", + "\u001b[1;32mrecommended daily intake values, but focus on explaining the main function of each nutrient or vitamin for the \u001b[0m\n", + "\u001b[1;32mbody.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues,\u001b[0m\n", + "\u001b[1;32mand Vitamin D is essential for strong bones and teeth.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells, \u001b[0m\n", + "\u001b[1;32mand Magnesium plays a role in the metabolism of food.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \u001b[0m\n", + "\u001b[1;32mlong-term energy storage.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Re-apply skill_1 skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:02<00:00, 1.28it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "=> Iteration #4: Comparing to ground truth, analyzing and improving ...\n", + "\n" + ], + "text/plain": [ + "\n", + "\n", + "=> Iteration #\u001b[1;36m4\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Comparing predictions to ground truth data ...\n", + "
\n" + ], + "text/plain": [ + "Comparing predictions to ground truth data \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " category entities text skill_0 skill_1 skill_0 skill_1 \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True False \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair, and fats \n", + " growth, and fats are vital for \n", + " are vital for long-term energy \n", + " long-term energy storage. \n", + " storage and cell \n", + " function. \n", + " Vitamins Vitamin A, Vitamin A is Vitamin A, Vitamin A is True False \n", + " Vitamin C, crucial for good Vitamin C, crucial for good \n", + " Vitamin D vision and a Vitamin D vision and a \n", + " healthy immune healthy immune \n", + " system, Vitamin system, Vitamin \n", + " C helps in the C helps in the \n", + " repair of repair of \n", + " tissues and the tissues, and \n", + " enzymatic Vitamin D is \n", + " production of essential for \n", + " certain strong bones and \n", + " neurotransmitteβ¦ teeth. \n", + " and Vitamin D is \n", + " essential for \n", + " strong bones and \n", + " teeth as it \n", + " helps the body \n", + " absorb calcium. 
\n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True False \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells, and \n", + " transporting Magnesium plays \n", + " oxygen a role in the \n", + " throughout the metabolism of \n", + " body, and food. \n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n", + "\n" + ], + "text/plain": [ + " \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mcategory \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mentities \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1 \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_0\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mskill_1\u001b[0m\u001b[1;35m \u001b[0m \n", + " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", + " Macronutrients Carbohydrates, Carbohydrates Carbohydrates, Carbohydrates True False \n", + " Proteins, Fats provide quick Proteins, Fats provide quick \n", + " energy, proteins energy, proteins \n", + " are essential are essential \n", + " for muscle for muscle \n", + " repair and repair, and fats \n", + " growth, and fats are vital for \n", + " are vital for long-term energy \n", + " long-term energy storage. \n", + " storage and cell \n", + " function. 
\n", + " \u001b[2m \u001b[0m\u001b[2mVitamins \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin A is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mFalse \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin C, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcrucial for good\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mvision and a \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhealthy immune \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2msystem, Vitamin \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mC helps in the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m 
\u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mrepair of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues and the \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mtissues, and \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2menzymatic \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mVitamin D is \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mproduction of \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mcertain \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mneurotransmitteβ¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth. 
\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mand Vitamin D is\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2messential for \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mstrong bones and\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mteeth as it \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mhelps the body \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mabsorb calcium. \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Minerals Calcium, Iron, Calcium is Calcium, Iron, Calcium is True False \n", + " Magnesium necessary for Magnesium necessary for \n", + " maintaining maintaining \n", + " healthy bones healthy bones \n", + " and teeth, Iron and teeth, Iron \n", + " is crucial for is crucial for \n", + " making red blood making red blood \n", + " cells and cells, and \n", + " transporting Magnesium plays \n", + " oxygen a role in the \n", + " throughout the metabolism of \n", + " body, and food. 
\n", + " Magnesium plays \n", + " a role in over \n", + " 300 enzyme \n", + " reactions in the \n", + " human body, \n", + " including the \n", + " metabolism of \n", + " food, synthesis \n", + " of fatty acids \n", + " and proteins, \n", + " and the \n", + " transmission of \n", + " nerve impulses. \n", + " \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Accuracy = 0.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m0.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Analyze evaluation experience ...\n", + "
\n" + ], + "text/plain": [ + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββ| 3/3 [00:00<00:00, 210.07it/s]\n", + "100%|βββββββββ| 3/3 [00:12<00:00, 4.15s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "Error analysis for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Error analysis for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Prediction: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n", + "tissues, and Vitamin D is essential for strong bones and teeth.\n", + "Ground truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \n", + "tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \n", + "teeth as it helps the body absorb calcium.\n", + "Error reason: The prediction did not follow the instruction to avoid including additional details or secondary \n", + "functions. The ground truth includes additional functions of Vitamin C and Vitamin D, which are not present in the \n", + "prediction.\n", + "\n", + "Input: Calcium, Iron, Magnesium\n", + "Prediction: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n", + "cells, and Magnesium plays a role in the metabolism of food.\n", + "Ground truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \n", + "cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the \n", + "human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n", + "impulses.\n", + "Error reason: The instructions asked for a concise description of the primary role or function of each nutrient in \n", + "the human body, without including additional details or secondary functions. 
The ground truth, however, provides \n", + "additional details about the roles of Iron and Magnesium, which goes beyond the primary function, hence not \n", + "following the instructions correctly.\n", + "\n", + "Input: Carbohydrates, Proteins, Fats\n", + "Prediction: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \n", + "long-term energy storage.\n", + "Ground truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\n", + "vital for long-term energy storage and cell function.\n", + "Error reason: The original instruction was not clear about including additional functions of the nutrients. The \n", + "prediction missed the additional function of proteins for growth and of fats for cell function.\n", + "\n", + "\n" + ], + "text/plain": [ + "\n", + "\u001b[32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[32mPrediction: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues, and Vitamin D is essential for strong bones and teeth.\u001b[0m\n", + "\u001b[32mGround truth: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of \u001b[0m\n", + "\u001b[32mtissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and \u001b[0m\n", + "\u001b[32mteeth as it helps the body absorb calcium.\u001b[0m\n", + "\u001b[32mError reason: The prediction did not follow the instruction to avoid including additional details or secondary \u001b[0m\n", + "\u001b[32mfunctions. 
The ground truth includes additional functions of Vitamin C and Vitamin D, which are not present in the \u001b[0m\n", + "\u001b[32mprediction.\u001b[0m\n", + "\n", + "\u001b[32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[32mPrediction: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells, and Magnesium plays a role in the metabolism of food.\u001b[0m\n", + "\u001b[32mGround truth: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood \u001b[0m\n", + "\u001b[32mcells and transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[32m enzyme reactions in the \u001b[0m\n", + "\u001b[32mhuman body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[32mimpulses.\u001b[0m\n", + "\u001b[32mError reason: The instructions asked for a concise description of the primary role or function of each nutrient in \u001b[0m\n", + "\u001b[32mthe human body, without including additional details or secondary functions. 
The ground truth, however, provides \u001b[0m\n", + "\u001b[32madditional details about the roles of Iron and Magnesium, which goes beyond the primary function, hence not \u001b[0m\n", + "\u001b[32mfollowing the instructions correctly.\u001b[0m\n", + "\n", + "\u001b[32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[32mPrediction: Carbohydrates provide quick energy, proteins are essential for muscle repair, and fats are vital for \u001b[0m\n", + "\u001b[32mlong-term energy storage.\u001b[0m\n", + "\u001b[32mGround truth: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are\u001b[0m\n", + "\u001b[32mvital for long-term energy storage and cell function.\u001b[0m\n", + "\u001b[32mError reason: The original instruction was not clear about including additional functions of the nutrients. The \u001b[0m\n", + "\u001b[32mprediction missed the additional function of proteins for growth and of fats for cell function.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"skill_1\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"skill_1\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Updated instructions for skill \"skill_1\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"skill_1\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "For each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \n", + "the human body. You can include one or two additional functions if they are commonly associated with the nutrient \n", + "or vitamin. Do not include any percentages or recommended daily intake values. The focus should be on explaining \n", + "the main function of each nutrient or vitamin for the body, along with a few other significant roles they play.\n", + "\n", + "Examples:\n", + "\n", + "Input: Vitamin A, Vitamin C, Vitamin D\n", + "Output: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \n", + "and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\n", + "it helps the body absorb calcium.\n", + "\n", + "Input: Calcium, Iron, Magnesium\n", + "Output: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \n", + "and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human \n", + "body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \n", + "impulses.\n", + "\n", + "Input: Carbohydrates, Proteins, Fats\n", + "Output: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\n", + "for long-term energy storage and cell function.\n", + "\n" + ], + "text/plain": [ + "\u001b[1;32mFor each nutrient or vitamin listed in the input, provide a concise description of its primary role or function in \u001b[0m\n", + "\u001b[1;32mthe human body. 
You can include one or two additional functions if they are commonly associated with the nutrient \u001b[0m\n", + "\u001b[1;32mor vitamin. Do not include any percentages or recommended daily intake values. The focus should be on explaining \u001b[0m\n", + "\u001b[1;32mthe main function of each nutrient or vitamin for the body, along with a few other significant roles they play.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Vitamin A, Vitamin C, Vitamin D\u001b[0m\n", + "\u001b[1;32mOutput: Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues \u001b[0m\n", + "\u001b[1;32mand the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as\u001b[0m\n", + "\u001b[1;32mit helps the body absorb calcium.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Calcium, Iron, Magnesium\u001b[0m\n", + "\u001b[1;32mOutput: Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells \u001b[0m\n", + "\u001b[1;32mand transporting oxygen throughout the body, and Magnesium plays a role in over \u001b[0m\u001b[1;36m300\u001b[0m\u001b[1;32m enzyme reactions in the human \u001b[0m\n", + "\u001b[1;32mbody, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve \u001b[0m\n", + "\u001b[1;32mimpulses.\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Carbohydrates, Proteins, Fats\u001b[0m\n", + "\u001b[1;32mOutput: Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital\u001b[0m\n", + "\u001b[1;32mfor long-term energy storage and cell function.\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Re-apply skill_1 skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply skill_1 skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:03<00:00, 1.16s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "Train is done!\n", + "\n" + ], + "text/plain": [ + "Train is done!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "GroundTruthSignal(match= skill_0 skill_1\n", + "0 True False\n", + "1 True False\n", + "2 True False, errors={'skill_0': Empty DataFrame\n", + "Columns: [predictions, entities]\n", + "Index: [], 'skill_1': predictions \\\n", + "0 Carbohydrates provide quick energy, proteins ... \n", + "1 Vitamin A is crucial for good vision and a hea... \n", + "2 Calcium is necessary for maintaining healthy ... \n", + "\n", + " text \n", + "0 Carbohydrates provide quick energy, proteins a... \n", + "1 Vitamin A is crucial for good vision and a hea... \n", + "2 Calcium is necessary for maintaining healthy b... 
})" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "from adala.agents import Agent\n", + "from adala.skills import LinearSkillSet, TextGenerationSkill\n", + "from adala.environments import BasicEnvironment\n", + "from adala.runtimes import OpenAIRuntime\n", + "\n", + "agent = Agent(\n", + " \n", + " # Require agent to learn sequence of two skills\n", + " skills=LinearSkillSet(skills=[\n", + " TextGenerationSkill(name=\"skill_0\", instructions=\"...\", input_data_field=\"category\"),\n", + " TextGenerationSkill(name=\"skill_1\", instructions=\"...\", input_data_field=\"skill_0\")\n", + " ]),\n", + " \n", + " # provide ground truth demonstration in environment\n", + " environment=BasicEnvironment(\n", + " ground_truth_dataset=pd.DataFrame(\n", + " [{\n", + " \"category\": \"Macronutrients\",\n", + " \"entities\": \"Carbohydrates, Proteins, Fats\",\n", + " \"text\": \"Carbohydrates provide quick energy, proteins are essential for muscle repair and growth, and fats are vital for long-term energy storage and cell function.\"\n", + " }, {\n", + " \"category\": \"Vitamins\",\n", + " \"entities\": \"Vitamin A, Vitamin C, Vitamin D\",\n", + " \"text\": \"Vitamin A is crucial for good vision and a healthy immune system, Vitamin C helps in the repair of tissues and the enzymatic production of certain neurotransmitters, and Vitamin D is essential for strong bones and teeth as it helps the body absorb calcium.\"\n", + " }, {\n", + " \"category\": \"Minerals\",\n", + " \"entities\": \"Calcium, Iron, Magnesium\",\n", + " \"text\": \"Calcium is necessary for maintaining healthy bones and teeth, Iron is crucial for making red blood cells and transporting oxygen throughout the body, and Magnesium plays a role in over 300 enzyme reactions in the human body, including the metabolism of food, synthesis of fatty acids and proteins, and the transmission of nerve impulses.\"\n", + " }]\n", + 
" ),\n", + " ground_truth_columns={\n", + " 'skill_0': 'entities',\n", + " 'skill_1': 'text'\n", + " },\n", + " matching_function='fuzzy',\n", + " matching_threshold=0.9\n", + " ),\n", + ").learn(learning_iterations=5)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: skill_0\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_0\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 29.02it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "Applying skill: skill_1\n", + "\n" + ], + "text/plain": [ + "Applying skill: skill_1\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββ| 3/3 [00:00<00:00, 30.00it/s]\n" + ] + } + ], + "source": [ + "predictions = agent.run(pd.DataFrame([\n", + " ['Trace Minerals'],\n", + " ['Water-Soluble Vitamins'],\n", + " ['Fatty Acids']\n", + "], columns=['category']))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "" + ], + "text/plain": [ + " category skill_0 \\\n", + "0 Trace Minerals Zinc, Copper, Selenium \n", + "1 Water-Soluble Vitamins Vitamin B, Vitamin C, Folate \n", + "2 Fatty Acids Omega-3, Omega-6, Saturated Fat \n", + "\n", + " skill_1 \n", + "0 Zinc is important for immune function, wound ... \n", + "1 Vitamin B is a group of essential vitamins th... \n", + "2 Omega-3 fatty acids are important for brain f... 
" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "adala", + "language": "python", + "name": "adala" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/question_answering_skill.ipynb b/examples/question_answering_skill.ipynb index dd51761..254016b 100644 --- a/examples/question_answering_skill.ipynb +++ b/examples/question_answering_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Question-answering skill" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -105,14 +103,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " category \n", + "skill_0 \n", + "skill_1 \n", + "\n", + " \n", + "0 \n", + "Trace Minerals \n", + "Zinc, Copper, Selenium \n", + "Zinc is important for immune function, wound ... \n", + "\n", + " \n", + "1 \n", + "Water-Soluble Vitamins \n", + "Vitamin B, Vitamin C, Folate \n", + "Vitamin B is a group of essential vitamins th... \n", + "\n", + " \n", + " \n", + "2 \n", + "Fatty Acids \n", + "Omega-3, Omega-6, Saturated Fat \n", + "Omega-3 fatty acids are important for brain f... \n", + "Applying skill: qa_skill\n", + "\n" + ], + "text/plain": [ + "Applying skill: qa_skill\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:02<00:00, 1.91it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 64.16it/s]\n" ] }, { @@ -215,16 +225,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -236,7 +245,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb index 3eab497..fd9721f 100644 --- a/examples/quickstart.ipynb +++ b/examples/quickstart.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "a6c119c3", "metadata": {}, "source": [ "# ADALA Quickstart\n", @@ -20,7 +19,6 @@ }, { "cell_type": "markdown", - "id": "55c19afc", "metadata": {}, "source": [ "## Dataset Creation\n", @@ -30,7 +28,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "5d5b37a3", "metadata": {}, "outputs": [ { @@ -118,7 +115,6 @@ }, { "cell_type": "markdown", - "id": "9ce6651b", "metadata": {}, "source": 
[ "We instantiate Dataset that uses this pandas dataframe as a data source. Dataset object takes care of input data schema and data streaming:" @@ -127,7 +123,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "93a31f60", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +133,6 @@ }, { "cell_type": "markdown", - "id": "0dc201b3", "metadata": {}, "source": [ "## Create Agent\n", @@ -152,8 +146,7 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "a1310fce", + "execution_count": 9, "metadata": { "scrolled": true }, @@ -165,17 +158,17 @@ "\n", "Environment: BasicEnvironment\n", "Skills: subjectivity_detection\n", - "Runtimes: openai, openai-gpt3, openai-gpt4\n", + "Runtimes: openai\n", "Default Runtime: openai\n", "Default Teacher Runtime: openai-gpt4\n", "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:00<00:00, 39.18it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 45.97it/s]\n" ] }, { @@ -259,7 +266,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m0\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m0\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -272,7 +279,7 @@ "\n", - " text ground_truth subjectivity_detection score ground_truth__x__subβ¦ \n", + " text ground_truth subjectivity_detection score subjectivity_detectiβ¦ \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " The mic is great. 
Subjective Subjective {'Subjective': True \n", " -0.02697588099999997, \n", @@ -309,24 +316,24 @@ ], "text/plain": [ " \n", - " \u001B[1;35m \u001B[0m\u001B[1;35mtext \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__subβ¦\u001B[0m\u001B[1;35m \u001B[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detectiβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", " The mic is great. Subjective Subjective {'Subjective': True \n", " -0.02697588099999997, \n", " 'Objective': \n", " -3.6262724} \n", - " \u001B[2m \u001B[0m\u001B[2mWill order from them \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2magain! 
\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.11282212000000001, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.2378219999999995} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " \u001b[2m \u001b[0m\u001b[2mWill order from them \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2magain! \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.11282212000000001, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.2378219999999995} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Not loud enough and Objective Subjective {'Subjective': False \n", " doesn't turn on like -0.014163457000000034, \n", " it should. 
'Objective': \n", " -4.2641635} \n", - " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mto accept anything \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.0720863, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.13458653999999995} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mto accept anything \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.0720863, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.13458653999999995} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " All three broke within Objective Objective {'Subjective': 
True \n", " two months of use. -2.1821797, \n", " 'Objective': \n", @@ -337,6 +344,19 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -354,18 +374,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|ββββββββββββββββ| 1/1 [00:00<00:00, 153.47it/s]\n", - "100%|βββββββββββββββββ| 1/1 [00:00<00:00, 31.21it/s]\n" + "100%|βββββββββββββββββββββββββββββββ| 1/1 [00:00<00:00, 170.90it/s]\n", + "100%|ββββββββββββββββββββββββββββββββ| 1/1 [00:00<00:00, 29.05it/s]\n" ] }, { "data": { "text/html": [ - "Accuracy = 80.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -344,7 +364,7 @@ "Number of errors: 1\n", + "\n" ], "text/plain": [ - "Updated instructions for skill \u001B[32m\"subjectivity_detection\"\u001B[0m:\n", + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", "\n" ] }, @@ -415,35 +451,37 @@ { "data": { "text/html": [ - "Error analysis for skill \"subjectivity_detection\":\n", + "\n", "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m1\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -374,11 +396,25 @@ { "data": { "text/html": [ - "Accuracy = 80.00%\n", + "\n" ], "text/plain": [ - "Improve \u001B[32m\"subjectivity_detection\"\u001B[0m skill based on analysis \u001B[33m...\u001B[0m\n" + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -405,7 +441,7 @@ "\n", + "Input: Not loud enough and doesn't turn on like it should.\n", + "Prediction: Subjective\n", + "Ground truth: Objective\n", + "Error reason: The model might have considered the phrases \"not loud enough\" and \"doesn't turn on like it should\" as\n", + "personal opinions or experiences, hence it classified the review as subjective. However, these are factual \n", + "statements about the product's performance, making the review objective.\n", + "\n", "\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m80.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model might have considered the phrases \u001b[0m\u001b[32m\"not loud enough\"\u001b[0m\u001b[32m and \u001b[0m\u001b[32m\"doesn't turn on like it should\"\u001b[0m\u001b[32m as\u001b[0m\n", + "\u001b[32mpersonal opinions or experiences, hence it classified the review as subjective. 
However, these are factual \u001b[0m\n", + "\u001b[32mstatements about the product's performance, making the review objective.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -391,7 +427,7 @@ "Determine whether the given product review contains \"Subjective\" (based on personal feelings, tastes, or opinions) \n", - "or \"Objective\" (based on facts) statements.\n", + "\n" ], "text/plain": [ - "Re-apply subjectivity_detection skill to dataset \u001B[33m...\u001B[0m\n" + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n", + "based on personal opinions, feelings, or tastes. An \"Objective\" statement is based on factual information about the\n", + "product, such as its features or performance, and is not influenced by personal feelings or opinions. 
\n", "\n", "Examples:\n", "\n", - "Input: Not loud enough and doesn't turn on like it should.\n", + "Input: The color of this phone is black.\n", "Output: Objective\n", "\n", - "Input: I personally think the sound quality is not up to the mark.\n", + "Input: I think this phone is too expensive for its features.\n", "Output: Subjective\n", "\n", - "Input: The phone's battery lasts for 10 hours.\n", + "Input: The battery life of this laptop lasts for 10 hours.\n", "Output: Objective\n", "\n" ], "text/plain": [ - "\u001B[1;32mDetermine whether the given product review contains \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m \u001B[0m\n", - "\u001B[1;32mor \u001B[0m\u001B[32m\"Objective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on facts\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m statements.\u001B[0m\n", + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the\u001b[0m\n", + "\u001b[1;32mproduct, such as its features or performance, and is not influenced by personal feelings or opinions. 
\u001b[0m\n", "\n", - "\u001B[1;32mExamples:\u001B[0m\n", + "\u001b[1;32mExamples:\u001b[0m\n", "\n", - "\u001B[1;32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The phone's battery lasts for \u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;32m hours.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n" + "\u001b[1;32mInput: The battery life of this laptop lasts for \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;32m hours.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n" ] }, "metadata": {}, @@ -456,7 +494,20 @@ "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" ] }, "metadata": {}, @@ -466,7 +517,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:00<00:00, 48.32it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 26.29it/s]\n" ] }, { @@ -480,7 +531,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m1\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m1\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -493,7 +544,7 @@ "\n", - " text ground_truth subjectivity_detection score ground_truth__x__subβ¦ \n", + " text ground_truth subjectivity_detection score subjectivity_detectiβ¦ \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", - " The mic is 
great. Subjective Objective {'Subjective': False \n", - " -2.2253392, \n", + " The mic is great. Subjective Subjective {'Subjective': True \n", + " -0.12713461999999998, \n", " 'Objective': \n", - " -0.11432376000000005} \n", - " Will order from them Subjective Objective {'Subjective': False \n", - " again! -0.8573844400000001, \n", + " -2.1254027} \n", + " Will order from them Subjective Subjective {'Subjective': True \n", + " again! -0.007335418999999971β¦ \n", " 'Objective': \n", - " -0.5521171} \n", - " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -4.0895286, \n", + " -4.9187045} \n", + " Not loud enough and Objective Subjective {'Subjective': False \n", + " doesn't turn on like -0.000693016340000051β¦ \n", " it should. 'Objective': \n", - " -0.01688896000000003} \n", + " -7.2748933} \n", " The phone doesn't seem Objective Objective {'Subjective': True \n", - " to accept anything -2.8614092, \n", + " to accept anything -2.4914062, \n", " except CBR mp3s 'Objective': \n", - " -0.058888500000000066} \n", - " All three broke within Objective Objective {'Subjective': True \n", - " two months of use. -4.7739024, \n", + " -0.086422645} \n", + " All three broke within Objective Subjective {'Subjective': False \n", + " two months of use. 
-0.08145889000000005, \n", " 'Objective': \n", - " -0.008483256000000052} \n", + " -2.5481107} \n", " \n", "\n" ], "text/plain": [ " \n", - " \u001B[1;35m \u001B[0m\u001B[1;35mtext \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__subβ¦\u001B[0m\u001B[1;35m \u001B[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detectiβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", - " The mic is great. Subjective Objective {'Subjective': False \n", - " -2.2253392, \n", + " The mic is great. Subjective Subjective {'Subjective': True \n", + " -0.12713461999999998, \n", " 'Objective': \n", - " -0.11432376000000005} \n", - " \u001B[2m \u001B[0m\u001B[2mWill order from them \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mFalse \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2magain! 
\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.8573844400000001, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.5521171} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -4.0895286, \n", + " -2.1254027} \n", + " \u001b[2m \u001b[0m\u001b[2mWill order from them \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2magain! \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.007335418999999971β¦\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.9187045} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " Not loud enough and Objective Subjective {'Subjective': False \n", + " doesn't turn on like -0.000693016340000051β¦ \n", " it should. 
'Objective': \n", - " -0.01688896000000003} \n", - " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mto accept anything \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.8614092, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.058888500000000066}\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " All three broke within Objective Objective {'Subjective': True \n", - " two months of use. 
-4.7739024, \n", + " -7.2748933} \n", + " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mto accept anything \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-2.4914062, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.086422645} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " All three broke within Objective Subjective {'Subjective': False \n", + " two months of use. -0.08145889000000005, \n", " 'Objective': \n", - " -0.008483256000000052} \n", + " -2.5481107} \n", " \n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "Accuracy = 60.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m60.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -565,7 +629,7 @@ "Number of errors: 2\n", + "\n" ], "text/plain": [ - "Updated instructions for skill \u001B[32m\"subjectivity_detection\"\u001B[0m:\n", + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", "\n" ] }, @@ -636,49 +730,53 @@ { "data": { "text/html": [ - "Error analysis for skill \"subjectivity_detection\":\n", + "\n", "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m2\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -595,11 +661,39 @@ { "data": { "text/html": [ - "Accuracy = 60.00%\n", + "\n" ], "text/plain": [ - "Improve \u001B[32m\"subjectivity_detection\"\u001B[0m skill based on analysis \u001B[33m...\u001B[0m\n" + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -626,7 +720,7 @@ "\n", + "Input: Not loud enough and doesn't turn on like it should.\n", + "Prediction: Subjective\n", + "Ground truth: Objective\n", + "Error reason: The model likely classified the statement as subjective because it seems to express personal \n", + "dissatisfaction. However, the statement is actually objective as it describes the product's features - its volume \n", + "and functionality.\n", + "\n", + "Input: All three broke within two months of use.\n", + "Prediction: Subjective\n", + "Ground truth: Objective\n", + "Error reason: The model incorrectly classified the statement as subjective, possibly because it interpreted the \n", + "phrase \"broke within two months of use\" as a personal experience or opinion. However, the statement is objective as\n", + "it presents a factual occurrence about the product's durability.\n", + "\n", "\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m60.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model likely classified the statement as subjective because it seems to express personal \u001b[0m\n", + "\u001b[32mdissatisfaction. 
However, the statement is actually objective as it describes the product's features - its volume \u001b[0m\n", + "\u001b[32mand functionality.\u001b[0m\n", + "\n", + "\u001b[32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[32mPrediction: Subjective\u001b[0m\n", + "\u001b[32mGround truth: Objective\u001b[0m\n", + "\u001b[32mError reason: The model incorrectly classified the statement as subjective, possibly because it interpreted the \u001b[0m\n", + "\u001b[32mphrase \u001b[0m\u001b[32m\"broke within two months of use\"\u001b[0m\u001b[32m as a personal experience or opinion. However, the statement is objective as\u001b[0m\n", + "\u001b[32mit presents a factual occurrence about the product's durability.\u001b[0m\n", + "\n" ] }, "metadata": {}, @@ -612,7 +706,7 @@ "Identify if the provided product review is \"Subjective\" (expressing personal feelings, tastes, or opinions) or \n", - "\"Objective\" (based on factual information). Consider a statement as subjective if it reflects personal judgment or \n", - "preference, and as objective if it states verifiable facts or features.\n", + "\n" ], "text/plain": [ - "Re-apply subjectivity_detection skill to dataset \u001B[33m...\u001B[0m\n" + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n", + "based on personal opinions, feelings, or tastes. An \"Objective\" statement is based on factual information about the\n", + "product, such as its features, performance, or occurrences during use, and is not influenced by personal feelings \n", + "or opinions. 
Even if the statement seems to express dissatisfaction or a negative experience, it should be \n", + "classified as \"Objective\" if it provides factual information about the product's characteristics or performance.\n", "\n", "Examples:\n", "\n", - "Input: Not loud enough and doesn't turn on like it should.\n", + "Input: The color of this phone is black.\n", "Output: Objective\n", "\n", - "Input: I personally think the sound quality is not up to the mark.\n", + "Input: I think this phone is too expensive for its features.\n", "Output: Subjective\n", "\n", - "Input: The phone's battery lasts for 10 hours.\n", + "Input: Not loud enough and doesn't turn on like it should.\n", "Output: Objective\n", "\n", - "Input: The mic is great.\n", - "Output: Subjective\n", + "Input: All three broke within two months of use.\n", + "Output: Objective\n", "\n", - "Input: Will order from them again!\n", + "Input: I don't like the design of this laptop.\n", "Output: Subjective\n", "\n" ], "text/plain": [ - "\u001B[1;32mIdentify if the provided product review is \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mexpressing personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m or \u001B[0m\n", - "\u001B[32m\"Objective\"\u001B[0m\u001B[1;32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[1;32mbased on factual information\u001B[0m\u001B[1;32m)\u001B[0m\u001B[1;32m. Consider a statement as subjective if it reflects personal judgment or \u001B[0m\n", - "\u001B[1;32mpreference, and as objective if it states verifiable facts or features.\u001B[0m\n", + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes. 
An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the\u001b[0m\n", + "\u001b[1;32mproduct, such as its features, performance, or occurrences during use, and is not influenced by personal feelings \u001b[0m\n", + "\u001b[1;32mor opinions. Even if the statement seems to express dissatisfaction or a negative experience, it should be \u001b[0m\n", + "\u001b[1;32mclassified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m if it provides factual information about the product's characteristics or performance.\u001b[0m\n", "\n", - "\u001B[1;32mExamples:\u001B[0m\n", + "\u001b[1;32mExamples:\u001b[0m\n", "\n", - "\u001B[1;32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The phone's battery lasts for \u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;32m hours.\u001B[0m\n", - "\u001B[1;32mOutput: Objective\u001B[0m\n", + "\u001b[1;32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: The mic is great.\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n", + "\u001b[1;32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[1;32mInput: Will order from them again!\u001B[0m\n", - "\u001B[1;32mOutput: Subjective\u001B[0m\n" + "\u001b[1;32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n" ] }, "metadata": {}, @@ 
-691,7 +789,20 @@ "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" ] }, "metadata": {}, @@ -701,7 +812,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 5/5 [00:00<00:00, 35.93it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 32.77it/s]\n" ] }, { @@ -715,7 +826,7 @@ "text/plain": [ "\n", "\n", - "=> Iteration #\u001B[1;36m2\u001B[0m: Comparing to ground truth, analyzing and improving \u001B[33m...\u001B[0m\n" + "=> Iteration #\u001b[1;36m2\u001b[0m: Comparing to ground truth, analyzing and improving \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -728,7 +839,7 @@ "
\n", - " text ground_truth subjectivity_detection score ground_truth__x__subβ¦ \n", + " text ground_truth subjectivity_detection score subjectivity_detectiβ¦ \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", - " The mic is great. Subjective Subjective {'Subjective': True \n", - " -0.022607480000000055, \n", - " 'Objective': -3.80076} \n", + " The mic is great. Subjective Objective {'Subjective': False \n", + " -5.5978823, \n", + " 'Objective': \n", + " -0.003712546499999969} \n", " Will order from them Subjective Subjective {'Subjective': True \n", - " again! -0.05627503599999997, \n", + " again! -0.6518025399999999, \n", " 'Objective': \n", - " -2.9055107} \n", + " -0.7362752} \n", " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -2.897738, \n", + " doesn't turn on like -4.672154, \n", " it should. 'Objective': \n", - " -0.05672692499999995} \n", + " -0.009396199000000013} \n", " The phone doesn't seem Objective Objective {'Subjective': True \n", - " to accept anything -3.8168292, \n", + " to accept anything -4.6575603, \n", " except CBR mp3s 'Objective': \n", - " -0.022242965000000038} \n", + " -0.009534958999999949} \n", " All three broke within Objective Objective {'Subjective': True \n", - " two months of use. -4.800799, \n", + " two months of use. 
-3.9477026, \n", " 'Objective': \n", - " -0.008257226000000043} \n", + " -0.019487570000000034} \n", " \n", "\n" ], "text/plain": [ " \n", - " \u001B[1;35m \u001B[0m\u001B[1;35mtext \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35msubjectivity_detection\u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mscore \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mground_truth__x__subβ¦\u001B[0m\u001B[1;35m \u001B[0m \n", + " \u001b[1;35m \u001b[0m\u001b[1;35mtext \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mground_truth\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detection\u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mscore \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35msubjectivity_detectiβ¦\u001b[0m\u001b[1;35m \u001b[0m \n", " βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ \n", - " The mic is great. Subjective Subjective {'Subjective': True \n", - " -0.022607480000000055, \n", - " 'Objective': -3.80076} \n", - " \u001B[2m \u001B[0m\u001B[2mWill order from them \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mSubjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2magain! 
\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.05627503599999997, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-2.9055107} \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " The mic is great. Subjective Objective {'Subjective': False \n", + " -5.5978823, \n", + " 'Objective': \n", + " -0.003712546499999969} \n", + " \u001b[2m \u001b[0m\u001b[2mWill order from them \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mSubjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2magain! \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.6518025399999999, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.7362752} \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " Not loud enough and Objective Objective {'Subjective': True \n", - " doesn't turn on like -2.897738, \n", + " doesn't turn on like -4.672154, \n", " it should. 
'Objective': \n", - " -0.05672692499999995} \n", - " \u001B[2m \u001B[0m\u001B[2mThe phone doesn't seem\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mObjective \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m{'Subjective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2mTrue \u001B[0m\u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mto accept anything \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-3.8168292, \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m\u001B[2mexcept CBR mp3s \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m'Objective': \u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", - " \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m \u001B[2m \u001B[0m\u001B[2m-0.022242965000000038}\u001B[0m\u001B[2m \u001B[0m \u001B[2m \u001B[0m \n", + " -0.009396199000000013} \n", + " \u001b[2m \u001b[0m\u001b[2mThe phone doesn't seem\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mObjective \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m{'Subjective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2mTrue \u001b[0m\u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mto accept anything \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-4.6575603, \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m\u001b[2mexcept CBR mp3s \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m'Objective': \u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", + " \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m \u001b[2m \u001b[0m\u001b[2m-0.009534958999999949}\u001b[0m\u001b[2m \u001b[0m \u001b[2m \u001b[0m \n", " All three broke 
within Objective Objective {'Subjective': True \n", - " two months of use. -4.800799, \n", + " two months of use. -3.9477026, \n", " 'Objective': \n", - " -0.008257226000000043} \n", + " -0.019487570000000034} \n", " \n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "
Accuracy = 80.00%\n", + "\n" + ], + "text/plain": [ + "\u001b[1;31mAccuracy = \u001b[0m\u001b[1;36m80.00\u001b[0m\u001b[1;31m%\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -798,20 +924,30 @@ "\n" ], "text/plain": [ - "Analyze evaluation experience \u001B[33m...\u001B[0m\n" + "Analyze evaluation experience \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|βββββββββββββββββββββββββββββββ| 1/1 [00:00<00:00, 221.92it/s]\n", + "100%|ββββββββββββββββββββββββββββββββ| 1/1 [00:00<00:00, 19.60it/s]\n" + ] + }, { "data": { "text/html": [ - "
Number of errors: 0\n", + "Error analysis for skill \"subjectivity_detection\":\n", + "\n", "
\n" ], "text/plain": [ - "Number of errors: \u001B[1;36m0\u001B[0m\n" + "Error analysis for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" ] }, "metadata": {}, @@ -820,11 +956,38 @@ { "data": { "text/html": [ - "Accuracy = 100.00%\n", + "\n", + "Input: The mic is great.\n", + "Prediction: Objective\n", + "Ground truth: Subjective\n", + "Error reason: The model made an error because the statement \"The mic is great\" is subjective, as it expresses a \n", + "personal opinion or feeling about the quality of the microphone, rather than providing factual information about \n", + "its characteristics or performance.\n", + "\n", "\n" ], "text/plain": [ - "\u001B[1;31mAccuracy = \u001B[0m\u001B[1;36m100.00\u001B[0m\u001B[1;31m%\u001B[0m\n" + "\n", + "\u001b[32mInput: The mic is great.\u001b[0m\n", + "\u001b[32mPrediction: Objective\u001b[0m\n", + "\u001b[32mGround truth: Subjective\u001b[0m\n", + "\u001b[32mError reason: The model made an error because the statement \u001b[0m\u001b[32m\"The mic is great\"\u001b[0m\u001b[32m is subjective, as it expresses a \u001b[0m\n", + "\u001b[32mpersonal opinion or feeling about the quality of the microphone, rather than providing factual information about \u001b[0m\n", + "\u001b[32mits characteristics or performance.\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Improve \"subjectivity_detection\" skill based on analysis ...\n", + "\n" + ], + "text/plain": [ + "Improve \u001b[32m\"subjectivity_detection\"\u001b[0m skill based on analysis \u001b[33m...\u001b[0m\n" ] }, "metadata": {}, @@ -833,16 +996,116 @@ { "data": { "text/html": [ - "Accuracy threshold reached (1.0 >= 0.95)\n", + "Updated instructions for skill \"subjectivity_detection\":\n", + "\n", + "
\n" + ], + "text/plain": [ + "Updated instructions for skill \u001b[32m\"subjectivity_detection\"\u001b[0m:\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n", + "based on personal opinions, feelings, or tastes, and may include evaluative words such as 'great', 'good', 'bad', \n", + "'like', 'dislike', etc. An \"Objective\" statement is based on factual information about the product, such as its \n", + "features, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \n", + "typically includes factual descriptions or reports of product performance. Even if the statement seems to express \n", + "dissatisfaction or a negative experience, it should be classified as \"Objective\" if it provides factual information\n", + "about the product's characteristics or performance.\n", + "\n", + "Examples:\n", + "\n", + "Input: The color of this phone is black.\n", + "Output: Objective\n", + "\n", + "Input: I think this phone is too expensive for its features.\n", + "Output: Subjective\n", + "\n", + "Input: Not loud enough and doesn't turn on like it should.\n", + "Output: Objective\n", + "\n", + "Input: All three broke within two months of use.\n", + "Output: Objective\n", + "\n", + "Input: I don't like the design of this laptop.\n", + "Output: Subjective\n", + "\n", + "Input: The mic is great.\n", + "Output: Subjective\n", "\n" ], "text/plain": [ - "Accuracy threshold reached \u001B[1m(\u001B[0m\u001B[1;36m1.0\u001B[0m >= \u001B[1;36m0.95\u001B[0m\u001B[1m)\u001B[0m\n" + "\u001b[1;32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statements. 
A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[1;32m statement is \u001b[0m\n", + "\u001b[1;32mbased on personal opinions, feelings, or tastes, and may include evaluative words such as \u001b[0m\u001b[32m'great'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'good'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'bad'\u001b[0m\u001b[1;32m, \u001b[0m\n", + "\u001b[32m'like'\u001b[0m\u001b[1;32m, \u001b[0m\u001b[32m'dislike'\u001b[0m\u001b[1;32m, etc. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m statement is based on factual information about the product, such as its \u001b[0m\n", + "\u001b[1;32mfeatures, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \u001b[0m\n", + "\u001b[1;32mtypically includes factual descriptions or reports of product performance. Even if the statement seems to express \u001b[0m\n", + "\u001b[1;32mdissatisfaction or a negative experience, it should be classified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[1;32m if it provides factual information\u001b[0m\n", + "\u001b[1;32mabout the product's characteristics or performance.\u001b[0m\n", + "\n", + "\u001b[1;32mExamples:\u001b[0m\n", + "\n", + "\u001b[1;32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[1;32mOutput: Objective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[1;32mInput: The mic is great.\u001b[0m\n", + "\u001b[1;32mOutput: Subjective\u001b[0m\n" ] }, "metadata": {}, "output_type": 
"display_data" }, + { + "data": { + "text/html": [ + "Re-apply subjectivity_detection skill to dataset ...\n", + "
\n" + ], + "text/plain": [ + "Re-apply subjectivity_detection skill to dataset \u001b[33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 24.79it/s]\n" + ] + }, { "data": { "text/html": [ @@ -858,12 +1121,11 @@ } ], "source": [ - "learning_experience = agent.learn(learning_iterations=3, accuracy_threshold=0.95)" + "ground_truth_signal = agent.learn(learning_iterations=3, accuracy_threshold=0.95)" ] }, { "cell_type": "markdown", - "id": "ee1573e3", "metadata": {}, "source": [ "Let's see the final instructions:" @@ -871,8 +1133,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "f5b67bd4", + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -881,53 +1142,67 @@ "Total Agent Skills: 1\n", "\n", "subjectivity_detection\n", - "Identify if the provided product review is \"Subjective\" (expressing personal feelings, tastes, or opinions) or \n", - "\"Objective\" (based on factual information). Consider a statement as subjective if it reflects personal judgment or \n", - "preference, and as objective if it states verifiable facts or features.\n", + "Classify a product review as either expressing \"Subjective\" or \"Objective\" statements. A \"Subjective\" statement is \n", + "based on personal opinions, feelings, or tastes, and may include evaluative words such as 'great', 'good', 'bad', \n", + "'like', 'dislike', etc. An \"Objective\" statement is based on factual information about the product, such as its \n", + "features, performance, or occurrences during use, and is not influenced by personal feelings or opinions. 
It \n", + "typically includes factual descriptions or reports of product performance. Even if the statement seems to express \n", + "dissatisfaction or a negative experience, it should be classified as \"Objective\" if it provides factual information\n", + "about the product's characteristics or performance.\n", "\n", "Examples:\n", "\n", - "Input: Not loud enough and doesn't turn on like it should.\n", + "Input: The color of this phone is black.\n", "Output: Objective\n", "\n", - "Input: I personally think the sound quality is not up to the mark.\n", + "Input: I think this phone is too expensive for its features.\n", "Output: Subjective\n", "\n", - "Input: The phone's battery lasts for 10 hours.\n", + "Input: Not loud enough and doesn't turn on like it should.\n", + "Output: Objective\n", + "\n", + "Input: All three broke within two months of use.\n", "Output: Objective\n", "\n", - "Input: The mic is great.\n", + "Input: I don't like the design of this laptop.\n", "Output: Subjective\n", "\n", - "Input: Will order from them again!\n", + "Input: The mic is great.\n", "Output: Subjective\n", "\n", "\n" ], "text/plain": [ - "\u001B[1;34mTotal Agent Skills: \u001B[0m\u001B[1;34m1\u001B[0m\n", + "\u001b[1;34mTotal Agent Skills: \u001b[0m\u001b[1;34m1\u001b[0m\n", "\n", - "\u001B[1;4;32msubjectivity_detection\u001B[0m\n", - "\u001B[32mIdentify if the provided product review is \u001B[0m\u001B[32m\"Subjective\"\u001B[0m\u001B[32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[32mexpressing personal feelings, tastes, or opinions\u001B[0m\u001B[1;32m)\u001B[0m\u001B[32m or \u001B[0m\n", - "\u001B[32m\"Objective\"\u001B[0m\u001B[32m \u001B[0m\u001B[1;32m(\u001B[0m\u001B[32mbased on factual information\u001B[0m\u001B[1;32m)\u001B[0m\u001B[32m. 
Consider a statement as subjective if it reflects personal judgment or \u001B[0m\n", - "\u001B[32mpreference, and as objective if it states verifiable facts or features.\u001B[0m\n", + "\u001b[1;4;32msubjectivity_detection\u001b[0m\n", + "\u001b[32mClassify a product review as either expressing \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[32m or \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m statements. A \u001b[0m\u001b[32m\"Subjective\"\u001b[0m\u001b[32m statement is \u001b[0m\n", + "\u001b[32mbased on personal opinions, feelings, or tastes, and may include evaluative words such as \u001b[0m\u001b[32m'great'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'good'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'bad'\u001b[0m\u001b[32m, \u001b[0m\n", + "\u001b[32m'like'\u001b[0m\u001b[32m, \u001b[0m\u001b[32m'dislike'\u001b[0m\u001b[32m, etc. An \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m statement is based on factual information about the product, such as its \u001b[0m\n", + "\u001b[32mfeatures, performance, or occurrences during use, and is not influenced by personal feelings or opinions. It \u001b[0m\n", + "\u001b[32mtypically includes factual descriptions or reports of product performance. 
Even if the statement seems to express \u001b[0m\n", + "\u001b[32mdissatisfaction or a negative experience, it should be classified as \u001b[0m\u001b[32m\"Objective\"\u001b[0m\u001b[32m if it provides factual information\u001b[0m\n", + "\u001b[32mabout the product's characteristics or performance.\u001b[0m\n", "\n", - "\u001B[32mExamples:\u001B[0m\n", + "\u001b[32mExamples:\u001b[0m\n", "\n", - "\u001B[32mInput: Not loud enough and doesn't turn on like it should.\u001B[0m\n", - "\u001B[32mOutput: Objective\u001B[0m\n", + "\u001b[32mInput: The color of this phone is black.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: I personally think the sound quality is not up to the mark.\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: I think this phone is too expensive for its features.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", "\n", - "\u001B[32mInput: The phone's battery lasts for \u001B[0m\u001B[1;32m10\u001B[0m\u001B[32m hours.\u001B[0m\n", - "\u001B[32mOutput: Objective\u001B[0m\n", + "\u001b[32mInput: Not loud enough and doesn't turn on like it should.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: The mic is great.\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: All three broke within two months of use.\u001b[0m\n", + "\u001b[32mOutput: Objective\u001b[0m\n", "\n", - "\u001B[32mInput: Will order from them again!\u001B[0m\n", - "\u001B[32mOutput: Subjective\u001B[0m\n", + "\u001b[32mInput: I don't like the design of this laptop.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", + "\n", + "\u001b[32mInput: The mic is great.\u001b[0m\n", + "\u001b[32mOutput: Subjective\u001b[0m\n", "\n" ] }, @@ -941,7 +1216,6 @@ }, { "cell_type": "markdown", - "id": "54ec4568", "metadata": {}, "source": [ "... 
and predictions created by the skill:" @@ -949,10 +1223,29 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "baa69db8", + "execution_count": 14, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|ββββββββββββββββββββββββββββββββ| 5/5 [00:00<00:00, 24.62it/s]\n" + ] + }, { "data": { "text/html": [ @@ -986,35 +1279,35 @@ "The mic is great. \n", "Subjective \n", "Subjective \n", - "{'Subjective': -0.022607480000000055, 'Objecti... \n", + "{'Subjective': -0.11115719, 'Objective': -2.25... \n", " \n", "\n", " \n", "1 \n", "Will order from them again! \n", "Subjective \n", "Subjective \n", - "{'Subjective': -0.05627503599999997, 'Objectiv... \n", + "{'Subjective': -0.44846975999999994, 'Objectiv... \n", "\n", " \n", "2 \n", "Not loud enough and doesn't turn on like it sh... \n", "Objective \n", "Objective \n", - "{'Subjective': -2.897738, 'Objective': -0.0567... \n", + "{'Subjective': -4.4792867, 'Objective': -0.011... \n", "\n", " \n", "3 \n", "The phone doesn't seem to accept anything exce... \n", "Objective \n", "Objective \n", - "{'Subjective': -3.8168292, 'Objective': -0.022... \n", + "{'Subjective': -4.990218, 'Objective': -0.0068... \n", "\n", " \n", " \n", "\n", @@ -1029,25 +1322,24 @@ "4 All three broke within two months of use. Objective \n", "\n", " subjectivity_detection score \n", - "0 Subjective {'Subjective': -0.022607480000000055, 'Objecti... \n", - "1 Subjective {'Subjective': -0.05627503599999997, 'Objectiv... \n", - "2 Objective {'Subjective': -2.897738, 'Objective': -0.0567... \n", - "3 Objective {'Subjective': -3.8168292, 'Objective': -0.022... \n", - "4 Objective {'Subjective': -4.800799, 'Objective': -0.0082... 
" + "0 Subjective {'Subjective': -0.11115719, 'Objective': -2.25... \n", + "1 Subjective {'Subjective': -0.44846975999999994, 'Objectiv... \n", + "2 Objective {'Subjective': -4.4792867, 'Objective': -0.011... \n", + "3 Objective {'Subjective': -4.990218, 'Objective': -0.0068... \n", + "4 Objective {'Subjective': -4.19226, 'Objective': -0.01522... " ] }, - "execution_count": 6, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "learning_experience.predictions" + "agent.run(dataset)" ] }, { "cell_type": "markdown", - "id": "b8d49385", "metadata": {}, "source": [ "## Applying learned skills to the real data\n", @@ -1057,8 +1349,7 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "60a79462", + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1114,7 +1405,7 @@ "3 VERY DISAPPOINTED." ] }, - "execution_count": 7, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1131,26 +1422,37 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "2f2bf273", + "execution_count": 16, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "4 \n", "All three broke within two months of use. \n", "Objective \n", "Objective \n", - "{'Subjective': -4.800799, 'Objective': -0.0082... \n", + "{'Subjective': -4.19226, 'Objective': -0.01522... \n", "Applying skill: subjectivity_detection\n", + "\n" + ], + "text/plain": [ + "Applying skill: subjectivity_detection\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 4/4 [00:00<00:00, 32.32it/s]\n" + "100%|ββββββββββββββββββββββββββββββββ| 4/4 [00:04<00:00, 1.00s/it]\n" ] } ], "source": [ - "result = agent.apply_skills(test_df)" + "predictions = agent.run(test_df)" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "e6c50ede", + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1184,25 +1486,25 @@ "0 \n", "Doesn't hold charge. 
\n", "Objective \n", - "{'Subjective': -4.9062243, 'Objective': -0.007... \n", + "{'Subjective': -6.241702, 'Objective': -0.0019... \n", " \n", "\n", " \n", "1 \n", "Excellent bluetooth headset \n", - "Objective \n", - "{'Subjective': -1.450324, 'Objective': -0.2672... \n", + "Subjective \n", + "{'Subjective': -0.39792176999999995, 'Objectiv... \n", "\n", " \n", "2 \n", "I love this thing! \n", "Subjective \n", - "{'Subjective': -0.0014673689999999905, 'Object... \n", + "{'Subjective': -0.0008572661999999637, 'Object... \n", "\n", " \n", " \n", "\n", @@ -1211,40 +1513,32 @@ "text/plain": [ " text subjectivity_detection \\\n", "0 Doesn't hold charge. Objective \n", - "1 Excellent bluetooth headset Objective \n", + "1 Excellent bluetooth headset Subjective \n", "2 I love this thing! Subjective \n", "3 VERY DISAPPOINTED. Subjective \n", "\n", " score \n", - "0 {'Subjective': -4.9062243, 'Objective': -0.007... \n", - "1 {'Subjective': -1.450324, 'Objective': -0.2672... \n", - "2 {'Subjective': -0.0014673689999999905, 'Object... \n", - "3 {'Subjective': -0.17851222999999997, 'Objectiv... " + "0 {'Subjective': -6.241702, 'Objective': -0.0019... \n", + "1 {'Subjective': -0.39792176999999995, 'Objectiv... \n", + "2 {'Subjective': -0.0008572661999999637, 'Object... \n", + "3 {'Subjective': -0.21449489999999996, 'Objectiv... 
" ] }, - "execution_count": 9, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "result.predictions" + "predictions" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0922915b", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -1256,9 +1550,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/examples/summarization_skill.ipynb b/examples/summarization_skill.ipynb index 27c4792..609d64e 100644 --- a/examples/summarization_skill.ipynb +++ b/examples/summarization_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Summarization skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -62,7 +60,7 @@ "2 Vitamin D is a fat-soluble nutrient. It is one..." ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -79,15 +77,27 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "3 \n", "VERY DISAPPOINTED. \n", "Subjective \n", - "{'Subjective': -0.17851222999999997, 'Objectiv... \n", + "{'Subjective': -0.21449489999999996, 'Objectiv... 
\n", "Applying skill: summarization\n", + "\n" + ], + "text/plain": [ + "Applying skill: summarization\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββββ| 3/3 [00:05<00:00, 1.73s/it]\n" + "100%|ββββββββββββββββββββββββββββββββ| 3/3 [00:00<00:00, 52.08it/s]\n" ] }, { @@ -147,7 +157,7 @@ "2 \\nVitamin D is a fat-soluble nutrient that is ... " ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -163,16 +173,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -184,7 +193,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/text_generation_skill.ipynb b/examples/text_generation_skill.ipynb index 88eb374..d89b811 100644 --- a/examples/text_generation_skill.ipynb +++ b/examples/text_generation_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Text generation skill" @@ -11,7 +10,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "a2f6d99b", "metadata": {}, "outputs": [ { @@ -154,14 +152,26 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ee2cebf", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: text_generation\n", + "\n" + ], + "text/plain": [ + "Applying skill: text_generation\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββ| 10/10 [00:24<00:00, 2.48s/it]\n" + "100%|ββββββββββββββββββββββββββββββ| 10/10 [00:00<00:00, 71.35it/s]\n" ] }, { @@ 
-309,16 +319,15 @@ " )\n", ")\n", "\n", - "run = agent.apply_skills(df)\n", - "run.predictions" + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -330,7 +339,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/translation_skill.ipynb b/examples/translation_skill.ipynb index d830f36..6e87ae9 100644 --- a/examples/translation_skill.ipynb +++ b/examples/translation_skill.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "94ad15ac", "metadata": {}, "source": [ "# Translation skill" @@ -10,8 +9,7 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "a2f6d99b", + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -108,7 +106,7 @@ "9 ΰ€Έΰ€ͺΰ€¨ΰ₯ ΰ€Έΰ€ ΰ€Ήΰ₯ΰ€€ΰ₯ ΰ€Ήΰ₯ΰ€ Hindi" ] }, - "execution_count": 13, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -132,35 +130,29 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "6ee2cebf", + "execution_count": 2, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "Applying skill: translation\n", + "\n" + ], + "text/plain": [ + "Applying skill: translation\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|βββββββββββββββ| 10/10 [00:08<00:00, 1.13it/s]\n" + "100%|ββββββββββββββββββββββββββββββ| 10/10 [00:00<00:00, 61.83it/s]\n" ] - } - ], - "source": [ - "from adala.agents import Agent\n", - "from adala.environments import BasicEnvironment\n", - "from adala.skills.generation.translation import TranslationSkill\n", - "from rich import print\n", - "\n", - "agent = Agent(skills=TranslationSkill(input_data_field='text', target_language='Swahili'))\n", - "\n", - "run = 
agent.apply_skills(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "ee97ee22", - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ @@ -278,21 +270,28 @@ "9 Ndoto zinatimia " ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "run.predictions" + "from adala.agents import Agent\n", + "from adala.environments import BasicEnvironment\n", + "from adala.skills.generation.translation import TranslationSkill\n", + "from rich import print\n", + "\n", + "agent = Agent(skills=TranslationSkill(input_data_field='text', target_language='Swahili'))\n", + "\n", + "agent.run(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "adala", "language": "python", - "name": "python3" + "name": "adala" }, "language_info": { "codemirror_mode": { @@ -304,7 +303,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/tests/test_agent_basics.py b/tests/test_agent_basics.py new file mode 100644 index 0000000..7d2f10d --- /dev/null +++ b/tests/test_agent_basics.py @@ -0,0 +1,357 @@ +import pandas as pd + +from utils import patching, PatchedCalls + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, first row, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply first skill 0->1, second row, GT = 1 1 1 -> ERROR! 
+ {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, + # call[2]: prepare error inputs for first skill 0->1, second row + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': 'Input: 0 0 0'}, + # call[3]: analyze errors first skill 0->1 + { + 'input': { + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1' + }, + 'output': { + 'reason': '0 transformed to 5 instead of 1' + } + }, + # call[4]: build error report for first skill 0->1 + { + 'input': { + 'predictions_and_errors': [{ + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1', + 'reason': '0 transformed to 5 instead of 1' + }]}, + 'output': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Error reason: 0 transformed to 5 instead of 1 + ''', + }, + # call[5]: improve first skill 0->1 + { + 'input': { + 'error_analysis': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Error reason: 0 transformed to 5 instead of 1 + '''}, + 'output': { + 'new_instruction': 'Transform 0 to 1' + } + }, + # call[6]: reapply skill 0->1, first row + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[7]: reapply skill 0->1, second row + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 1 1'}}, + + ] +) +def test_agent_quickstart_single_skill(): + from adala.agents import Agent + from adala.skills import LinearSkillSet, LLMSkill + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills=[LLMSkill(name="0->1", instructions="...", input_data_field="input")] + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1'], + ['0 0 0', '1 1 1'] + ], columns=['input', 'gt_0']), + ground_truth_columns={ + "0->1": "gt_0" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == 'Transform 0 to 1' + # assert final accuracy for skill 0->1 + pd.testing.assert_series_equal( 
+ pd.Series({'0->1': 1.0}), + ground_truth_signal.get_accuracy() + ) + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, first row, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply first skill 0->1, second row, GT = 1 1 1 -> ERROR! + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 5 1'}}, + # call[2]: apply second skill 1->2, first row, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[3]: apply second skill 1->2, second row, GT = 2 2 2 -> ERROR + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[4]: prepare error inputs for first skill 0->1, second row + {'input': {'input': '0 0 0', '0->1': '1 5 1'}, 'output': 'Input: 0 0 0'}, + # call[5]: analyze errors first skill 0->1, error in the second row (0 0 0 -> 1 5 1) + { + 'input': { + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1' + }, + 'output': { + 'reason': '0 transformed to 5 instead of 1' + } + }, + # call[6]: build error report for first skill 0->1 + { + 'input': { + 'predictions_and_errors': [{ + 'input': 'Input: 0 0 0', + 'prediction': '1 5 1', + 'ground_truth': '1 1 1', + 'reason': '0 transformed to 5 instead of 1' + }]}, + 'output': '''\ + Input: 0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Error reason: 0 transformed to 5 instead of 1 + ''', + }, + # call[7]: improve first skill 0->1 + { + 'input': { + 'error_analysis': '''\ + Input: 
0 0 0 + Prediction: 1 5 1 + Ground Truth: 1 1 1 + Error reason: 0 transformed to 5 instead of 1 + '''}, + 'output': { + 'new_instruction': 'Transform 0 to 1' + } + }, + # call[8]: reapply first skill 0->1, first row, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[9]: reapply first skill 0->1, second row, GT = 1 1 1 + {'input': {'input': '0 0 0'}, 'output': {'predictions': '1 1 1'}}, + # call[10]: reapply second skill 1->2, first row, GT = 2 5 2 -> ERROR! + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 2 2'}}, + # call[11]: reapply second skill 1->2, second row, GT = 2 2 2 + {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, + # call[12]: prepare error inputs for second skill 1->2, first row + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': 'Input: 1 5 1'}, + # call[13]: analyze errors second skill 1->2 (first row 2 2 2 instead of 2 5 2) + { + 'input': { + 'input': 'Input: 1 5 1', + 'prediction': '2 2 2', + 'ground_truth': '2 5 2', + }, + 'output': { + 'reason': '5 transformed to 2 instead of remaining 5' + } + }, + # call[14]: build error report for second skill 1->2 + { + 'input': { + 'predictions_and_errors': [{ + 'input': 'Input: 1 5 1', + 'prediction': '2 2 2', + 'ground_truth': '2 5 2', + 'reason': '5 transformed to 2 instead of remaining 5' + }]}, + 'output': '''\ + Input: 1 5 1 + Prediction: 2 2 2 + Ground Truth: 2 5 2 + Error reason: 5 transformed to 2 instead of remaining 5 + ''', + }, + # call[15]: improve second skill 1->2 + { + 'input': { + 'error_analysis': '''\ + Input: 1 5 1 + Prediction: 2 2 2 + Ground Truth: 2 5 2 + Error reason: 5 transformed to 2 instead of remaining 5 + '''}, + 'output': { + 'new_instruction': 'Transform 1 to 2' + } + }, + # call[16]: reapply second skill 1->2, first row, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[17]: reapply second skill 1->2, second 
row, GT = 2 2 2 + {'input': {'input': '0 0 0', '0->1': '1 1 1'}, 'output': {'predictions': '2 2 2'}}, + ] +) +def test_agent_quickstart_two_skills(): + from adala.agents import Agent + from adala.skills import LinearSkillSet, LLMSkill + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills=[ + LLMSkill(name='0->1', instructions='...', input_data_field='input'), + LLMSkill(name='1->2', instructions='...', input_data_field='0->1') + ] + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1', '2 5 2'], + ['0 0 0', '1 1 1', '2 2 2'] + ], columns=['input', 'gt_0', 'gt_1']), + ground_truth_columns={ + "0->1": "gt_0", + "1->2": "gt_1" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == 'Transform 0 to 1' + assert agent.skills['1->2'].instructions == 'Transform 1 to 2' + # assert final accuracy for both skills (0->1 and 1->2) + pd.testing.assert_series_equal( + pd.Series({'0->1': 1.0, '1->2': 1.0}), + ground_truth_signal.get_accuracy() + ) + + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[ + # calling API model list for the first runtime (student) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + # calling API model list for the second runtime (teacher) + {'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}, {'id': 'gpt-3.5-turbo'}, {'id': 'gpt-4'}]}}, + ], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # call[0]: apply first skill 0->1, GT = 1 5 1 + {'input': {'input': '0 5 0'}, 'output': {'predictions': '1 5 1'}}, + # call[1]: apply second skill 1->2, GT = 2 5 2 -> ERROR! 
+ {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 4'}}, + # call[2]: apply third skill 2->3, GT = 3 5 3 -> Also error, but it is due to previous error + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 4'}, 'output': {'predictions': '3 5 4'}}, + # call[3]: prepare error input for second skill 1->2 (2 5 4 instead of 2 5 2) + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 4', '2->3': '3 5 4'}, 'output': 'Input: 1 5 1'}, + # call[4]: analyze errors for second skill 1->2 (2 5 4 instead of 2 5 2) + { + 'input': { + 'input': 'Input: 1 5 1', + 'prediction': '2 5 4', + 'ground_truth': '2 5 2', + }, + 'output': { + 'reason': '1 transformed to 4 instead of 2' + } + }, + # call[5]: build error report for second skill 1->2 + { + 'input': { + 'predictions_and_errors': [{ + 'input': 'Input: 1 5 1', + 'prediction': '2 5 4', + 'ground_truth': '2 5 2', + 'reason': '1 transformed to 4 instead of 2' + }]}, + 'output': '''\ + Input: 1 5 1 + Prediction: 2 5 4 + Ground Truth: 2 5 2 + Error reason: 1 transformed to 4 instead of 2 + ''', + }, + # call[6]: improve second skill 1->2 + { + 'input': { + 'error_analysis': '''\ + Input: 1 5 1 + Prediction: 2 5 4 + Ground Truth: 2 5 2 + Error reason: 1 transformed to 4 instead of 2 + '''}, + 'output': { + 'new_instruction': 'Transform 1 to 2' + } + }, + # call[7]: reapply second skill 1->2, GT = 2 5 2 + {'input': {'input': '0 5 0', '0->1': '1 5 1'}, 'output': {'predictions': '2 5 2'}}, + # call[8]: reapply third skill 2->3, GT = 3 5 3 + {'input': {'input': '0 5 0', '0->1': '1 5 1', '1->2': '2 5 2'}, 'output': {'predictions': '3 5 3'}}, + ] +) +def test_agent_quickstart_three_skills_only_second_fail(): + from adala.agents import Agent + from adala.skills import LinearSkillSet, LLMSkill + from adala.environments import BasicEnvironment + + agent = Agent( + skills=LinearSkillSet( + skills=[ + LLMSkill(name="0->1", instructions="...", input_data_field="input"), + LLMSkill(name="1->2", 
instructions="...", input_data_field="0->1"), + LLMSkill(name="2->3", instructions="...", input_data_field="1->2"), + ] + ), + environment=BasicEnvironment( + ground_truth_dataset=pd.DataFrame([ + ['0 5 0', '1 5 1', '2 5 2', '3 5 3'], + ], columns=['input', 'gt_0', 'gt_1', 'gt_2']), + ground_truth_columns={ + "0->1": "gt_0", + "1->2": "gt_1", + "2->3": "gt_2" + } + ) + ) + + ground_truth_signal = agent.learn() + + # assert final instruction + assert agent.skills['0->1'].instructions == '...' + assert agent.skills['1->2'].instructions == 'Transform 1 to 2' + assert agent.skills['2->3'].instructions == '...' + # assert final accuracy for all three skills + pd.testing.assert_series_equal( + pd.Series({'0->1': 1.0, '1->2': 1.0, '2->3': 1.0}), + ground_truth_signal.get_accuracy() + ) diff --git a/tests/test_classification.py b/tests/test_classification.py index 92a2b1e..e685dd9 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -18,13 +18,15 @@ def process_record_generator(*args, **kwargs): yield {'sentiment': 'Neutral'} # errors - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} - yield {'reason': 'Test reason'} + if i < 2: + yield {'reason': 'Test reason'} + yield {'reason': 'Test reason'} + yield {'reason': 'Test reason'} + yield {'reason': 'Test reason'} + yield {'': 'Test reason'} - # instruction generation - yield {'new_instruction': 'Test instruction'} + # instruction generation + yield {'new_instruction': 'Test instruction'} # test yield {'sentiment': 'Positive'} @@ -64,7 +66,7 @@ def test_classification_skill( # connect to a dataset environment=BasicEnvironment( ground_truth_dataset=train_dataset, - ground_truth_column="ground_truth" + ground_truth_columns={"sentiment": "ground_truth"} ), # define a skill skills=ClassificationSkill( @@ -75,15 +77,15 @@ def test_classification_skill( ), ) run = agent.learn(learning_iterations=3, accuracy_threshold=0.95) - assert 
run.get_accuracy()['sentiment'] > 0.8 print('\n\n=> Final instructions:') print('=====================') - print(f'{run.updated_instructions}') + print(f'{agent.skills["sentiment"].instructions}') print('=====================') print('\n=> Run test ...') - run = agent.apply_skills(test_dataset) - print_dataframe(run.predictions) + predictions = agent.run(test_dataset) + print_dataframe(predictions) - assert not run.predictions.empty + assert not predictions.empty diff --git a/tests/test_environments.py b/tests/test_environments.py index ab78eaf..cdc4fca 100644 --- a/tests/test_environments.py +++ b/tests/test_environments.py @@ -1,49 +1,110 @@ +import pandas as pd import pytest -from adala.memories.base import ShortTermMemory -from adala.skills.base import BaseSkill +from adala.skills import LinearSkillSet, LLMSkill from adala.utils.internal_data import InternalDataFrame from adala.environments.base import BasicEnvironment +NaN = float("nan") + + +@pytest.mark.parametrize("skillset, predictions, ground_truth, ground_truth_columns, expected_match, expected_errors", [ + # test single skill, full ground truth signal + ( + LinearSkillSet(skills=[LLMSkill(name='some_skill', input_data_field="text")]), + InternalDataFrame({"text": list('abcd'), "some_skill": ['1', '0', '1', '0']}), + InternalDataFrame({"my_ground_truth": ['1', '1', '1', '1']}), + {"some_skill": "my_ground_truth"}, + # match + InternalDataFrame({"some_skill": [True, False, True, False]}), + # errors + { + "some_skill": InternalDataFrame({ + "predictions": ['0', '0'], "my_ground_truth": ['1', '1']}, index=[1, 3]) + } + ), + # test two linear skills, partial ground truth signal + ( + # skills + LinearSkillSet(skills=[ + LLMSkill(name='skill_1', input_data_field="text"), + LLMSkill(name="skill_2", input_data_field="text") + ]), + # predictions + InternalDataFrame({ + "text": list('abcd'), + "skill_1": ['1', '0', '1', '0'], + "skill_2": ['1', '0', '0', '1'] + }, index=[11, 22, 33, 44]), + # ground truths + 
InternalDataFrame({ + "gt_1": [NaN, '0', NaN, '1'], + "gt_2": ['1', '0', '1', NaN], + }, index=[11, 22, 33, 44]), + {"skill_1": "gt_1", "skill_2": "gt_2"}, + # expected match + InternalDataFrame({ + "skill_1": [NaN, True, NaN, False], + "skill_2": [True, True, False, NaN] + }, index=[11, 22, 33, 44]), + # expected errors + { + "skill_1": InternalDataFrame({ + "predictions": ['0'], "gt_1": ['1']}, index=[44]), + "skill_2": InternalDataFrame({ + "predictions": ['0'], "gt_2": ['1']}, index=[33]) + } + ), + # test two linear skills, no ground truth signal for one skill, different size of dataframes + ( + # skills + LinearSkillSet(skills=[ + LLMSkill(name='skill_1', input_data_field="text"), + LLMSkill(name="skill_2", input_data_field="text") + ]), + # predictions + InternalDataFrame({ + "text": list('abcd'), + "skill_1": ['1', '0', '1', '0'], + "skill_2": ['1', '0', '0', '1'] + }, index=[11, 22, 33, 44]), + # ground truths + InternalDataFrame({ + "gt_1": [NaN, NaN], + "gt_2": ['1', '0'], + }, index=[99, 44]), + {"skill_1": "gt_1", "skill_2": "gt_2"}, + # expected match + InternalDataFrame({ + "skill_1": [NaN, NaN, NaN, NaN], + "skill_2": [NaN, NaN, NaN, False] + }, index=[11, 22, 33, 44]), + # expected errors + { + "skill_1": InternalDataFrame({ + "predictions": [], "gt_1": []}, index=[]), + "skill_2": InternalDataFrame({ + "predictions": ['1'], "gt_2": ['0']}, index=[44]) + } + ), +]) +def test_basic_env_compare_to_ground_truth(skillset, predictions, ground_truth, ground_truth_columns, expected_match, expected_errors): + + basic_env = BasicEnvironment( + ground_truth_dataset=ground_truth, + ground_truth_columns=ground_truth_columns + ) + + ground_truth_signal = basic_env.compare_to_ground_truth(skillset, predictions) + + # TODO: we should check the index type and dtype, but it's not working for empty and NaN dataframes + pd.testing.assert_frame_equal(expected_match, ground_truth_signal.match, check_index_type=False, check_dtype=False), \ + f'Expected: 
{expected_match}\nGot: {ground_truth_signal.match}' + + if expected_errors is not None: + for skill_name in skillset.skills: + skill_errors = ground_truth_signal.errors[skill_name] + expected_skill_errors = expected_errors[skill_name] + pd.testing.assert_frame_equal(expected_skill_errors, skill_errors, check_index_type=False, check_dtype=False), \ + f'Skill {skill_name}\n\nExpected: {expected_skill_errors}\nGot: {skill_errors}' -class TestSkill(BaseSkill): - def analyze(self, *args, **kwargs): - pass - - def apply(self, *args, **kwargs): - pass - - def improve(self, *args, **kwargs): - pass - - -@pytest.fixture -def basic_env(): - ground_truth_data = InternalDataFrame({"ground_truth": [1, 0, 1, 1]}) - return BasicEnvironment(ground_truth_dataset=ground_truth_data, ground_truth_column='ground_truth') - - -@pytest.fixture -def short_term_memory(): - return ShortTermMemory(predictions=InternalDataFrame({"some_skill": [1, 0, 1, 0]})) - - -@pytest.fixture -def some_skill(): - return TestSkill(name='some_skill', input_data_field="text") - - -def test_compare_to_ground_truth(basic_env, short_term_memory, some_skill): - experience = basic_env.compare_to_ground_truth(some_skill, short_term_memory) - - assert experience is not None - assert "evaluations" in experience.model_dump() - assert experience.ground_truth_column_name == 'ground_truth' - assert experience.match_column_name == 'ground_truth__x__some_skill' - - expected_evaluations = InternalDataFrame({ - "some_skill": [1, 0, 1, 0], - "ground_truth__x__some_skill": [True, True, True, False] - }) - - assert experience.evaluations.equals(expected_evaluations) \ No newline at end of file diff --git a/tests/test_llm_skillset.py b/tests/test_llm_skillset.py index 86cb519..44d1025 100644 --- a/tests/test_llm_skillset.py +++ b/tests/test_llm_skillset.py @@ -34,15 +34,15 @@ strict=False ) def test_llm_linear_skillset(): - from adala.skills.skillset import LinearSkillSet + from adala.skills.skillset import LinearSkillSet, 
LLMSkill from adala.datasets import DataFrameDataset, InternalDataFrame from adala.runtimes import OpenAIRuntime skillset = LinearSkillSet( skills=[ - "Extract named entities", - "Translate to French", - "Create a structured output in JSON format" + LLMSkill(name="skill_0", instructions="Extract named entities", input_data_field="text"), + LLMSkill(name="skill_1", instructions="Translate to French", input_data_field="skill_0"), + LLMSkill(name="skill_2", instructions="Create a structured output in JSON format", input_data_field="skill_1"), ] ) dataset = DataFrameDataset(df=InternalDataFrame([ @@ -50,12 +50,12 @@ def test_llm_linear_skillset(): "Apple's latest product, the iPhone 15, was released in September 2023.", # "The Louvre Museum in Paris houses the Mona Lisa." ], columns=["text"])) - result = skillset.apply( + predictions = skillset.apply( dataset=dataset, runtime=OpenAIRuntime(verbose=True), ) - assert result.predictions.equals(pd.DataFrame.from_records([ + pd.testing.assert_frame_equal(InternalDataFrame.from_records([ # FIRST ROW {'text': 'Barack Obama was the 44th president of the United States.', 'skill_0': '\n- Barack Obama (person)\n- 44th (ordinal number)\n- president (title)\n- United States (location)', @@ -71,4 +71,4 @@ def test_llm_linear_skillset(): # 'skill_0': '\n- The Louvre Museum (Organization)\n- Paris (Location)\n- Mona Lisa (Artwork)', # 'skill_1': "\n- Le MusΓ©e du Louvre (Organisation)\n- Paris (Lieu)\n- La Joconde (Εuvre d'art)", # 'skill_2': '\n{\n "Organisation": "Le MusΓ©e du Louvre",\n "Lieu": "Paris",\n "Εuvre d\'art": "La Joconde"\n}'} - ])) + ]), predictions) diff --git a/tests/utils.py b/tests/utils.py index 401dbc7..df2e55d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -28,7 +28,7 @@ def wrapper(*args, **kwargs): def side_effect(*args, **kwargs): if call_index[0] >= len(data): - raise AssertionError(f"Unexpected call number {call_index[0] + 1} to {target_function}") + raise AssertionError(f"Unexpected call number 
{call_index[0]} to {target_function}") expected_input = data[call_index[0]]['input'] expected_output = data[call_index[0]]['output'] @@ -42,15 +42,25 @@ def side_effect(*args, **kwargs): if strict: if actual_input != expected_input: raise AssertionError( - f"Expected input {expected_input}\n\nbut got {actual_input}\non call number {call_index[0] + 1} to {target_function}") + f"Expected input {expected_input}\n\n" + f"but got {actual_input}\non call number {call_index[0]}" + f" to {target_function}") else: for key, value in expected_input.items(): if key not in actual_input: raise AssertionError( - f"Expected input {expected_input}\n\nbut key '{key}' was missing on actual call number {call_index[0] + 1} to {target_function}.\n\nActual input: {actual_input}") + f"Expected input {expected_input}\n\n" + f"but key '{key}' was missing " + f"on actual call number {call_index[0]} " + f"to {target_function}.\n\n" + f"Actual input: {actual_input}") if actual_input[key] != value: raise AssertionError( - f"Expected input {expected_input}\n\nbut actual_input['{key}'] != expected_input['{key}']\non call number {call_index[0] + 1} to {target_function}.\n\nActual input: {actual_input}") + f"Expected input {expected_input}\n\n" + f"but actual_input['{key}'] != expected_input['{key}']\n" + f"on call number {call_index[0]} " + f"to {target_function}.\n\n" + f"Actual input: {actual_input}") call_index[0] += 1 return expected_output