diff --git a/adala/skills/skillset.py b/adala/skills/skillset.py
index ec759ac..839d82e 100644
--- a/adala/skills/skillset.py
+++ b/adala/skills/skillset.py
@@ -29,11 +29,11 @@ def apply(
         self,
         dataset: Union[Dataset, InternalDataFrame],
         runtime: Runtime,
-        improved_skill: Optional[str] = None
+        improved_skill: Optional[str] = None,
     ) -> InternalDataFrame:
         """
         Apply the skill set to a dataset using a specified runtime.
-        
+
         Args:
             dataset (Union[Dataset, InternalDataFrame]): The dataset to apply the skill set to.
             runtime (Runtime): The runtime environment in which to apply the skills.
@@ -43,7 +43,9 @@ def apply(
         """
 
     @abstractmethod
-    def select_skill_to_improve(self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0) -> Optional[BaseSkill]:
+    def select_skill_to_improve(
+        self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0
+    ) -> Optional[BaseSkill]:
         """
         Select skill to improve based on accuracy.
 
@@ -90,12 +92,12 @@ class LinearSkillSet(SkillSet):
     """
     Represents a sequence of skills that are acquired in a specific order to achieve a goal.
 
-    LinearSkillSet ensures that skills are developed in a sequential manner, determined either 
+    LinearSkillSet ensures that skills are developed in a sequential manner, determined either
     by the provided skill_sequence or by the lexicographical order of skill names.
 
     Attributes:
         skills (Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]]): Provided skills
-        skill_sequence (List[str], optional): Ordered list of skill names indicating the order 
+        skill_sequence (List[str], optional): Ordered list of skill names indicating the order
                                               in which they should be acquired.
                                               By default, lexographical order of skill names is used.
         input_data_field (Optional[str], optional): Name of the input data field. Defaults to None.
@@ -117,8 +119,11 @@ class LinearSkillSet(SkillSet):
     skill_sequence: List[str] = None
     input_data_field: Optional[str] = None
 
-    @field_validator('skills', mode='before')
-    def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]) -> Dict[str, BaseSkill]:
+    @field_validator("skills", mode="before")
+    @classmethod
+    def skills_validator(
+        cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]
+    ) -> Dict[str, BaseSkill]:
         """
         Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances.
 
@@ -140,7 +145,7 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
                 skills[skill_name] = LLMSkill(
                     name=skill_name,
                     instructions=instructions,
-                    input_data_field=input_data_field
+                    input_data_field=input_data_field,
                 )
                 # Linear skillset creates skills pipeline - update input_data_field for next skill
                 input_data_field = skill_name
@@ -150,7 +155,7 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
                 skills[skill_name] = LLMSkill(
                     name=skill_name,
                     instructions=instructions,
-                    input_data_field=input_data_field
+                    input_data_field=input_data_field,
                 )
                 # Linear skillset creates skills pipeline - update input_data_field for next skill
                 input_data_field = skill_name
@@ -164,8 +169,8 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
             raise ValueError(f"skills must be a list or dictionary, not {type(skills)}")
         return skills
 
-    @model_validator(mode='after')
-    def skill_sequence_validator(self) -> 'LinearSkillSet':
+    @model_validator(mode="after")
+    def skill_sequence_validator(self) -> "LinearSkillSet":
         """
         Validates and sets the default order for the skill sequence if not provided.
 
@@ -176,9 +181,11 @@ def skill_sequence_validator(self) -> 'LinearSkillSet':
             # use default skill sequence defined by lexicographical order
             self.skill_sequence = list(self.skills.keys())
         if len(self.skill_sequence) != len(self.skills):
-            raise ValueError(f"skill_sequence must contain all skill names - "
-                             f"length of skill_sequence is {len(self.skill_sequence)} "
-                             f"while length of skills is {len(self.skills)}")
+            raise ValueError(
+                f"skill_sequence must contain all skill names - "
+                f"length of skill_sequence is {len(self.skill_sequence)} "
+                f"while length of skills is {len(self.skills)}"
+            )
         return self
 
     def apply(
@@ -189,7 +196,7 @@ def apply(
     ) -> InternalDataFrame:
         """
         Sequentially applies each skill on the dataset, enhancing the agent's experience.
-        
+
         Args:
             dataset (Dataset): The dataset to apply the skills on.
             runtime (Runtime): The runtime environment in which to apply the skills.
@@ -201,7 +208,9 @@ def apply(
         predictions = None
         if improved_skill:
             # start from the specified skill, assuming previous skills have already been applied
-            skill_sequence = self.skill_sequence[self.skill_sequence.index(improved_skill):]
+            skill_sequence = self.skill_sequence[
+                self.skill_sequence.index(improved_skill) :
+            ]
         else:
             skill_sequence = self.skill_sequence
         for i, skill_name in enumerate(skill_sequence):
@@ -210,16 +219,14 @@ def apply(
             input_dataset = dataset if i == 0 else predictions
             print_text(f"Applying skill: {skill_name}")
             predictions = skill.apply(input_dataset, runtime)
-        
+
         return predictions
 
     def select_skill_to_improve(
-        self,
-        accuracy: Mapping,
-        accuracy_threshold: Optional[float] = 1.0
+        self, accuracy: Mapping, accuracy_threshold: Optional[float] = 0.9
     ) -> Optional[BaseSkill]:
         """
-        Selects the skill with the lowest accuracy to improve.
+        Selects the first skill in the sequence with accuracy below the threshold to improve.
 
         Args:
             accuracy (Mapping): Accuracy of each skill.
@@ -236,14 +243,105 @@ def __rich__(self):
         # TODO: move it to a base class and use repr derived from Skills
         text = f"[bold blue]Total Agent Skills: {len(self.skills)}[/bold blue]\n\n"
         for skill in self.skills.values():
-            text += f'[bold underline green]{skill.name}[/bold underline green]\n' \
-                    f'[green]{skill.instructions}[green]\n'
+            text += (
+                f"[bold underline green]{skill.name}[/bold underline green]\n"
+                f"[green]{skill.instructions}[green]\n"
+            )
         return text
 
 
 class ParallelSkillSet(SkillSet):
     """
     Represents a set of skills that are acquired simultaneously to reach a goal.
+
+    In a ParallelSkillSet, each skill can be developed independently of the others. This is useful
+    for agents that require multiple, diverse capabilities, or for tasks where each skill contributes a piece of
+    the overall solution.
+
+    Examples:
+        Create a ParallelSkillSet with a list of skills specified as BaseSkill instances
+        >>> from adala.skills import ParallelSkillSet, TextClassificationSkill, TextGenerationSkill
+        >>> skillset = ParallelSkillSet(skills=[TextClassificationSkill(name='Classify sentiment', instructions='Classify the sentiment'), TextGenerationSkill(name='Summarize text', instructions='Generate a summary')])
+
+        Create a ParallelSkillSet with a dictionary of skill names to BaseSkill instances
+        >>> from adala.skills import ParallelSkillSet, TextClassificationSkill, TextGenerationSkill
+        >>> skillset = ParallelSkillSet(skills={'sentiment_analysis': TextClassificationSkill(name='Classify sentiment', instructions='Classify the sentiment'), 'text_summary': TextGenerationSkill(name='Summarize text', instructions='Generate a summary')})
     """
-
-    pass
+
+    @field_validator("skills", mode="before")
+    @classmethod
+    def skills_validator(
+        cls, v: Union[List[BaseSkill], Dict[str, BaseSkill]]
+    ) -> Dict[str, BaseSkill]:
+        """
+        Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances.
+
+        Args:
+            v (Union[List[BaseSkill], Dict[str, BaseSkill]]): The skills attribute to validate.
+
+        Returns:
+            Dict[str, BaseSkill]: Dictionary mapping skill names to their corresponding BaseSkill instances.
+        """
+        skills = OrderedDict()
+        if not v:
+            return skills
+
+        if isinstance(v, list) and isinstance(v[0], BaseSkill):
+            # convert list of skill names to dictionary
+            for skill in v:
+                skills[skill.name] = skill
+        elif isinstance(v, dict):
+            skills = v
+        else:
+            raise ValueError(
+                f"skills must be a list or dictionary, not {type(v)}"
+            )
+        return skills
+
+    def apply(
+        self,
+        dataset: Union[Dataset, InternalDataFrame],
+        runtime: Runtime,
+        improved_skill: Optional[str] = None,
+    ) -> InternalDataFrame:
+        """
+        Applies each skill on the dataset, enhancing the agent's experience.
+
+        Args:
+            dataset (Dataset): The dataset to apply the skills on.
+            runtime (Runtime): The runtime environment in which to apply the skills.
+            improved_skill (Optional[str], optional): Unused in ParallelSkillSet. Defaults to None.
+        Returns:
+            InternalDataFrame: Skill predictions.
+ """ + predictions = None + + for i, skill_name in enumerate(self.skills.keys()): + skill = self.skills[skill_name] + # use input dataset for the first node in the pipeline + input_dataset = dataset if i == 0 else predictions + print_text(f"Applying skill: {skill_name}") + predictions = skill.apply(input_dataset, runtime) + + return predictions + + def select_skill_to_improve( + self, accuracy: Mapping, accuracy_threshold: Optional[float] = 0.9 + ) -> Optional[BaseSkill]: + """ + Selects the skill with the lowest accuracy to improve. + + Args: + accuracy (Mapping): Accuracy of each skill. + accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 1.0. + Returns: + Optional[BaseSkill]: Skill to improve. None if no skill to improve. + """ + skills_below_threshold = [ + skill_name + for skill_name in self.skills.keys() + if accuracy[skill_name] < accuracy_threshold + ] + if skills_below_threshold: + weakest_skill_name = min(skills_below_threshold, key=accuracy.get) + return self.skills[weakest_skill_name] diff --git a/tests/test_llm_parallel_skillset.py b/tests/test_llm_parallel_skillset.py new file mode 100644 index 0000000..74647be --- /dev/null +++ b/tests/test_llm_parallel_skillset.py @@ -0,0 +1,85 @@ +import pandas as pd + +from utils import patching, PatchedCalls + +@patching( + target_function=PatchedCalls.OPENAI_MODEL_LIST.value, + data=[{'input': {}, 'output': {'data': [{'id': 'gpt-3.5-turbo-instruct'}]}}], +) +@patching( + target_function=PatchedCalls.GUIDANCE.value, + data=[ + # Responses for the first text entry + { + 'input': {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."}, + 'output': {"predictions": ""} # No person mentioned + }, + { + 'input': {"text_": "Barack Obama was the 44th president of the United States."}, + 'output': {"predictions": "Barack Obama"} + }, + { + 'input': {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."}, + 'output': {"predictions": "iPhone 15"} + }, + { + 'input': {"text_": "Barack Obama was the 44th president of the United States."}, + 'output': {"predictions": ""} # No product mentioned + }, + { + 'input': {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."}, + 'output': {"predictions": "September 2023"} + }, + { + 'input': {"text_": "Barack Obama was the 44th president of the United States."}, + 'output': {"predictions": ""} # No date mentioned + }, + { + 'input': {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."}, + 'output': {"predictions": ""} # No location mentioned + }, + { + 'input': {"text_": "Barack Obama was the 44th president of the United States."}, + 'output': {"predictions": "United States"} + } + ], + strict=False +) +def test_llm_parallel_skillset(): + from adala.skills.skillset import ParallelSkillSet, LLMSkill + from adala.datasets import DataFrameDataset, InternalDataFrame + from adala.runtimes import OpenAIRuntime + + skillset = ParallelSkillSet( + skills=[ + LLMSkill(name="skill_person", instructions="Extract person's name", input_data_field="text"), + LLMSkill(name="skill_product", instructions="Extract product name", input_data_field="text"), + LLMSkill(name="skill_date", instructions="Extract date", input_data_field="text"), + LLMSkill(name="skill_location", instructions="Extract location", input_data_field="text"), + ] + ) + dataset = DataFrameDataset(df=InternalDataFrame([ + "Apple's latest product, the iPhone 15, was released in September 2023.", + "Barack Obama was the 
+    ], columns=["text"]))
+    predictions = skillset.apply(
+        dataset=dataset,
+        runtime=OpenAIRuntime(verbose=True),
+    )
+
+    pd.testing.assert_frame_equal(InternalDataFrame.from_records([
+        {
+            'text': "Apple's latest product, the iPhone 15, was released in September 2023.",
+            'skill_person': "",  # No person mentioned
+            'skill_product': 'iPhone 15',
+            'skill_date': 'September 2023',
+            'skill_location': ""  # No location mentioned
+        },
+        {
+            'text': 'Barack Obama was the 44th president of the United States.',
+            'skill_person': 'Barack Obama',
+            'skill_product': "",  # No product mentioned
+            'skill_date': "",  # No date mentioned
+            'skill_location': 'United States'
+        }
+    ]), predictions)
\ No newline at end of file
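Note: the snippet below is a minimal standalone sketch, not part of the patch. It restates the selection rule that ParallelSkillSet.select_skill_to_improve adds above: among the skills whose accuracy falls below accuracy_threshold (default 0.9), the one with the lowest accuracy is returned; if every skill meets the threshold, nothing is selected. The skill names and accuracy values are hypothetical, and the helper name select_weakest_skill is illustrative only.

from typing import Mapping, Optional


def select_weakest_skill(
    accuracy: Mapping[str, float], accuracy_threshold: float = 0.9
) -> Optional[str]:
    # Keep only the skills that do not meet the accuracy threshold.
    below = [name for name, acc in accuracy.items() if acc < accuracy_threshold]
    # Return the weakest of them, or None when all skills are accurate enough.
    return min(below, key=accuracy.get) if below else None


# Hypothetical per-skill accuracies, e.g. from an evaluation run over labeled data.
accuracy = {
    "skill_person": 0.95,
    "skill_product": 0.72,
    "skill_date": 0.85,
    "skill_location": 0.91,
}
print(select_weakest_skill(accuracy))  # -> skill_product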