Skip to content

Commit

Permalink
Adding ParallelSkillSet & tests (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-medicratic authored Nov 10, 2023
1 parent 8ac8cc9 commit f5ba24c
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 25 deletions.
148 changes: 123 additions & 25 deletions adala/skills/skillset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ def apply(
self,
dataset: Union[Dataset, InternalDataFrame],
runtime: Runtime,
improved_skill: Optional[str] = None
improved_skill: Optional[str] = None,
) -> InternalDataFrame:
"""
Apply the skill set to a dataset using a specified runtime.
Args:
dataset (Union[Dataset, InternalDataFrame]): The dataset to apply the skill set to.
runtime (Runtime): The runtime environment in which to apply the skills.
Expand All @@ -43,7 +43,9 @@ def apply(
"""

@abstractmethod
def select_skill_to_improve(self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0) -> Optional[BaseSkill]:
def select_skill_to_improve(
self, accuracy: Mapping, accuracy_threshold: Optional[float] = 1.0
) -> Optional[BaseSkill]:
"""
Select skill to improve based on accuracy.
Expand Down Expand Up @@ -90,12 +92,12 @@ class LinearSkillSet(SkillSet):
"""
Represents a sequence of skills that are acquired in a specific order to achieve a goal.
LinearSkillSet ensures that skills are developed in a sequential manner, determined either
LinearSkillSet ensures that skills are developed in a sequential manner, determined either
by the provided skill_sequence or by the lexicographical order of skill names.
Attributes:
skills (Union[List[str], Dict[str, str], List[BaseSkill], Dict[str, BaseSkill]]): Provided skills
skill_sequence (List[str], optional): Ordered list of skill names indicating the order
skill_sequence (List[str], optional): Ordered list of skill names indicating the order
in which they should be acquired.
By default, lexicographical order of skill names is used.
input_data_field (Optional[str], optional): Name of the input data field. Defaults to None.
Expand All @@ -117,8 +119,11 @@ class LinearSkillSet(SkillSet):
skill_sequence: List[str] = None
input_data_field: Optional[str] = None

@field_validator('skills', mode='before')
def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]) -> Dict[str, BaseSkill]:
@field_validator("skills", mode="before")
@classmethod
def skills_validator(
cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSkill]]
) -> Dict[str, BaseSkill]:
"""
Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances.
Expand All @@ -140,7 +145,7 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
skills[skill_name] = LLMSkill(
name=skill_name,
instructions=instructions,
input_data_field=input_data_field
input_data_field=input_data_field,
)
# Linear skillset creates skills pipeline - update input_data_field for next skill
input_data_field = skill_name
Expand All @@ -150,7 +155,7 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
skills[skill_name] = LLMSkill(
name=skill_name,
instructions=instructions,
input_data_field=input_data_field
input_data_field=input_data_field,
)
# Linear skillset creates skills pipeline - update input_data_field for next skill
input_data_field = skill_name
Expand All @@ -164,8 +169,8 @@ def skills_validator(cls, v: Union[List[str], List[BaseSkill], Dict[str, BaseSki
raise ValueError(f"skills must be a list or dictionary, not {type(skills)}")
return skills

@model_validator(mode='after')
def skill_sequence_validator(self) -> 'LinearSkillSet':
@model_validator(mode="after")
def skill_sequence_validator(self) -> "LinearSkillSet":
"""
Validates and sets the default order for the skill sequence if not provided.
Expand All @@ -176,9 +181,11 @@ def skill_sequence_validator(self) -> 'LinearSkillSet':
# use default skill sequence defined by lexicographical order
self.skill_sequence = list(self.skills.keys())
if len(self.skill_sequence) != len(self.skills):
raise ValueError(f"skill_sequence must contain all skill names - "
f"length of skill_sequence is {len(self.skill_sequence)} "
f"while length of skills is {len(self.skills)}")
raise ValueError(
f"skill_sequence must contain all skill names - "
f"length of skill_sequence is {len(self.skill_sequence)} "
f"while length of skills is {len(self.skills)}"
)
return self

def apply(
Expand All @@ -189,7 +196,7 @@ def apply(
) -> InternalDataFrame:
"""
Sequentially applies each skill on the dataset, enhancing the agent's experience.
Args:
dataset (Dataset): The dataset to apply the skills on.
runtime (Runtime): The runtime environment in which to apply the skills.
Expand All @@ -201,7 +208,9 @@ def apply(
predictions = None
if improved_skill:
# start from the specified skill, assuming previous skills have already been applied
skill_sequence = self.skill_sequence[self.skill_sequence.index(improved_skill):]
skill_sequence = self.skill_sequence[
self.skill_sequence.index(improved_skill) :
]
else:
skill_sequence = self.skill_sequence
for i, skill_name in enumerate(skill_sequence):
Expand All @@ -210,16 +219,14 @@ def apply(
input_dataset = dataset if i == 0 else predictions
print_text(f"Applying skill: {skill_name}")
predictions = skill.apply(input_dataset, runtime)

return predictions

def select_skill_to_improve(
self,
accuracy: Mapping,
accuracy_threshold: Optional[float] = 1.0
self, accuracy: Mapping, accuracy_threshold: Optional[float] = 0.9
) -> Optional[BaseSkill]:
"""
Selects the skill with the lowest accuracy to improve.
Selects the first skill in the sequence with accuracy below the threshold to improve.
Args:
accuracy (Mapping): Accuracy of each skill.
Expand All @@ -236,14 +243,105 @@ def __rich__(self):
# TODO: move it to a base class and use repr derived from Skills
text = f"[bold blue]Total Agent Skills: {len(self.skills)}[/bold blue]\n\n"
for skill in self.skills.values():
text += f'[bold underline green]{skill.name}[/bold underline green]\n' \
f'[green]{skill.instructions}[green]\n'
text += (
f"[bold underline green]{skill.name}[/bold underline green]\n"
f"[green]{skill.instructions}[green]\n"
)
return text


class ParallelSkillSet(SkillSet):
    """
    Represents a set of skills that are acquired simultaneously to reach a goal.

    In a ParallelSkillSet, each skill can be developed independently of the others. This is useful
    for agents that require multiple, diverse capabilities, or tasks where each skill contributes a piece of
    the overall solution.

    Examples:
        Create a ParallelSkillSet with a list of skills specified as BaseSkill instances
        >>> from adala.skills import ParallelSkillSet, TextClassificationSkill, TextGenerationSkill
        >>> skillset = ParallelSkillSet(skills=[TextClassificationSkill(name='Classify sentiment', instructions='Classify the sentiment'), TextGenerationSkill(name='Summarize text', instructions='Generate a summary')])

        Create a ParallelSkillSet with a dictionary of skill names to BaseSkill instances
        >>> from adala.skills import ParallelSkillSet, TextClassificationSkill, TextGenerationSkill
        >>> skillset = ParallelSkillSet(skills={'sentiment_analysis': TextClassificationSkill(name='Classify sentiment', instructions='Classify the sentiment'),'text_summary': TextGenerationSkill(name='Summarize text', instructions='Generate a summary')})
    """

    @field_validator("skills", mode="before")
    @classmethod
    def skills_validator(
        cls, v: Union[List[BaseSkill], Dict[str, BaseSkill]]
    ) -> Dict[str, BaseSkill]:
        """
        Validates and converts the skills attribute to a dictionary of skill names to BaseSkill instances.

        Args:
            v (Union[List[BaseSkill], Dict[str, BaseSkill]]): The skills attribute to validate.

        Returns:
            Dict[str, BaseSkill]: Dictionary mapping skill names to their corresponding BaseSkill instances.
                A falsy input (``None``, empty list, empty dict) yields an empty ordered dictionary;
                a dictionary input is returned as-is.

        Raises:
            ValueError: If ``v`` is neither a list nor a dictionary, or if a list entry
                is not a BaseSkill instance.
        """
        skills = OrderedDict()
        if not v:
            return skills

        if isinstance(v, dict):
            return v

        if isinstance(v, list):
            # Key each skill instance by its own name, keeping the list order.
            for skill in v:
                if not isinstance(skill, BaseSkill):
                    raise ValueError(
                        f"skills list must contain only BaseSkill instances, not {type(skill)}"
                    )
                skills[skill.name] = skill
            return skills

        # Raise ValueError (as LinearSkillSet does) and report the type of the
        # offending input rather than of the local accumulator.
        raise ValueError(f"skills must be a list or dictionary, not {type(v)}")

    def apply(
        self,
        dataset: Union[Dataset, InternalDataFrame],
        runtime: Runtime,
        improved_skill: Optional[str] = None,
    ) -> InternalDataFrame:
        """
        Applies each skill on the dataset, enhancing the agent's experience.

        Skills are applied one after another in the iteration order of ``self.skills``.
        Only the first skill receives ``dataset`` itself; every following skill receives
        the output of the skill applied before it.

        Args:
            dataset (Dataset): The dataset to apply the skills on.
            runtime (Runtime): The runtime environment in which to apply the skills.
            improved_skill (Optional[str], optional): Unused in ParallelSkillSet. Defaults to None.

        Returns:
            InternalDataFrame: Output of the last applied skill. ``None`` if the skill set is empty.
        """
        predictions = None

        for i, (skill_name, skill) in enumerate(self.skills.items()):
            # NOTE(review): presumably each skill's output keeps the input columns,
            # so later skills can still read their own input field - confirm
            # against BaseSkill.apply.
            input_dataset = dataset if i == 0 else predictions
            print_text(f"Applying skill: {skill_name}")
            predictions = skill.apply(input_dataset, runtime)

        return predictions

    def select_skill_to_improve(
        self, accuracy: Mapping, accuracy_threshold: Optional[float] = 0.9
    ) -> Optional[BaseSkill]:
        """
        Selects the skill with the lowest accuracy to improve.

        Only skills whose accuracy is strictly below the threshold are candidates.

        Args:
            accuracy (Mapping): Accuracy of each skill, keyed by skill name.
            accuracy_threshold (Optional[float], optional): Accuracy threshold. Defaults to 0.9.

        Returns:
            Optional[BaseSkill]: Skill to improve. None if no skill to improve.

        Raises:
            KeyError: If ``accuracy`` has no entry for one of the skills.
        """
        skills_below_threshold = [
            skill_name
            for skill_name in self.skills
            if accuracy[skill_name] < accuracy_threshold
        ]
        if not skills_below_threshold:
            return None

        weakest_skill_name = min(skills_below_threshold, key=accuracy.get)
        return self.skills[weakest_skill_name]
85 changes: 85 additions & 0 deletions tests/test_llm_parallel_skillset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import pandas as pd

from utils import patching, PatchedCalls

@patching(
    target_function=PatchedCalls.OPENAI_MODEL_LIST.value,
    data=[{"input": {}, "output": {"data": [{"id": "gpt-3.5-turbo-instruct"}]}}],
)
@patching(
    target_function=PatchedCalls.GUIDANCE.value,
    data=[
        # Mocked responses come in pairs (one per input text), in the same order
        # as the skills declared in the test body: person, product, date, location.
        {
            "input": {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."},
            "output": {"predictions": ""},  # No person mentioned
        },
        {
            "input": {"text_": "Barack Obama was the 44th president of the United States."},
            "output": {"predictions": "Barack Obama"},
        },
        {
            "input": {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."},
            "output": {"predictions": "iPhone 15"},
        },
        {
            "input": {"text_": "Barack Obama was the 44th president of the United States."},
            "output": {"predictions": ""},  # No product mentioned
        },
        {
            "input": {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."},
            "output": {"predictions": "September 2023"},
        },
        {
            "input": {"text_": "Barack Obama was the 44th president of the United States."},
            "output": {"predictions": ""},  # No date mentioned
        },
        {
            "input": {"text_": "Apple's latest product, the iPhone 15, was released in September 2023."},
            "output": {"predictions": ""},  # No location mentioned
        },
        {
            "input": {"text_": "Barack Obama was the 44th president of the United States."},
            "output": {"predictions": "United States"},
        },
    ],
    strict=False,
)
def test_llm_parallel_skillset():
    """Apply a four-skill ParallelSkillSet to two sentences and check the combined predictions."""
    from adala.skills.skillset import ParallelSkillSet, LLMSkill
    from adala.datasets import DataFrameDataset, InternalDataFrame
    from adala.runtimes import OpenAIRuntime

    apple_text = "Apple's latest product, the iPhone 15, was released in September 2023."
    obama_text = "Barack Obama was the 44th president of the United States."

    # Every skill reads the same "text" field.
    extraction_skills = [
        LLMSkill(name="skill_person", instructions="Extract person's name", input_data_field="text"),
        LLMSkill(name="skill_product", instructions="Extract product name", input_data_field="text"),
        LLMSkill(name="skill_date", instructions="Extract date", input_data_field="text"),
        LLMSkill(name="skill_location", instructions="Extract location", input_data_field="text"),
    ]
    skillset = ParallelSkillSet(skills=extraction_skills)

    source_frame = InternalDataFrame([apple_text, obama_text], columns=["text"])
    dataset = DataFrameDataset(df=source_frame)

    predictions = skillset.apply(
        dataset=dataset,
        runtime=OpenAIRuntime(verbose=True),
    )

    expected = InternalDataFrame.from_records(
        [
            {
                "text": apple_text,
                "skill_person": "",  # No person mentioned
                "skill_product": "iPhone 15",
                "skill_date": "September 2023",
                "skill_location": "",  # No location mentioned
            },
            {
                "text": obama_text,
                "skill_person": "Barack Obama",
                "skill_product": "",  # No product mentioned
                "skill_date": "",  # No date mentioned
                "skill_location": "United States",
            },
        ]
    )
    pd.testing.assert_frame_equal(expected, predictions)

0 comments on commit f5ba24c

Please sign in to comment.