Skip to content

Commit

Permalink
Merge pull request #155 from AutomatedProcessImprovement/conditions
Browse files Browse the repository at this point in the history
Conditions
  • Loading branch information
david-chapela authored Sep 2, 2024
2 parents aaa478d + f18ed01 commit 7202b09
Show file tree
Hide file tree
Showing 61 changed files with 1,535 additions and 2,539 deletions.
2,368 changes: 0 additions & 2,368 deletions poetry.lock

This file was deleted.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "simod"
version = "4.1.1"
version = "5.0.0"
authors = [
"Ihar Suvorau <[email protected]>",
"David Chapela <[email protected]>",
Expand Down Expand Up @@ -34,10 +34,10 @@ scipy = "^1.9.2"
statistics = "^1.0.3.5"
tqdm = "^4.64.1"
xmltodict = "^0.13.0"
prosimos = "^2.0.3"
prosimos = "^2.0.5"
extraneous-activity-delays = "^2.1.21"
openxes-cli-py = "^0.1.15"
pix-framework = "^0.13.8"
pix-framework = "^0.13.16"
log-distance-measures = "^1.0.2"

[tool.poetry.group.dev.dependencies]
Expand Down
File renamed without changes.
30 changes: 30 additions & 0 deletions src/simod/branch_rules/discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd
from typing import List

from simod.branch_rules.types import BranchRules

from pix_framework.io.event_log import EventLogIDs
from pix_framework.discovery.gateway_probabilities import GatewayProbabilities
from pix_framework.discovery.gateway_conditions.gateway_conditions import discover_gateway_conditions


def discover_branch_rules(bpmn_graph, log: pd.DataFrame, log_ids: EventLogIDs, f_score=0.7) -> list[BranchRules]:
    """
    Discover the branch rules of a process model from an event log.

    Delegates the discovery to ``discover_gateway_conditions`` (passing
    ``f_score`` as its ``f_score_threshold``) and wraps every entry it
    returns in a ``BranchRules`` instance through ``BranchRules.from_dict``.
    """
    raw_conditions = discover_gateway_conditions(bpmn_graph, log, log_ids, f_score_threshold=f_score)
    return [BranchRules.from_dict(entry) for entry in raw_conditions]


def map_branch_rules_to_flows(gateway_probabilities: List[GatewayProbabilities], branch_rules: List[BranchRules]):
    """
    Attach branch-rule identifiers to the matching gateway outgoing paths.

    Every outgoing path whose ``path_id`` equals the ``id`` of one of the
    given branch rules gets its ``condition_id`` set to that rule's ``id``.
    Paths without a matching rule are left untouched. The gateway objects
    are modified in place and the same list is returned.
    """
    # Index the rules by identifier; a later rule with the same id wins.
    rules_by_id = {}
    for branch_rule in branch_rules:
        rules_by_id[branch_rule.id] = branch_rule

    # Walk all outgoing paths of all gateways, in their original order.
    every_path = (flow for gateway in gateway_probabilities for flow in gateway.outgoing_paths)
    for flow in every_path:
        matched_rule = rules_by_id.get(flow.path_id)
        if matched_rule is not None:
            flow.condition_id = matched_rule.id

    return gateway_probabilities
45 changes: 45 additions & 0 deletions src/simod/branch_rules/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from dataclasses import dataclass


@dataclass
class BranchRule:
    """A single condition made of an attribute, a comparison and a value."""

    attribute: str
    # NOTE(review): presumably a comparison operator token; the accepted
    # values are defined by whatever produces the rule dicts -- confirm.
    comparison: str
    value: str

    @classmethod
    def from_dict(cls, data: dict) -> "BranchRule":
        """
        Build a rule from a dict with the keys ``attribute``, ``comparison``
        and ``value``.

        Implemented as a classmethod so that subclasses get instances of
        their own type. Raises ``KeyError`` if any of the keys is missing.
        """
        return cls(
            attribute=data["attribute"],
            comparison=data["comparison"],
            value=data["value"],
        )

    def to_dict(self) -> dict:
        """Return the rule as a dict with the same keys ``from_dict`` reads."""
        return {
            "attribute": self.attribute,
            "comparison": self.comparison,
            "value": self.value,
        }


@dataclass
class BranchRules:
    """An identifier together with a list of lists of ``BranchRule``."""

    id: str
    # NOTE(review): the nesting looks like alternative groups of conditions
    # (OR of ANDs), but nothing in this file establishes that -- confirm.
    rules: list[list[BranchRule]]

    @classmethod
    def from_dict(cls, data: dict) -> "BranchRules":
        """
        Build an instance from a dict with the keys ``id`` and ``rules``.

        ``rules`` must be an iterable of iterables of rule dicts; each rule
        dict is converted with ``BranchRule.from_dict``. Implemented as a
        classmethod so that subclasses get instances of their own type.
        Raises ``KeyError`` if a required key is missing.
        """
        return cls(
            id=data["id"],
            rules=[
                [BranchRule.from_dict(rule) for rule in rule_set]
                for rule_set in data["rules"]
            ],
        )

    def to_dict(self) -> dict:
        """Return the instance as a dict with the same shape ``from_dict`` reads."""
        return {
            "id": self.id,
            "rules": [[rule.to_dict() for rule in rule_set] for rule_set in self.rules],
        }
28 changes: 0 additions & 28 deletions src/simod/case_attributes/discovery.py

This file was deleted.

40 changes: 0 additions & 40 deletions src/simod/case_attributes/types.py

This file was deleted.

35 changes: 33 additions & 2 deletions src/simod/control_flow/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
GatewayProbabilitiesDiscoveryMethod,
compute_gateway_probabilities,
)
from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows
from simod.branch_rules.types import BranchRules
from pix_framework.filesystem.file_manager import create_folder, get_random_folder_id, remove_asset
from pix_framework.io.bpm_graph import BPMNGraph

Expand Down Expand Up @@ -75,6 +77,7 @@ def __init__(self, event_log: EventLog, bps_model: BPSModel, settings: ControlFl
"prioritize_parallelism",
"replace_or_joins",
"output_dir",
"f_score"
]
)
# Instantiate trials for hyper-optimization process
Expand All @@ -84,7 +87,6 @@ def __init__(self, event_log: EventLog, bps_model: BPSModel, settings: ControlFl
def _hyperopt_iteration(self, hyperopt_iteration_dict: dict):
# Report new iteration
print_subsection(f"Control-flow optimization iteration {self.iteration_index}")

# Initialize status
status = STATUS_OK
# Create folder for this iteration
Expand Down Expand Up @@ -123,6 +125,20 @@ def _hyperopt_iteration(self, hyperopt_iteration_dict: dict):
hyperopt_iteration_params.gateway_probabilities_method,
)

# Discover branch rules
if self.settings.discover_branch_rules:
status, current_bps_model.branch_rules = hyperopt_step(
status,
self._discover_branch_rules,
current_bps_model.process_model,
hyperopt_iteration_params
)

current_bps_model.gateway_probabilities = map_branch_rules_to_flows(
current_bps_model.gateway_probabilities,
current_bps_model.branch_rules
)

# Simulate candidate and evaluate its quality
status, evaluation_measurements = hyperopt_step(
status, self._simulate_bps_model, current_bps_model, hyperopt_iteration_params.output_dir
Expand Down Expand Up @@ -243,6 +259,12 @@ def _define_search_space(self, settings: ControlFlowSettings) -> dict:
else:
space["epsilon"] = settings.epsilon

if settings.discover_branch_rules and settings.f_score:
if isinstance(settings.f_score, tuple):
space["f_score"] = hp.uniform("f_score", settings.f_score[0], settings.f_score[1])
else:
space["f_score"] = settings.f_score

return space

def cleanup(self):
Expand Down Expand Up @@ -296,6 +318,16 @@ def _discover_process_model(self, params: HyperoptIterationParams) -> Path:
discover_process_model(self._xes_train_log_path, output_model_path, params)
return output_model_path

def _discover_branch_rules(self, process_model: Path, params: HyperoptIterationParams) -> List[BranchRules]:
    """
    Discover branch rules for the given process model on the training partition.

    Builds a ``BPMNGraph`` from ``process_model`` and calls
    ``discover_branch_rules`` with the training partition and column IDs of
    this optimizer's event log, using ``params.f_score`` as the f-score value.
    """
    f_score = params.f_score
    print_step(f"Discovering branch rules with f_score {f_score}")
    graph = BPMNGraph.from_bpmn_path(process_model)
    event_log = self.event_log
    return discover_branch_rules(graph, event_log.train_partition, event_log.log_ids, f_score=f_score)

def _discover_gateway_probabilities(
self, process_model: Path, gateway_probabilities_method: GatewayProbabilitiesDiscoveryMethod
) -> List[GatewayProbabilities]:
Expand All @@ -312,7 +344,6 @@ def _simulate_bps_model(self, bps_model: BPSModel, output_dir: Path) -> List[dic
bps_model.replace_activity_names_with_ids()

json_parameters_path = bps_model.to_json(output_dir, self.event_log.process_name)

evaluation_measures = simulate_and_evaluate(
process_model_path=bps_model.process_model,
parameters_path=json_parameters_path,
Expand Down
7 changes: 7 additions & 0 deletions src/simod/control_flow/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class HyperoptIterationParams:
eta: Optional[float] # Percentile for frequency threshold (eta)
replace_or_joins: Optional[bool] # Should replace non-trivial OR joins
prioritize_parallelism: Optional[bool] # Should prioritize parallelism on loops
f_score: Optional[float] = None # quality gateway for branch rules (f_score)

def to_dict(self) -> dict:
"""Returns a dictionary with the parameters for this run."""
Expand All @@ -48,6 +49,9 @@ def to_dict(self) -> dict:
else:
optimization_parameters["provided_model_path"] = str(self.provided_model_path)

if self.f_score:
optimization_parameters["f_score"] = self.f_score

return optimization_parameters

@staticmethod
Expand Down Expand Up @@ -75,6 +79,8 @@ def from_hyperopt_dict(
elif mining_algorithm == ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V2:
epsilon = hyperopt_dict["epsilon"]

f_score = hyperopt_dict.get("f_score", None)

return HyperoptIterationParams(
output_dir=output_dir,
provided_model_path=provided_model_path,
Expand All @@ -86,4 +92,5 @@ def from_hyperopt_dict(
eta=eta,
prioritize_parallelism=prioritize_parallelism,
replace_or_joins=replace_or_joins,
f_score=f_score
)
Empty file.
31 changes: 31 additions & 0 deletions src/simod/data_attributes/discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pandas as pd

from simod.data_attributes.types import GlobalAttribute, CaseAttribute, EventAttribute

from pix_framework.io.event_log import EventLogIDs
from pix_framework.discovery.attributes.attribute_discovery import discover_attributes


def discover_data_attributes(
    log: pd.DataFrame, log_ids: EventLogIDs
) -> tuple[list[GlobalAttribute], list[CaseAttribute], list[EventAttribute]]:
    """
    Discover data attributes from a log, skipping the standard event-log columns.

    The case, activity, enabled-time, start-time, end-time and resource
    columns named by ``log_ids`` are passed to ``discover_attributes`` as
    columns to avoid.

    :param log: event log to analyse.
    :param log_ids: column-name mapping of the event log.
    :return: the tuple ``(global_attributes, case_attributes, event_attributes)``,
        in exactly that order.
    """
    attributes = discover_attributes(
        event_log=log,
        log_ids=log_ids,
        avoid_columns=[
            log_ids.case,
            log_ids.activity,
            log_ids.enabled_time,
            log_ids.start_time,
            log_ids.end_time,
            log_ids.resource,
        ],
        confidence_threshold=0.95,
    )

    # Wrap the raw dicts of each category in their typed counterparts.
    global_attributes = [GlobalAttribute.from_dict(item) for item in attributes["global_attributes"]]
    case_attributes = [CaseAttribute.from_dict(item) for item in attributes["case_attributes"]]
    event_attributes = [EventAttribute.from_dict(item) for item in attributes["event_attributes"]]

    # Return order is global, case, event -- the annotation above mirrors it.
    return global_attributes, case_attributes, event_attributes
Loading

0 comments on commit 7202b09

Please sign in to comment.