diff --git a/pyproject.toml b/pyproject.toml index ea55418..0b33972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "simod" -version = "3.5.0" +version = "3.5.1" authors = ["Manuel Camargo", "Ihar Suvorau", "David Chapela"] description = "Simod is a Python tool for automated discovery of business process simulation models from event logs." diff --git a/src/simod/optimization/optimizer.py b/src/simod/optimization/optimizer.py index d5680d6..3a46c43 100644 --- a/src/simod/optimization/optimizer.py +++ b/src/simod/optimization/optimizer.py @@ -5,6 +5,7 @@ import pandas as pd from extraneous_activity_delays.config import SimulationModel +from pix_framework.discovery.case_arrival import discover_case_arrival_model from pix_framework.discovery.gateway_probabilities import GatewayProbabilities from pix_framework.filesystem.file_manager import get_random_folder_id, get_random_file_id, create_folder @@ -20,7 +21,6 @@ from simod.settings.simod_settings import SimodSettings, PROJECT_DIR from simod.settings.temporal_settings import CalendarSettings from simod.simulation.parameters.BPS_model import BPSModel -from simod.simulation.parameters.case_arrival_model import discover_case_arrival_model from simod.simulation.parameters.miner import mine_parameters from simod.simulation.parameters.resource_model import discover_resource_model from simod.simulation.prosimos import simulate_and_evaluate diff --git a/src/simod/simulation/parameters/BPS_model.py b/src/simod/simulation/parameters/BPS_model.py index 6908bab..7a06031 100644 --- a/src/simod/simulation/parameters/BPS_model.py +++ b/src/simod/simulation/parameters/BPS_model.py @@ -2,9 +2,9 @@ from pathlib import Path from typing import Optional, List +from pix_framework.discovery.case_arrival import CaseArrivalModel from pix_framework.discovery.gateway_probabilities import GatewayProbabilities -from simod.simulation.parameters.case_arrival_model import CaseArrivalModel from simod.simulation.parameters.resource_model import ResourceModel diff --git a/src/simod/simulation/parameters/case_arrival_model.py b/src/simod/simulation/parameters/case_arrival_model.py deleted file mode 100644 index 26e6fb4..0000000 --- a/src/simod/simulation/parameters/case_arrival_model.py +++ /dev/null @@ -1,169 +0,0 @@ -from dataclasses import dataclass -from typing import List - -import pandas as pd -from pix_framework.calendar.resource_calendar import RCalendar -from pix_framework.discovery.calendar_factory import CalendarFactory -from pix_framework.log_ids import EventLogIDs -from pix_framework.statistics.distribution import get_best_fitting_distribution, get_observations_histogram - -from simod.utilities import nearest_divisor_for_granularity - - -@dataclass -class CaseArrivalModel: - """ - Simulation model parameters containing the calendar of the case arrivals and the distribution modeling the inter-arrival times. - """ - - case_arrival_calendar: RCalendar - inter_arrival_times: dict - - def to_dict(self) -> dict: - return { - 'arrival_time_calendar': self.case_arrival_calendar.to_json(), - 'arrival_time_distribution': self.inter_arrival_times - } - - @staticmethod - def from_dict(resource_model: dict) -> 'CaseArrivalModel': - calendar = RCalendar(calendar_id='Arrival Calendar') - for timetable in resource_model['arrival_time_calendar']: - calendar.add_calendar_item( - from_day=timetable['from'], - to_day=timetable['to'], - begin_time=timetable['beginTime'], - end_time=timetable['endTime'], - ) - - return CaseArrivalModel( - case_arrival_calendar=calendar, - inter_arrival_times=resource_model['arrival_time_distribution'] - ) - - -def discover_case_arrival_model( - event_log: pd.DataFrame, - log_ids: EventLogIDs, - granularity=60, - filter_outliers: bool = True -) -> CaseArrivalModel: - """ - Discover the case arrival model associated to the given event log. - - :param event_log: event log to discover the case arrival model from. - :param log_ids: Event log column IDs. - :param granularity: number of minutes to take as minimum available interval surrounding each - observed arrival for the calendar. - :param filter_outliers: flag to remove outlier in the inter-arrival time discovery. - - :return: case arrival model. - """ - return CaseArrivalModel( - case_arrival_calendar=discover_case_arrival_calendar(event_log, log_ids, granularity), - inter_arrival_times=discover_inter_arrival_distribution(event_log, log_ids, filter_outliers) - ) - - -def discover_case_arrival_calendar( - event_log: pd.DataFrame, - log_ids: EventLogIDs, - granularity=60 -) -> RCalendar: - """ - Discover weekly calendar for the arrival of new cases, i.e., the periods of times in each day when - new cases arrive to the system. - - :param event_log: event log to model the case arrivals from. - :param log_ids: Event log column IDs. - :param granularity: number of minutes to take as minimum available interval surrounding each - observed arrival. - - :return: weekly calendar of case arrivals. - """ - # Correct granularity if not divisor of 1440 (minutes in a day) - if 1440 % granularity != 0: - granularity = nearest_divisor_for_granularity(granularity) - - # Create calendar discoverer and store arrivals - calendar_factory = CalendarFactory(granularity) - for case_id, events in event_log.groupby(by=log_ids.case): - resource = "system" # Assign all arrivals to the same resource - activity = "case_arrival" # Assign same activity label to all arrivals - case_arrival = events[log_ids.start_time].min() - calendar_factory.check_date_time(resource, activity, case_arrival) - - # Discover calendar for the case arrivals - calendars = calendar_factory.build_weekly_calendars(min_confidence=0.1, desired_support=0.7, min_participation=0.4) - - calendar = calendars["system"] - return calendar - - -def discover_inter_arrival_distribution( - event_log: pd.DataFrame, - log_ids: EventLogIDs, - filter_outliers: bool = True -) -> dict: - """ - Discover case inter-arrival duration distribution for the event log. - - :param event_log: Event log. - :param log_ids: Event log column IDs. - :param filter_outliers: flag to remove outlier inter-arrival times. - :return: Duration distribution for the inter-arrival times. - """ - # Get the durations between each two consecutive arrivals - inter_arrival_durations = _get_inter_arrival_times(event_log, log_ids) - # Get the best distribution fitting the inter-arrival durations - arrival_distribution = get_best_fitting_distribution( - data=inter_arrival_durations, - filter_outliers=filter_outliers - ) - # Return it - return arrival_distribution.to_prosimos_distribution() - - -def get_observed_inter_arrival_distribution( - event_log: pd.DataFrame, - log_ids: EventLogIDs, - num_bins: int = 20, - filter_outliers: bool = True -) -> dict: - """ - Get the distribution of observed inter-arrival times (CDF and bin midpoints of their histogram). - - :param event_log: event log to extract the arrivals. - :param log_ids: column mapping IDs for the event log. - :param num_bins: number of bins of the build histogram. - :param filter_outliers: flag to remove outlier inter-arrival times. - :return: CDF and bin midpoints of the histogram modelling the inter-arrivals. - """ - # Get the durations between each two consecutive arrivals - inter_arrival_durations = _get_inter_arrival_times(event_log, log_ids) - # Compute the CDF and BINs of the observations histogram - arrival_distribution = get_observations_histogram( - data=inter_arrival_durations, - num_bins=num_bins, - filter_outliers=filter_outliers - ) - # Return custom histogram distribution - return arrival_distribution - - -def _get_inter_arrival_times(event_log: pd.DataFrame, log_ids: EventLogIDs) -> List[float]: - # Get the arrival times from the event log - arrival_times = [] - for case_id, events in event_log.groupby(by=log_ids.case): - arrival_times += [events[log_ids.start_time].min()] - # Sort them - arrival_times.sort() - # Compute durations between one arrival and the next one (inter-arrival durations) - inter_arrival_durations = [] - last_arrival = None - for arrival in arrival_times: - if last_arrival: - inter_arrival_durations += [(arrival - last_arrival).total_seconds()] - last_arrival = arrival - # Return list of inter-arrivals - return inter_arrival_durations diff --git a/src/simod/simulation/parameters/miner.py b/src/simod/simulation/parameters/miner.py index b1f362b..8c7881c 100644 --- a/src/simod/simulation/parameters/miner.py +++ b/src/simod/simulation/parameters/miner.py @@ -4,6 +4,7 @@ import pandas as pd from networkx import DiGraph from pix_framework.calendar.resource_calendar import RCalendar +from pix_framework.discovery.case_arrival import discover_case_arrival_calendar, discover_inter_arrival_distribution from pix_framework.discovery.gateway_probabilities import GatewayProbabilitiesDiscoveryMethod, GatewayProbabilities, \ compute_gateway_probabilities from pix_framework.io.bpm_graph import BPMNGraph @@ -16,8 +17,6 @@ from simod.simulation.calendar_discovery import resource as resource_calendar from simod.simulation.calendar_discovery.resource import full_day_schedule, working_hours_schedule, \ UNDIFFERENTIATED_RESOURCE_POOL_KEY -from simod.simulation.parameters.case_arrival_model import discover_case_arrival_calendar, \ - discover_inter_arrival_distribution from simod.simulation.parameters.intervals import Interval, intersect_intervals, prosimos_interval_to_interval_safe, \ pd_interval_to_interval from simod.simulation.parameters.resource_activity_performances import ActivityResourceDistribution, \ diff --git a/tests/test_calendars.py b/tests/test_calendars.py index fd44f55..b437471 100644 --- a/tests/test_calendars.py +++ b/tests/test_calendars.py @@ -2,13 +2,13 @@ import pandas as pd import pytest +from pix_framework.discovery.case_arrival import discover_case_arrival_calendar from pix_framework.input import read_csv_log from pix_framework.log_ids import APROMORE_LOG_IDS from prosimos.resource_calendar import CalendarFactory from simod.discovery.resource_pool_discoverer import ResourcePoolDiscoverer from simod.event_log.utilities import read, convert_xes_to_csv -from simod.simulation.parameters.case_arrival_model import discover_case_arrival_calendar @pytest.mark.integration diff --git a/tests/test_discovery/test_inter_arrival_distribution.py b/tests/test_discovery/test_inter_arrival_distribution.py index e2b9608..0a50d41 100644 --- a/tests/test_discovery/test_inter_arrival_distribution.py +++ b/tests/test_discovery/test_inter_arrival_distribution.py @@ -1,8 +1,8 @@ import pytest +from pix_framework.discovery.case_arrival import discover_inter_arrival_distribution from pix_framework.log_ids import DEFAULT_XES_IDS from simod.event_log.utilities import read -from simod.simulation.parameters.case_arrival_model import discover_inter_arrival_distribution test_cases = [ { diff --git a/tests/test_process_structure/test_optimizer.py b/tests/test_process_structure/test_optimizer.py index 3957a3b..b9b9d3b 100644 --- a/tests/test_process_structure/test_optimizer.py +++ b/tests/test_process_structure/test_optimizer.py @@ -1,15 +1,15 @@ import pytest +from pix_framework.discovery.case_arrival import discover_case_arrival_model from pix_framework.filesystem.file_manager import get_random_folder_id, create_folder from pix_framework.log_ids import DEFAULT_XES_IDS -from simod.event_log.event_log import EventLog from simod.control_flow.optimizer import ControlFlowOptimizer from simod.control_flow.settings import HyperoptIterationParams +from simod.event_log.event_log import EventLog from simod.settings.control_flow_settings import ControlFlowSettings from simod.settings.simod_settings import PROJECT_DIR from simod.settings.temporal_settings import CalendarSettings from simod.simulation.parameters.BPS_model import BPSModel -from simod.simulation.parameters.case_arrival_model import discover_case_arrival_model from simod.simulation.parameters.resource_model import discover_resource_model structure_config_sm3 = { diff --git a/tests/test_simulation/test_calendar_discovery.py b/tests/test_simulation/test_calendar_discovery.py index 79b9058..d5fa20c 100644 --- a/tests/test_simulation/test_calendar_discovery.py +++ b/tests/test_simulation/test_calendar_discovery.py @@ -1,11 +1,11 @@ import pytest from pix_framework.calendar.resource_calendar import RCalendar +from pix_framework.discovery.case_arrival import discover_case_arrival_calendar from pix_framework.input import read_csv_log from pix_framework.log_ids import APROMORE_LOG_IDS from simod.simulation.calendar_discovery.resource import discover_undifferentiated, discover_per_resource_pool, \ discover_per_resource -from simod.simulation.parameters.case_arrival_model import discover_case_arrival_calendar @pytest.mark.integration