diff --git a/CMakeLists.txt b/CMakeLists.txt index 810bbb7..570a21b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ endif() find_package(Eigen3 REQUIRED) find_package(TBB REQUIRED) -add_subdirectory(extern/pybind11) +find_package(pybind11 CONFIG REQUIRED) include_directories(${PROJECT_SOURCE_DIR}/src) add_subdirectory(src) diff --git a/dupin/detect/dynp.py b/dupin/detect/dynp.py index 0fd38c2..bf26407 100644 --- a/dupin/detect/dynp.py +++ b/dupin/detect/dynp.py @@ -1,46 +1,52 @@ +"""Implements dynamic programming class for optimal segmentation algorithm.""" import _DynP +import numpy as np -class DynP: - """Dynamic Programming class for calculating optimal segmentation - Attributes: - data (np.ndarray): Matrix storing the dataset. - num_bkps (int): Number of breakpoints to detect. - jump (int): Interval for checking potential breakpoints. - min_size (int): Minimum size of a segment. +class DynP: + """Detects the change points in a time series. + + Attributes + ---------- + data: np.ndarray + Matrix storing the time series data. + num_bkps: int + Number of change points to detect. + jump: int + Interval for checking potential change points. Changing will + not provide optimal detection, but will reduce runtime. + min_size: int + Minimum size of a segment. Changing will not provide optimal + detection, but will reduce runtime. """ - def __init__(self, data: np.ndarray, num_bkps: int, jump: int, min_size: int): - """Initializes the DynamicProgramming instance with given parameters.""" + def __init__( + self, data: np.ndarray, num_bkps: int, jump: int, min_size: int + ): + """Initialize the DynamicProgramming instance with given parameters.""" self.dynp = _DynP.DynamicProgramming(data, num_bkps, jump, min_size) def set_num_threads(self, num_threads: int): - """Sets the number of threads for parallelization. + """Set the number of threads for parallelization. - Args: - num_threads (int): The number of threads to use. 
+ Parameters + ---------- + num_threads: int + The number of threads to use during computation. Default + is determined automatically. """ self.dynp.set_threads(num_threads) - def return_breakpoints(self) -> list: - """Returns the optimal set of breakpoints after segmentation. - - Returns: - list: A list of integers representing the breakpoints. - """ - return self.dynp.return_breakpoints() - - def initialize_cost_matrix(self): - """Initializes and fills the upper triangular cost matrix for all data segments.""" - self.dynp.initialize_cost_matrix() + def fit(self, num_bkps: int) -> list: + """Calculate the cost matrix and return the breakpoints. - def fit(self) -> list: - """Calculates the cost matrix and returns the breakpoints. + Parameters + ---------- + num_bkps: int + number of change points to detect. - Returns: + Returns + ------- list: A list of integers representing the breakpoints. """ return self.dynp.fit() - - - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6fc4fa5..bf0d409 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,5 @@ pybind11_add_module(_dupin dupininterface.cpp - dupin.h dupin.cpp + dupin.h dupin.cpp ) set_target_properties(_dupin PROPERTIES diff --git a/src/dupin.cpp b/src/dupin.cpp index 814cc27..6a53f6d 100644 --- a/src/dupin.cpp +++ b/src/dupin.cpp @@ -15,9 +15,9 @@ using namespace Eigen; DynamicProgramming::DynamicProgramming() : num_bkps(1), num_parameters(0), num_timesteps(0), jump(1), min_size(3) {} -DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, +DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, int jump_, int min_size_) - : data(data), num_bkps(num_bkps_), + : data(data), num_bkps(num_bkps_), jump(jump_), min_size(min_size_) { num_timesteps = data.rows(); num_parameters = data.cols(); @@ -95,6 +95,7 @@ void DynamicProgramming::initialize_cost_matrix() { } } }); + cost_computed = true; } std::pair> DynamicProgramming::seg(int start, 
int end, @@ -138,9 +139,12 @@ std::vector DynamicProgramming::compute_breakpoints() { return breakpoints; } -std::vector DynamicProgramming::fit(){ +std::vector DynamicProgramming::fit(int num_bkps_in){ + num_bkps = num_bkps_in; + if (!cost_computed){ initialize_cost_matrix(); - return compute_breakpoints(); + } + return compute_breakpoints(); } void set_parallelization(int num_threads) { @@ -148,26 +152,14 @@ void set_parallelization(int num_threads) { num_threads); } -int DynamicProgramming::get_num_timesteps() { return num_timesteps; } - -int DynamicProgramming::get_num_parameters() { return num_parameters; } - -int DynamicProgramming::get_num_bkps() { return num_bkps; } - -Eigen::MatrixXd &DynamicProgramming::getDatum() { return data; } - DynamicProgramming::UpperTriangularMatrix & DynamicProgramming::getCostMatrix() { return cost_matrix; } -void DynamicProgramming::setDatum(const Eigen::MatrixXd &value) { - data = value; -} - void DynamicProgramming::setCostMatrix( const DynamicProgramming::UpperTriangularMatrix &value) { cost_matrix = value; } -int main() { return 0; } \ No newline at end of file +int main() { return 0; } diff --git a/src/dupin.h b/src/dupin.h index c48a32d..515049c 100644 --- a/src/dupin.h +++ b/src/dupin.h @@ -8,9 +8,11 @@ #include -// DynamicProgramming class for dynamic programming based segmentation. 
-class DynamicProgramming { +// Calculates optimal breakpoints in time-series data using memoization +class DynamicProgramming { private: + + //stores upper triangular cost matrix efficiently class UpperTriangularMatrix { private: std::vector matrix; @@ -24,13 +26,6 @@ class DynamicProgramming { public: UpperTriangularMatrix() : length(0) {} - UpperTriangularMatrix(int n) : length(n), matrix(n * (n + 1) / 2, 0.0), - row_indices(n) { - for (int row = 0; row < n; ++row) { - row_indices[row] = row * (2 * length - row + 1) / 2; - } - } - void initialize(int n) { length = n; matrix.resize(n * (n + 1) / 2, 0.0); @@ -45,8 +40,6 @@ class DynamicProgramming { } int getSize() const { return length; } }; - UpperTriangularMatrix cost_matrix; - // Struct for memoization key, combining start, end, and number of // breakpoints. struct MemoKey { @@ -63,7 +56,7 @@ class DynamicProgramming { // Custom XOR-bit hash function for MemoKey, avoids clustering of data in // unordered map to improve efficiency. - struct MemoKeyHash { + struct MemoKeyHash { std::size_t operator()(const MemoKey &key) const { return ((std::hash()(key.start) ^ (std::hash()(key.end) << 1)) >> @@ -81,8 +74,10 @@ class DynamicProgramming { int num_timesteps; // Number of data points (time steps). int jump; // Interval for checking potential breakpoints. int min_size; // Minimum size of a segment. - Eigen::MatrixXd data; // Matrix storing the dataset. + Eigen::MatrixXd data; // Matrix storing the dataset. + UpperTriangularMatrix cost_matrix; //Matrix storing costs + bool cost_computed = false; // Structure for storing linear regression parameters. struct linear_fit_struct { Eigen::MatrixXd y; // Dependent variable (labels). @@ -110,37 +105,27 @@ class DynamicProgramming { // Recursive function for dynamic programming segmentation. std::pair> seg(int start, int end, int num_bkps); +// Initializes and fills the cost matrix for all data segments. 
+ void initialize_cost_matrix(); + + // Returns the optimal set of breakpoints after segmentation. + std::vector compute_breakpoints(); public: // Default constructor. DynamicProgramming(); // Parameterized constructor. - DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, int jump_, + DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, int jump_, int min_size_); - // Initializes and fills the cost matrix for all data segments. - void initialize_cost_matrix(); - - - //sets number of threads for parallelization + //Sets number of threads for parallelization void set_parallelization(int num_threads); - // Returns the optimal set of breakpoints after segmentation. - std::vector compute_breakpoints(); + // Calculates optimal breakpoints with given number of points. + std::vector fit(int num_bkps_in); - // Calculates the cost matrix and return the breakpoints - std::vector fit(); - - // Getter functions for accessing private class members. - int get_num_timesteps(); - int get_num_parameters(); - int get_num_bkps(); - Eigen::MatrixXd &getDatum(); + // Getter functions for cost matrix. DynamicProgramming::UpperTriangularMatrix &getCostMatrix(); - - // Setter functions for modifying private class members. 
- - void setDatum(const Eigen::MatrixXd &value); void setCostMatrix(const DynamicProgramming::UpperTriangularMatrix &value); }; diff --git a/src/dupininterface.cpp b/src/dupininterface.cpp index 2752228..c4e757a 100644 --- a/src/dupininterface.cpp +++ b/src/dupininterface.cpp @@ -8,18 +8,8 @@ namespace py = pybind11; PYBIND11_MODULE(_DynP, m) { py::class_(m, "DynamicProgramming") .def(py::init<>()) - .def_property("data", &DynamicProgramming::getDatum, - &DynamicProgramming::setDatum) .def_property("cost_matrix", &DynamicProgramming::getCostMatrix, &DynamicProgramming::setCostMatrix) - .def("num_bkps", &DynamicProgramming::get_num_bkps) - .def_property("num_timesteps", &DynamicProgramming::get_num_timesteps, - &DynamicProgramming::set_num_timesteps) - .def_property("num_parameters", &DynamicProgramming::get_num_parameters, - &DynamicProgramming::set_num_parameters) - .def("initialize_cost_matrix", - &DynamicProgramming::initialize_cost_matrix) - .def("return_breakpoints", &DynamicProgramming::compute_breakpoints) .def("fit", &DynamicProgramming::fit) .def("set_threads", &DynamicProgramming::set_parallelization); }