Skip to content

Commit

Permalink
fit function added with parameter input, removed whitespace with prec…
Browse files Browse the repository at this point in the history
…ommit, rearranged class to private
  • Loading branch information
npkamath committed Jan 19, 2024
1 parent 0af1b08 commit 66e2104
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 91 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ endif()

find_package(Eigen3 REQUIRED)
find_package(TBB REQUIRED)
add_subdirectory(extern/pybind11)
find_package(pybind11 CONFIG REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/src)
add_subdirectory(src)
64 changes: 35 additions & 29 deletions dupin/detect/dynp.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,52 @@
"""Implements dynamic programming class for optimal segmentation algorithm."""
import _DynP
import numpy as np

Check warning on line 3 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L2-L3

Added lines #L2 - L3 were not covered by tests

class DynP:
"""Dynamic Programming class for calculating optimal segmentation

Attributes:
data (np.ndarray): Matrix storing the dataset.
num_bkps (int): Number of breakpoints to detect.
jump (int): Interval for checking potential breakpoints.
min_size (int): Minimum size of a segment.
class DynP:
"""Detects the change points in a time series.
Attributes
----------
data: np.ndarray
Matrix storing the time series data.
num_bkps: int
Number of change points to detect.
jump: int
Interval for checking potential change points. Changing will
not provide optimal detection, but will reduce runtime.
min_size: int
Minimum size of a segment. Changing will not provide optimal
detection, but will reduce runtime.
"""

def __init__(self, data: np.ndarray, num_bkps: int, jump: int, min_size: int):
"""Initializes the DynamicProgramming instance with given parameters."""
def __init__(

Check warning on line 23 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L23

Added line #L23 was not covered by tests
self, data: np.ndarray, num_bkps: int, jump: int, min_size: int
):
"""Initialize the DynamicProgramming instance with given parameters."""
self.dynp = _DynP.DynamicProgramming(data, num_bkps, jump, min_size)

Check warning on line 27 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L27

Added line #L27 was not covered by tests

def set_num_threads(self, num_threads: int):

Check warning on line 29 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L29

Added line #L29 was not covered by tests
"""Sets the number of threads for parallelization.
"""Set the number of threads for parallelization.
Args:
num_threads (int): The number of threads to use.
Parameters
----------
num_threads: int
The number of threads to use during computation. Default
is determined automatically.
"""
self.dynp.set_threads(num_threads)

Check warning on line 38 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L38

Added line #L38 was not covered by tests

def return_breakpoints(self) -> list:
"""Returns the optimal set of breakpoints after segmentation.
Returns:
list: A list of integers representing the breakpoints.
"""
return self.dynp.return_breakpoints()

def initialize_cost_matrix(self):
"""Initializes and fills the upper triangular cost matrix for all data segments."""
self.dynp.initialize_cost_matrix()
def fit(self, num_bkps: int) -> list:
    """Calculate the cost matrix and return the breakpoints.

    The native ``fit`` only builds the cost matrix when it has not been
    computed yet, so repeated calls with a different ``num_bkps`` reuse it.

    Parameters
    ----------
    num_bkps: int
        Number of change points to detect.

    Returns
    -------
    list
        A list of integers representing the breakpoints.
    """
    # The native fit(int num_bkps_in) takes the breakpoint count as a
    # required argument, so it must be forwarded rather than dropped.
    return self.dynp.fit(num_bkps)

Check warning on line 52 in dupin/detect/dynp.py

View check run for this annotation

Codecov / codecov/patch

dupin/detect/dynp.py#L52

Added line #L52 was not covered by tests



2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pybind11_add_module(_dupin dupininterface.cpp
dupin.h dupin.cpp
dupin.h dupin.cpp
)

set_target_properties(_dupin PROPERTIES
Expand Down
26 changes: 9 additions & 17 deletions src/dupin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ using namespace Eigen;
DynamicProgramming::DynamicProgramming()
: num_bkps(1), num_parameters(0), num_timesteps(0), jump(1), min_size(3) {}

DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_,
DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_,
int jump_, int min_size_)
: data(data), num_bkps(num_bkps_),
: data(data), num_bkps(num_bkps_),
jump(jump_), min_size(min_size_) {
num_timesteps = data.rows();
num_parameters = data.cols();
Expand Down Expand Up @@ -95,6 +95,7 @@ void DynamicProgramming::initialize_cost_matrix() {
}
}
});
cost_computed = true;
}

std::pair<double, std::vector<int>> DynamicProgramming::seg(int start, int end,
Expand Down Expand Up @@ -138,36 +139,27 @@ std::vector<int> DynamicProgramming::compute_breakpoints() {
return breakpoints;
}

std::vector<int> DynamicProgramming::fit(){
std::vector<int> DynamicProgramming::fit(int num_bkps_in){
num_bkps = num_bkps_in;
if (!cost_computed){
initialize_cost_matrix();
return compute_breakpoints();
}
return compute_breakpoints();
}

void set_parallelization(int num_threads) {
static tbb::global_control gc(tbb::global_control::max_allowed_parallelism,
num_threads);
}

int DynamicProgramming::get_num_timesteps() { return num_timesteps; }

int DynamicProgramming::get_num_parameters() { return num_parameters; }

int DynamicProgramming::get_num_bkps() { return num_bkps; }

// Returns a mutable reference to the stored data matrix (no copy is made).
Eigen::MatrixXd &DynamicProgramming::getDatum() {
  return this->data;
}

// Returns a mutable reference to the stored cost matrix (no copy is made).
DynamicProgramming::UpperTriangularMatrix &DynamicProgramming::getCostMatrix() {
  return this->cost_matrix;
}

// Replaces the stored data matrix with a copy of `value`.
// NOTE(review): only `data` is assigned here; num_timesteps, num_parameters
// and the cost-matrix state are left as they were — confirm callers do not
// rely on them being refreshed.
void DynamicProgramming::setDatum(const Eigen::MatrixXd &value) {
  this->data = value;
}

// Replaces the stored cost matrix with a copy of `value`.
// NOTE(review): the cost_computed flag is not touched here — confirm whether
// a caller-supplied matrix should count as "computed".
void DynamicProgramming::setCostMatrix(
    const DynamicProgramming::UpperTriangularMatrix &value) {
  this->cost_matrix = value;
}

int main() { return 0; }
int main() { return 0; }
51 changes: 18 additions & 33 deletions src/dupin.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
#include <Eigen/Dense>


// DynamicProgramming class for dynamic programming based segmentation.
class DynamicProgramming {
// Calculates optimal breakpoints in time-series data using memoization
class DynamicProgramming {
private:

//stores upper triangular cost matrix efficiently
class UpperTriangularMatrix {
private:
std::vector<double> matrix;
Expand All @@ -24,13 +26,6 @@ class DynamicProgramming {
public:
UpperTriangularMatrix() : length(0) {}

UpperTriangularMatrix(int n) : length(n), matrix(n * (n + 1) / 2, 0.0),
row_indices(n) {
for (int row = 0; row < n; ++row) {
row_indices[row] = row * (2 * length - row + 1) / 2;
}
}

void initialize(int n) {
length = n;
matrix.resize(n * (n + 1) / 2, 0.0);
Expand All @@ -45,8 +40,6 @@ class DynamicProgramming {
}
int getSize() const { return length; }
};
UpperTriangularMatrix cost_matrix;

// Struct for memoization key, combining start, end, and number of
// breakpoints.
struct MemoKey {
Expand All @@ -63,7 +56,7 @@ class DynamicProgramming {

// Custom XOR-bit hash function for MemoKey, avoids clustering of data in
// unordered map to improve efficiency.
struct MemoKeyHash {
struct MemoKeyHash {
std::size_t operator()(const MemoKey &key) const {
return ((std::hash<int>()(key.start) ^
(std::hash<int>()(key.end) << 1)) >>
Expand All @@ -81,8 +74,10 @@ class DynamicProgramming {
int num_timesteps; // Number of data points (time steps).
int jump; // Interval for checking potential breakpoints.
int min_size; // Minimum size of a segment.
Eigen::MatrixXd data; // Matrix storing the dataset.

Eigen::MatrixXd data; // Matrix storing the dataset.
UpperTriangularMatrix cost_matrix; //Matrix storing costs
bool cost_computed = false;
// Structure for storing linear regression parameters.
struct linear_fit_struct {
Eigen::MatrixXd y; // Dependent variable (labels).
Expand Down Expand Up @@ -110,37 +105,27 @@ class DynamicProgramming {
// Recursive function for dynamic programming segmentation.
std::pair<double, std::vector<int>> seg(int start, int end, int num_bkps);

// Initializes and fills the cost matrix for all data segments.
void initialize_cost_matrix();

// Returns the optimal set of breakpoints after segmentation.
std::vector<int> compute_breakpoints();

public:
// Default constructor.
DynamicProgramming();

// Parameterized constructor.
DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, int jump_,
DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, int jump_,
int min_size_);

// Initializes and fills the cost matrix for all data segments.
void initialize_cost_matrix();


//sets number of threads for parallelization
//Sets number of threads for parallelization
void set_parallelization(int num_threads);

// Returns the optimal set of breakpoints after segmentation.
std::vector<int> compute_breakpoints();
// Calculates optimal breakpoints with given number of points.
std::vector<int> fit(int num_bkps_in);

// Calculates the cost matrix and return the breakpoints
std::vector<int> fit();

// Getter functions for accessing private class members.
int get_num_timesteps();
int get_num_parameters();
int get_num_bkps();
Eigen::MatrixXd &getDatum();
// Getter functions for cost matrix.
DynamicProgramming::UpperTriangularMatrix &getCostMatrix();

// Setter functions for modifying private class members.

void setDatum(const Eigen::MatrixXd &value);
void setCostMatrix(const DynamicProgramming::UpperTriangularMatrix &value);
};
10 changes: 0 additions & 10 deletions src/dupininterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,8 @@ namespace py = pybind11;
// Python bindings: exposes the C++ DynamicProgramming class as
// "DynamicProgramming" inside the extension module named _DynP.
// NOTE(review): src/CMakeLists.txt builds the target as `_dupin` while the
// module is named `_DynP` here — confirm the two names are meant to differ.
PYBIND11_MODULE(_DynP, m) {
py::class_<DynamicProgramming>(m, "DynamicProgramming")
// NOTE(review): only the default constructor is bound, but the Python wrapper
// constructs DynamicProgramming(data, num_bkps, jump, min_size) — confirm a
// four-argument init is exposed.
.def(py::init<>())
.def_property("data", &DynamicProgramming::getDatum,
&DynamicProgramming::setDatum)
.def_property("cost_matrix", &DynamicProgramming::getCostMatrix,
&DynamicProgramming::setCostMatrix)
.def("num_bkps", &DynamicProgramming::get_num_bkps)
// NOTE(review): set_num_timesteps / set_num_parameters are not declared in
// the visible part of dupin.h — verify they exist.
.def_property("num_timesteps", &DynamicProgramming::get_num_timesteps,
&DynamicProgramming::set_num_timesteps)
.def_property("num_parameters", &DynamicProgramming::get_num_parameters,
&DynamicProgramming::set_num_parameters)
.def("initialize_cost_matrix",
&DynamicProgramming::initialize_cost_matrix)
.def("return_breakpoints", &DynamicProgramming::compute_breakpoints)
// fit takes the number of breakpoints and returns the breakpoint indices.
.def("fit", &DynamicProgramming::fit)
// Exposed to Python as set_threads; forwards to set_parallelization.
.def("set_threads", &DynamicProgramming::set_parallelization);
}

0 comments on commit 66e2104

Please sign in to comment.