Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DES #490

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6383a4a
Add DES
VanyaVolgushev Nov 4, 2024
023a3ff
Make global methods and fields go after private
VanyaVolgushev Nov 22, 2024
df4065c
Fix namespace
VanyaVolgushev Nov 22, 2024
b5237f8
Add newlines at ends of files
VanyaVolgushev Nov 22, 2024
e0decad
Add const to String var
VanyaVolgushev Nov 22, 2024
55247ca
Fix FindFeatureDomains
VanyaVolgushev Nov 26, 2024
ba0acc2
Fix GetRandomPopulationInDomains
VanyaVolgushev Nov 26, 2024
ec347c7
Add const ref to MutateIndividual and rename to reflect that
VanyaVolgushev Nov 26, 2024
4468f05
Add unsigned where required
VanyaVolgushev Nov 27, 2024
716e82e
Remove double definition
VanyaVolgushev Nov 27, 2024
eb14603
Add move to mutants
VanyaVolgushev Nov 27, 2024
8538902
Remove typedef from diff_functions.h
VanyaVolgushev Nov 27, 2024
c67c212
Remove c-stlye cast
VanyaVolgushev Nov 27, 2024
8411e6a
Add assert(number_of_indices <= population - 1)
VanyaVolgushev Nov 27, 2024
f94b2b7
Make GetRandIndices return a vector
VanyaVolgushev Nov 27, 2024
09a45ce
Use [ ] with samples
VanyaVolgushev Nov 27, 2024
7ace291
Remove unreachable break statements
VanyaVolgushev Nov 27, 2024
ebab96a
Add const to VectorSize and FeatureCount
VanyaVolgushev Nov 27, 2024
bb56794
Remove else from operator[]
VanyaVolgushev Nov 27, 2024
f439248
Constexpr instead of const
VanyaVolgushev Nov 27, 2024
0701118
Reference
VanyaVolgushev Nov 27, 2024
ddd6853
Fix types in Decode
VanyaVolgushev Nov 27, 2024
f888035
Fix compare_by_permutation
VanyaVolgushev Nov 27, 2024
a72e340
Add const
VanyaVolgushev Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/core/algorithms/algorithm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ namespace algos {

using AlgorithmTypes =
std::tuple<Depminer, DFD, FastFDs, FDep, FdMine, Pyro, Tane, PFDTane, FUN, hyfd::HyFD, Aid,
Apriori, metric::MetricVerifier, DataStats, fd_verifier::FDVerifier, HyUCC,
PyroUCC, HPIValid, cfd::FDFirstAlgorithm, ACAlgorithm, UCCVerifier, Faida,
Apriori, des::DES, metric::MetricVerifier, DataStats, fd_verifier::FDVerifier,
HyUCC, PyroUCC, HPIValid, cfd::FDFirstAlgorithm, ACAlgorithm, UCCVerifier, Faida,
Spider, Mind, Fastod, GfdValidation, EGfdValidation, NaiveGfdValidation,
order::Order, dd::Split, Cords, hymd::HyMD>;

Expand All @@ -36,6 +36,9 @@ BETTER_ENUM(AlgorithmType, char,

/* Association rules mining algorithms */
apriori,

/* Numerical association rules mining algorithms*/
des,

/* Metric verifier algorithm */
metric,
Expand Down
1 change: 1 addition & 0 deletions src/core/algorithms/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "algorithms/ind/mining_algorithms.h"
#include "algorithms/md/mining_algorithms.h"
#include "algorithms/metric/verification_algorithms.h"
#include "algorithms/nar/mining_algorithms.h"
#include "algorithms/od/mining_algorithms.h"
#include "algorithms/statistics/algorithms.h"
#include "algorithms/ucc/mining_algorithms.h"
Expand Down
87 changes: 87 additions & 0 deletions src/core/algorithms/nar/des/des.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "des.h"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Include-what-you-use is planned to be introduced in the CI, so it would be better to add these lines:

Suggested change
#include <algorithm>
#include <cstddef>
#include <vector>
#include <memory>

This also applies to other files

#include "algorithms/nar/value_range.h"
#include "config/names_and_descriptions.h"
#include "config/option_using.h"
#include "config/tabular_data/input_table/option.h"
#include "model/types/types.h"

namespace algos::des {
using model::ValueRange;

DES::DES() : NARAlgorithm({}) {
using namespace config::names;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
using namespace config::names;

RegisterOptions();
}

// Registers every tunable DES parameter with the option framework, pairing each
// backing field with its option name, description and default value.
void DES::RegisterOptions() {
    DESBORDANTE_OPTION_USING;

    // Defaults are named up front so that the registrations below read uniformly.
    unsigned int const default_population_size = 100u;
    unsigned int const default_num_evaluations = 1000u;
    double const default_differential_scale = 0.5;
    double const default_crossover_probability = 0.9;
    DifferentialStrategy const default_differential_strategy = DifferentialStrategy::rand1Bin;

    RegisterOption(Option{&population_size_, kPopulationSize, kDPopulationSize,
                          default_population_size});
    RegisterOption(Option{&num_evaluations_, kMaxFitnessEvaluations, kDMaxFitnessEvaluations,
                          default_num_evaluations});
    RegisterOption(Option{&differential_options_.differential_scale, kDifferentialScale,
                          kDDifferentialScale, default_differential_scale});
    RegisterOption(Option{&differential_options_.crossover_probability, kCrossoverProbability,
                          kDCrossoverProbability, default_crossover_probability});
    RegisterOption(Option{&differential_options_.differential_strategy, kDifferentialStrategy,
                          kDDifferentialStrategy, default_differential_strategy});
}

void DES::MakeExecuteOptsAvailable() {
using namespace config::names;
MakeOptionsAvailable({kPopulationSize, kMaxFitnessEvaluations, kDifferentialScale,
kCrossoverProbability, kDifferentialStrategy});
}

// Builds one value range per column of `typed_relation`, in column order.
// The i-th element of the result is created from the data of column i.
FeatureDomains DES::FindFeatureDomains(TypedRelation const* typed_relation) {
    size_t const column_count = typed_relation->GetNumColumns();

    std::vector<std::shared_ptr<ValueRange>> domains_per_column;
    domains_per_column.reserve(column_count);
    for (size_t column = 0; column < column_count; ++column) {
        domains_per_column.push_back(CreateValueRange(typed_relation->GetColumnData(column)));
    }
    return domains_per_column;
}

// Creates the initial population of `population_size_` individuals and returns
// it sorted by fitness in descending order (fittest first).
//
// Every individual is constructed separately. The fill constructor
// `std::vector(n, value)` would build a single EncodedNAR and copy it n times,
// leaving the population without any diversity, so that the difference term of
// the mutation would always be zero.
std::vector<EncodedNAR> DES::GetRandomPopulationInDomains(FeatureDomains const& domains) const {
    std::vector<EncodedNAR> population;
    population.reserve(population_size_);
    for (unsigned int i = 0; i < population_size_; ++i) {
        // The constructor also evaluates the qualities of the new individual,
        // so GetQualities() is safe to call in the comparator below.
        population.emplace_back(domains, typed_relation_.get());
    }

    auto compare_by_fitness = [](EncodedNAR const& a, EncodedNAR const& b) {
        return a.GetQualities().fitness > b.GetQualities().fitness;
    };
    std::sort(population.begin(), population.end(), compare_by_fitness);
    return population;
}

// Produces a mutant for the individual at position `at` of `population`, using
// the mutation function selected by the configured differential strategy.
EncodedNAR DES::MutatedIndividual(std::vector<EncodedNAR> const& population, size_t at) {
    MutationFunction const mutate =
            EnumToMutationStrategy(differential_options_.differential_strategy);
    return mutate(population, at, differential_options_);
}

unsigned long long DES::ExecuteInternal() {
FeatureDomains feature_domains = FindFeatureDomains(typed_relation_.get());
std::vector<EncodedNAR> population = GetRandomPopulationInDomains(feature_domains);

for (unsigned i = 0; i < num_evaluations_; i++) {
size_t candidate_i = i % population_size_;
EncodedNAR mutant = MutatedIndividual(population, candidate_i);
NAR mutant_decoded = mutant.SetQualities(feature_domains, typed_relation_.get());
double candidate_fitness = population[candidate_i].GetQualities().fitness;

if (mutant.GetQualities().fitness > candidate_fitness) {
population[candidate_i] = std::move(mutant);
nar_collection_.emplace_back(std::move(mutant_decoded));
}
}

auto compare_by_fitness = [](const NAR& a, const NAR& b) -> bool {
return a.GetQualities().fitness > b.GetQualities().fitness;
};
Comment on lines +80 to +82
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's code duplication, you used that in GetRandomPopulationInDomains func.
Maybe extract this to a struct with a meaningful name with overloaded operator()?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or create a private method to sort nar_collection_ by fitness, it will be better this way

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They take in different types, first takes in two NARs, second takes two EncodedNARs.

std::sort(nar_collection_.begin(), nar_collection_.end(), compare_by_fitness);
return 0;
}

} // namespace algos::des
34 changes: 34 additions & 0 deletions src/core/algorithms/nar/des/des.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#pragma once

#include "algorithms/nar/nar_algorithm.h"
#include "config/names.h"
#include "differential_functions.h"
#include "encoded_nar.h"
#include "enums.h"

namespace algos::des {
using FeatureDomains = std::vector<std::shared_ptr<model::ValueRange>> const;
using TypedRelation = model::ColumnLayoutTypedRelationData;

class DES : public NARAlgorithm {
private:
unsigned int population_size_;
unsigned int num_evaluations_;
DifferentialOptions differential_options_;

void RegisterOptions();

static FeatureDomains FindFeatureDomains(TypedRelation const* typed_relation);
std::vector<EncodedNAR> GetRandomPopulationInDomains(FeatureDomains const& domains) const;
void EvolvePopulation(std::vector<EncodedNAR>& population) const;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find a definition of the DES::EvolvePopulation method. Shouldn't its declaration be removed?

EncodedNAR MutatedIndividual(std::vector<EncodedNAR> const& population, size_t at);

protected:
void MakeExecuteOptsAvailable() override;
unsigned long long ExecuteInternal() override;

public:
DES();
};

} // namespace algos::des
57 changes: 57 additions & 0 deletions src/core/algorithms/nar/des/differential_functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "differential_functions.h"

#include <algorithm>

namespace algos::des {

// gets slow if population ~= number_of_indices
std::vector<size_t> GetRandIndices(size_t except_index, size_t population,
size_t number_of_indices) {
assert(number_of_indices <= population - 1);
std::unordered_set<size_t> indices;
indices.insert(except_index);
while (indices.size() < number_of_indices + 1) {
size_t random_index = RNG().Next() * population;
indices.insert(random_index);
}
indices.erase(except_index);
std::vector<size_t> ind_vec;
ind_vec.reserve(number_of_indices);
ind_vec.insert(ind_vec.end(), indices.begin(), indices.end());
return ind_vec;
Comment on lines +18 to +21
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::vector<size_t> ind_vec;
ind_vec.reserve(number_of_indices);
ind_vec.insert(ind_vec.end(), indices.begin(), indices.end());
return ind_vec;
std::vector<size_t> ind_vec {indices.begin(), indices.end()};
return ind_vec;

}

// rand/1/bin mutation: builds a mutant for the individual at `candidate_index`.
//
// Three individuals other than the candidate are drawn from `population` via
// GetRandIndices. The mutant starts as a copy of the candidate; each element is
// then, with probability `options.crossover_probability`, replaced by
//     sample1[i] + options.differential_scale * (sample2[i] - sample3[i])
// clamped to [0, 1].
//
// The samples are only read, so they are bound by const reference instead of
// being copied. Only the candidate is copied, because it is modified and
// returned.
EncodedNAR Rand1Bin(std::vector<EncodedNAR> const& population, size_t candidate_index,
                    DifferentialOptions options) {
    std::vector<size_t> const sample_indices =
            GetRandIndices(candidate_index, population.size(), 3);
    EncodedNAR const& sample1 = population[sample_indices[0]];
    EncodedNAR const& sample2 = population[sample_indices[1]];
    EncodedNAR const& sample3 = population[sample_indices[2]];

    EncodedNAR new_individual = population[candidate_index];
    size_t const vector_size = new_individual.VectorSize();
    for (size_t i = 0; i < vector_size; ++i) {
        if (RNG().Next() < options.crossover_probability) {
            double const mutated_value =
                    sample1[i] + options.differential_scale * (sample2[i] - sample3[i]);
            // Writing through the non-const operator[] marks the mutant's
            // cached qualities as stale; the caller must re-evaluate them.
            new_individual[i] = std::clamp(mutated_value, 0.0, 1.0);
        }
    }
    return new_individual;
}

// TODO: name is probably inconsistent with how it's called in the field.
//
// Returns the mutation function that implements `strategy`.
// Only DifferentialStrategy::rand1Bin is currently mapped (to Rand1Bin).
// Throws std::logic_error for every other strategy value.
MutationFunction EnumToMutationStrategy(DifferentialStrategy strategy) {
    switch (strategy) {
        case DifferentialStrategy::rand1Bin:
            return Rand1Bin;
        default:
            throw std::logic_error("No mutation function corresponding to DifferentialStrategy.");
    }
}

} // namespace algos::des
25 changes: 25 additions & 0 deletions src/core/algorithms/nar/des/differential_functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include <enum.h>

#include "encoded_nar.h"
#include "enums.h"
#include "rng.h"

namespace algos::des {

// Parameters of the differential-evolution mutation step.
//
// The numeric members carry default member initializers so that a
// default-initialized instance never holds indeterminate values. The defaults
// match the ones DES registers for the corresponding options.
struct DifferentialOptions {
    // Weight applied to the difference of two sampled individuals (see Rand1Bin).
    double differential_scale = 0.5;
    // Probability with which each element of an individual is replaced by the
    // mutated value (see Rand1Bin).
    double crossover_probability = 0.9;
    // NOTE(review): EnumToMutationStrategy only maps rand1Bin, so this default
    // throws if it is used before the option is set — confirm whether the
    // default should be rand1Bin.
    DifferentialStrategy differential_strategy = DifferentialStrategy::best1Exp;
};

// rand/1/bin mutation strategy. Returns a mutant of the individual at
// `candidate_index`: a copy of it in which each element is, with probability
// `options.crossover_probability`, replaced by a clamped combination of three
// other randomly chosen individuals of `population`.
EncodedNAR Rand1Bin(std::vector<EncodedNAR> const& population, size_t candidate_index,
DifferentialOptions options);

// Pointer to a mutation strategy; Rand1Bin has this signature.
using MutationFunction = EncodedNAR (*)(std::vector<EncodedNAR> const& population, size_t candidate_index,
DifferentialOptions options);

// Returns the mutation function for `strategy`; throws std::logic_error when
// no function is mapped to it.
MutationFunction EnumToMutationStrategy(DifferentialStrategy strategy);

} // namespace algos::des
101 changes: 101 additions & 0 deletions src/core/algorithms/nar/des/encoded_nar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include "encoded_nar.h"

#include "algorithms/nar/value_range.h"
#include "model/types/types.h"

namespace algos::des {

using model::NAR;

// Number of elements addressable through operator[]: one leading slot for the
// implication sign position plus kFieldCount slots per encoded feature.
size_t EncodedNAR::VectorSize() const {
    size_t const feature_slots = EncodedValueRange::kFieldCount * encoded_value_ranges_.size();
    return feature_slots + 1;
}

// Number of encoded features (one encoded value range per feature).
size_t EncodedNAR::FeatureCount() const {
    size_t const feature_total = encoded_value_ranges_.size();
    return feature_total;
}

// TODO: remove code duplication here
// Mutable element access. Index 0 is the implication sign position; every
// following index addresses a field of an encoded value range, with
// kFieldCount consecutive indices per feature.
double& EncodedNAR::operator[](size_t index) {
    // A mutable reference is handed out, so the cached qualities can no longer
    // be trusted until SetQualities() is called again.
    qualities_consistent_ = false;
    if (index != 0) {
        size_t const offset = index - 1;
        auto& encoded_range = encoded_value_ranges_[offset / EncodedValueRange::kFieldCount];
        return encoded_range[offset % EncodedValueRange::kFieldCount];
    }
    return implication_sign_pos_;
}

double const& EncodedNAR::operator[](size_t index) const {
if (index == 0) {
return implication_sign_pos_;
} else {
index--;
size_t feature = index / EncodedValueRange().kFieldCount;
size_t feature_field = index % EncodedValueRange().kFieldCount;
return encoded_value_ranges_[feature][feature_field];
}
Comment on lines +33 to +38
Copy link
Collaborator

@Petua41 Petua41 Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
} else {
index--;
size_t feature = index / EncodedValueRange().kFieldCount;
size_t feature_field = index % EncodedValueRange().kFieldCount;
return encoded_value_ranges_[feature][feature_field];
}
index--;
size_t feature = index / EncodedValueRange::kFieldCount;
size_t feature_field = index % EncodedValueRange::kFieldCount;
return encoded_value_ranges_[feature][feature_field];

}

// Decodes this individual against `domains`, evaluates the decoded rule on
// `typed_relation`, caches the resulting qualities and marks them consistent.
// Returns the decoded rule.
NAR EncodedNAR::SetQualities(FeatureDomains& domains, TypedRelation const* typed_relation) {
    NAR decoded_rule = Decode(domains);
    decoded_rule.SetQualities(typed_relation);

    // Cache the freshly computed qualities before flagging them as valid.
    qualities_ = decoded_rule.GetQualities();
    qualities_consistent_ = true;
    return decoded_rule;
}

// Returns the cached qualities.
// Throws std::logic_error if they are not consistent, i.e. SetQualities() has
// not been called since construction or since the last mutable element access.
model::NARQualities const& EncodedNAR::GetQualities() const {
    if (qualities_consistent_) {
        return qualities_;
    }
    throw std::logic_error("Getting uninitialized qualities from NAR.");
}

NAR EncodedNAR::Decode(FeatureDomains& domains) const {
Copy link
Collaborator

@Petua41 Petua41 Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
NAR EncodedNAR::Decode(FeatureDomains& domains) const {
NAR EncodedNAR::Decode(FeatureDomains const& domains) const {

If FeatureDomains doesn't have const operator[] (some of the stl containers may have this problem), use at -- it must have const version

NAR resulting_nar;
std::vector<size_t> feature_order(encoded_value_ranges_.size());
std::iota(std::begin(feature_order), std::end(feature_order), 0);
VanyaVolgushev marked this conversation as resolved.
Show resolved Hide resolved
auto compare_by_permutation = [this](size_t a, size_t b) {
return encoded_value_ranges_[a].permutation > encoded_value_ranges_[b].permutation;
};
std::sort(feature_order.begin(), feature_order.end(), compare_by_permutation);

size_t implication_sign_after = implication_sign_pos_ * (encoded_value_ranges_.size() - 1);
size_t handling_feat_num = 0;
for (size_t feature_index : feature_order) {
EncodedValueRange const& encoded_feature = encoded_value_ranges_[feature_index];
if (encoded_feature.threshold < RNG().Next()) {
handling_feat_num++;
continue;
}
auto domain = domains[feature_index];
auto decoded = encoded_value_ranges_[feature_index].Decode(domain);
Comment on lines +73 to +74
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, you are copying the shared pointer when using auto domain = domains[feature_index]. Use a const reference; there is no need to increase the shared_ptr's reference count for no reason.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again, no need to create an additional copy of decoded.
I'm aware that InsertInCons wants a shared pointer, not a const ref, but when passing const ref to it it will create a copy automatically, so no problems here.

if (handling_feat_num > implication_sign_after) {
resulting_nar.InsertInCons(feature_index, decoded);
} else {
resulting_nar.InsertInAnte(feature_index, decoded);
}
handling_feat_num++;
}
return resulting_nar;
}

// Creates an individual with one default-constructed encoded value range per
// feature domain and a random implication sign position, then evaluates its
// qualities on `typed_relation`, so GetQualities() is usable immediately.
EncodedNAR::EncodedNAR(FeatureDomains& domains, TypedRelation const* typed_relation) {
    size_t const feature_count = domains.size();
    encoded_value_ranges_.reserve(feature_count);
    for (size_t feature_index = 0; feature_index < feature_count; ++feature_index) {
        // Construct each range in place rather than moving in a temporary.
        // Every range is still constructed individually, not copied from a
        // shared prototype.
        encoded_value_ranges_.emplace_back();
    }
    implication_sign_pos_ = RNG().Next();
    SetQualities(domains, typed_relation);
}

EncodedNAR::EncodedNAR(size_t feature_count) {
for (size_t feature_index = 0; feature_index < feature_count; feature_index++) {
encoded_value_ranges_.emplace_back(EncodedValueRange());
}
Comment on lines +94 to +97
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EncodedNAR::EncodedNAR(size_t feature_count) {
for (size_t feature_index = 0; feature_index < feature_count; feature_index++) {
encoded_value_ranges_.emplace_back(EncodedValueRange());
}
EncodedNAR::EncodedNAR(size_t feature_count)
: encoded_value_ranges_(feature_count, EncodedValueRange()) {

implication_sign_pos_ = RNG().Next();
}
Comment on lines +90 to +99
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same thing with RNG, just create a std::random_device as a field in EncodedNAR class


} // namespace algos::des
36 changes: 36 additions & 0 deletions src/core/algorithms/nar/des/encoded_nar.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#pragma once

#include "algorithms/nar/nar.h"
#include "encoded_value_range.h"
#include "model/table/column_layout_typed_relation_data.h"

namespace algos::des {
using model::NAR;

// Individual of the DES population: a numerical association rule stored as a
// flat vector of doubles. Element 0 is the implication sign position; the
// remaining elements are the fields of one EncodedValueRange per feature.
// Decode() turns the encoding into a NAR for a given set of feature domains.
class EncodedNAR {
private:
    using TypedRelation = model::ColumnLayoutTypedRelationData;
    using FeatureDomains = std::vector<std::shared_ptr<model::ValueRange>> const;

    // Encoded position of the implication sign, used by Decode() to split
    // features between antecedent and consequent. Holds -1 until a constructor
    // assigns a random value.
    double implication_sign_pos_ = -1;
    // One encoded value range per feature.
    std::vector<EncodedValueRange> encoded_value_ranges_;

    // Qualities cached by the last SetQualities() call. They are valid only
    // while qualities_consistent_ is true.
    model::NARQualities qualities_;
    bool qualities_consistent_ = false;

public:
    // Total number of elements addressable through operator[].
    size_t VectorSize() const;
    // Number of encoded features.
    size_t FeatureCount() const;
    // Mutable element access; invalidates the cached qualities.
    double& operator[](size_t index);
    // Read-only element access.
    double const& operator[](size_t index) const;

    // Returns the cached qualities; throws std::logic_error if they are stale.
    model::NARQualities const& GetQualities() const;
    // Decodes, evaluates on `typed_relation`, caches the qualities and returns
    // the decoded rule.
    NAR SetQualities(FeatureDomains& domains, TypedRelation const* typed_relation);

    // Decodes this individual into a NAR using `domains`.
    NAR Decode(FeatureDomains& domains) const;
    // Random individual with one range per domain; qualities are evaluated.
    EncodedNAR(FeatureDomains& domains, TypedRelation const* typed_relation);
    // Random individual with `feature_count` ranges; qualities are NOT
    // evaluated, so SetQualities() must be called before GetQualities().
    EncodedNAR(size_t feature_count);
};

} // namespace algos::des
Loading
Loading