Skip to content

Commit

Permalink
Add topofeatures (#1241)
Browse files Browse the repository at this point in the history
TopologicalFeaturesImplementation was added
  • Loading branch information
valer1435 authored Jan 15, 2024
1 parent dcc7ff5 commit a4ef39e
Show file tree
Hide file tree
Showing 65 changed files with 972,299 additions and 606 deletions.
17 changes: 8 additions & 9 deletions cases/dataset_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,23 @@ def unpack_archived_data(archive_name: str):
os.path.basename(archive_name) not in os.listdir(os.path.dirname(archive_path))):
with tarfile.open(archive_path) as file:
def is_within_directory(directory, target):
    """Return True when ``target`` is ``directory`` itself or lies inside it.

    Both paths are made absolute (and normalised) with ``os.path.abspath``
    before they are compared, so ``..`` segments in ``target`` are resolved
    lexically first.

    :param directory: directory that is expected to contain ``target``
    :param target: path to check
    :return: ``True`` if ``target`` is located within ``directory``, else ``False``
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath compares whole path components. The character-wise
        # commonprefix would treat "/foo/barbaz" as being inside "/foo/bar".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share a common path at all
        # (e.g. different drives on Windows) - then target is not inside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract ``tar`` into ``path`` after checking its members for path traversal.

    Every member returned by ``tar.getmembers()`` is joined onto ``path`` and
    checked with ``is_within_directory``; the check runs for all members
    before anything is extracted.

    :param tar: opened archive object providing ``getmembers`` and ``extractall``
    :param path: destination directory for the extracted files
    :param members: optional subset of members, passed through to ``extractall``
    :param numeric_owner: passed through to ``extractall``
    :raises Exception: if a member's path would end up outside ``path``
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

    # Extract exactly once, only after every member passed the check above.
    tar.extractall(path, members, numeric_owner=numeric_owner)

safe_extract(file, path=os.path.dirname(archive_path))
print('Unpacking finished')
else:
Expand Down
2 changes: 1 addition & 1 deletion cases/time_series_gapfilling_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def get_composite_pipeline():
node_linear_2 = PipelineNode('linear', nodes_from=[node_2])

node_final = PipelineNode('ridge', nodes_from=[node_linear_1,
node_linear_2])
node_linear_2])
pipeline = Pipeline(node_final)
return pipeline

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
`one_hot_encoding`,One-Hot Encoder, Feature encoding
`label_encoding`,Label Encoder, Feature encoding
`resample`,Imbalanced binary class transformation in classification, Data transformation
`topological_features`,"Calculation of topological features, only for time series",Data transformation


.. csv-table:: Feature transformation operations implementations
Expand Down Expand Up @@ -104,6 +105,7 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
`one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`,
`label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree`
`resample`,`FEDOT model using sklearn.utils.resample`,
`topological_features`,FEDOT model,`ts`


Models used
Expand Down
20 changes: 9 additions & 11 deletions examples/advanced/automl/tpot_vs_fedot.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from fedot.core.data.data import InputData
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from tpot.export_utils import set_param_recursive
from tpot.builtins import StackingEstimator
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import roc_auc_score as roc_auc
from sklearn.ensemble import RandomForestClassifier
import numpy

numpy.float = numpy.float64 # tmp patch before TPOT could fix this: https://github.com/EpistasisLab/tpot/issues/1281

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score as roc_auc
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import make_pipeline
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive

from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.node import PipelineNode
from fedot.core.data.data import InputData


def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
train_data = InputData.from_csv(train_file_path)
Expand Down
8 changes: 1 addition & 7 deletions examples/advanced/time_series_forecasting/multistep.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd

from examples.advanced.time_series_forecasting.composing_pipelines import get_border_line_info
from examples.simple.time_series_forecasting.api_forecasting import TS_DATASETS
from examples.simple.time_series_forecasting.ts_pipelines import ts_ar_pipeline
from examples.simple.time_series_forecasting.tuning_pipelines import visualise
from fedot.core.data.data import InputData
Expand All @@ -19,13 +20,6 @@

_TS_EXAMPLES_DATA_PATH = fedot_project_root().joinpath('examples/data/ts')

TS_DATASETS = {
'australia': _TS_EXAMPLES_DATA_PATH.joinpath('australia.csv'),
'beer': _TS_EXAMPLES_DATA_PATH.joinpath('beer.csv'),
'salaries': _TS_EXAMPLES_DATA_PATH.joinpath('salaries.csv'),
'stackoverflow': _TS_EXAMPLES_DATA_PATH.joinpath('stackoverflow.csv'),
'test_sea': fedot_project_root().joinpath('test', 'data', 'simple_sea_level.csv')}


def run_multistep(dataset: str, pipeline: Pipeline, step_forecast: int = 10, future_steps: int = 5,
visualisation=False):
Expand Down
32 changes: 16 additions & 16 deletions examples/advanced/time_series_forecasting/nemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ def return_working_pipeline():

pipeline = get_arima_pipeline()
train_dataset = MultiModalData({
'arima': deepcopy(train_input),
})
'arima': deepcopy(train_input),
})
predict_dataset = MultiModalData({
'arima': deepcopy(predict_input),
})
'arima': deepcopy(predict_input),
})
pipeline.fit_from_scratch(train_dataset)
predicted_values = pipeline.predict(predict_dataset)
predicted_values = predicted_values.predict
Expand All @@ -140,13 +140,13 @@ def return_working_pipeline():
# arima with nemo ensemble
pipeline = return_working_pipeline()
train_dataset = MultiModalData({
'lagged/1': deepcopy(train_input),
'exog_ts': deepcopy(train_input_exog)
})
'lagged/1': deepcopy(train_input),
'exog_ts': deepcopy(train_input_exog)
})
predict_dataset = MultiModalData({
'lagged/1': deepcopy(predict_input),
'exog_ts': deepcopy(predict_input_exog)
})
'lagged/1': deepcopy(predict_input),
'exog_ts': deepcopy(predict_input_exog)
})
pipeline.fit_from_scratch(train_dataset)
predicted_values = pipeline.predict(predict_dataset).predict

Expand All @@ -165,13 +165,13 @@ def return_working_pipeline():
# arima with nemo ensemble
pipeline = get_arima_nemo_pipeline()
train_dataset = MultiModalData({
'arima': deepcopy(train_input),
'exog_ts': deepcopy(train_input_exog)
})
'arima': deepcopy(train_input),
'exog_ts': deepcopy(train_input_exog)
})
predict_dataset = MultiModalData({
'arima': deepcopy(predict_input),
'exog_ts': deepcopy(predict_input_exog)
})
'arima': deepcopy(predict_input),
'exog_ts': deepcopy(predict_input_exog)
})
pipeline.fit_from_scratch(train_dataset)
predicted_values = pipeline.predict(predict_dataset).predict

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ def build_pred_ints(start=5000, end=7000, horizon=200):
task=task,
data_type=DataTypesEnum.ts)
model = Fedot(problem='ts_forecasting',
task_params=task.task_params,
timeout=3,
preset='ts',
show_progress=False)
task_params=task.task_params,
timeout=3,
preset='ts',
show_progress=False)

model.fit(train_input)
model.forecast()
Expand Down
Loading

0 comments on commit a4ef39e

Please sign in to comment.