diff --git a/.gitignore b/.gitignore
index 9e584fd4..a5f9134a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,4 +129,4 @@ dmypy.json
.pyre/
# User data
-data/
+/data
diff --git a/examples/0_loading_data/load_list_of_datasests.py b/examples/0_loading_data/load_list_of_datasests.py
index c2ee1cbb..741438e1 100644
--- a/examples/0_loading_data/load_list_of_datasests.py
+++ b/examples/0_loading_data/load_list_of_datasests.py
@@ -6,9 +6,8 @@ def get_datasets():
'nomao', 'sylvine', 'kc1', 'jungle_chess_2pcs_raw_endgame_complete', 'credit-g', 'delta_ailerons', 'pol'
]
datasets_loader = OpenMLDatasetsLoader()
- datasets = datasets_loader.load(dataset_names)
- print(f'Datasets "{", ".join(dataset_names)}" are available at the paths:')
- print('\n'.join(str(d) for d in datasets))
+ datasets = datasets_loader.load(dataset_names, allow_names=True)
+    print(f'Datasets "{", ".join(dataset_names)}" have been downloaded.')
return datasets
diff --git a/examples/2_extracting_datasets_meta_features/extract_with_load_on_demand.py b/examples/2_extracting_datasets_meta_features/extract_with_load_on_demand.py
index 9519e6ca..ad2110a2 100644
--- a/examples/2_extracting_datasets_meta_features/extract_with_load_on_demand.py
+++ b/examples/2_extracting_datasets_meta_features/extract_with_load_on_demand.py
@@ -1,3 +1,5 @@
+import openml
+
from meta_automl.data_preparation.datasets_loaders import OpenMLDatasetsLoader
from meta_automl.data_preparation.meta_features_extractors import PymfeExtractor
@@ -6,8 +8,9 @@ def main():
dataset_names = [
'nomao', 'sylvine'
]
+    dataset_ids = [openml.datasets.get_dataset(name, download_data=False, download_qualities=False).dataset_id
+                   for name in dataset_names]
extractor = PymfeExtractor(extractor_params={'groups': 'general'}, datasets_loader=OpenMLDatasetsLoader())
- meta_features = extractor.extract(dataset_names)
+ meta_features = extractor.extract(dataset_ids)
return meta_features
diff --git a/examples/2_extracting_datasets_meta_features/load_and_extract_features_sequentially.py b/examples/2_extracting_datasets_meta_features/load_and_extract_features_sequentially.py
index f1d21cf4..cda8b804 100644
--- a/examples/2_extracting_datasets_meta_features/load_and_extract_features_sequentially.py
+++ b/examples/2_extracting_datasets_meta_features/load_and_extract_features_sequentially.py
@@ -9,8 +9,8 @@ def main():
loader = OpenMLDatasetsLoader()
extractor = PymfeExtractor(extractor_params={'groups': 'general'})
- cached_datasets = loader.load(dataset_names)
- meta_features = extractor.extract(cached_datasets)
+ datasets = loader.load(dataset_names, allow_names=True)
+ meta_features = extractor.extract(datasets)
return meta_features
diff --git a/examples/3_selecting_similar_datasets/select_similar_datasets_by_knn.py b/examples/3_selecting_similar_datasets/select_similar_datasets_by_knn.py
index b6f2bb8c..5f13201e 100644
--- a/examples/3_selecting_similar_datasets/select_similar_datasets_by_knn.py
+++ b/examples/3_selecting_similar_datasets/select_similar_datasets_by_knn.py
@@ -8,9 +8,10 @@
def main():
# Define datasets.
dataset_names = ['monks-problems-1', 'apsfailure', 'australian', 'bank-marketing']
+ datasets = OpenMLDatasetsLoader().load(dataset_names, allow_names=True)
# Extract meta-features and load on demand.
- extractor = PymfeExtractor(extractor_params={'groups': 'general'}, datasets_loader=OpenMLDatasetsLoader())
- meta_features = extractor.extract(dataset_names)
+ extractor = PymfeExtractor(extractor_params={'groups': 'general'})
+ meta_features = extractor.extract(datasets)
# Preprocess meta-features, as KNN does not support NaNs.
meta_features = meta_features.dropna(axis=1, how='any')
# Split datasets to train (preprocessing) and test (actual meta-algorithm objects).
diff --git a/examples/4_advising_models/advise_models_from_similar_datasets.py b/examples/4_advising_models/advise_models_from_similar_datasets.py
index 37c3b2db..e1dc16aa 100644
--- a/examples/4_advising_models/advise_models_from_similar_datasets.py
+++ b/examples/4_advising_models/advise_models_from_similar_datasets.py
@@ -2,7 +2,7 @@
from golem.core.optimisers.fitness import SingleObjFitness
from sklearn.model_selection import train_test_split
-from meta_automl.data_preparation.dataset import DatasetCache
+from meta_automl.data_preparation.dataset import OpenMLDataset
from meta_automl.data_preparation.datasets_loaders import OpenMLDatasetsLoader
from meta_automl.data_preparation.meta_features_extractors import PymfeExtractor
from meta_automl.data_preparation.model import Model
@@ -13,9 +13,10 @@
def main():
# Define datasets.
dataset_names = ['monks-problems-1', 'apsfailure', 'australian', 'bank-marketing']
+ datasets = OpenMLDatasetsLoader().load(dataset_names, allow_names=True)
# Extract meta-features and load on demand.
- extractor = PymfeExtractor(extractor_params={'groups': 'general'}, datasets_loader=OpenMLDatasetsLoader())
- meta_features = extractor.extract(dataset_names)
+ extractor = PymfeExtractor(extractor_params={'groups': 'general'})
+ meta_features = extractor.extract(datasets)
# Preprocess meta-features, as KNN does not support NaNs.
meta_features = meta_features.dropna(axis=1, how='any')
# Split datasets to train (preprocessing) and test (actual meta-algorithm objects).
@@ -29,8 +30,8 @@ def main():
PipelineBuilder().add_node('normalization').add_node('logit').build(),
PipelineBuilder().add_node('rf').add_node('logit').build()
]
- best_models = [[Model(pipeline, SingleObjFitness(1), 'some_metric_name', DatasetCache(dataset_name))]
- for dataset_name, pipeline in zip(y_train, best_pipelines)]
+ best_models = [[Model(pipeline, SingleObjFitness(1), 'some_metric_name', OpenMLDataset(dataset_id))]
+ for dataset_id, pipeline in zip(y_train, best_pipelines)]
dataset_names_to_best_pipelines = dict(zip(y_train, best_models))
advisor = DiverseFEDOTPipelineAdvisor(assessor, minimal_distance=2).fit(dataset_names_to_best_pipelines)
diff --git a/examples/knowledge_base_loading.py b/examples/knowledge_base_loading.py
index 699a547f..310b4bdf 100644
--- a/examples/knowledge_base_loading.py
+++ b/examples/knowledge_base_loading.py
@@ -16,12 +16,12 @@
# ===== Another way to get train models, but also group them by datasets:
models_for_train = {}
- for dataset_name in train_datasets['dataset_name']:
+ for dataset_id in train_datasets['dataset_id']:
dataset_models = models_loader.load(
- dataset_names=[dataset_name], # load models just for this exact dataset.
+ dataset_ids=[dataset_id], # load models just for this exact dataset.
fitness_metric='logloss', # must correspond to a metric name in a knowledge base.
)
- models_for_train[dataset_name] = dataset_models
+ models_for_train[dataset_id] = dataset_models
# If you need to load data to the local storage
-    # dataset = OpenMLDatasetsLoader().load_single(dataset_name)
+    # dataset = OpenMLDatasetsLoader().load_single(dataset_id)
diff --git a/experiments/fedot_warm_start/run.py b/experiments/fedot_warm_start/run.py
index 26382ebb..c0461f30 100644
--- a/experiments/fedot_warm_start/run.py
+++ b/experiments/fedot_warm_start/run.py
@@ -22,8 +22,8 @@
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
-from meta_automl.data_preparation.data_manager import DataManager
-from meta_automl.data_preparation.dataset import DatasetCache, Dataset
+from meta_automl.data_preparation.dataset import OpenMLDataset, DatasetData
+from meta_automl.data_preparation.file_system import get_data_dir
from meta_automl.data_preparation.datasets_loaders import OpenMLDatasetsLoader
from meta_automl.data_preparation.datasets_train_test_split import openml_datasets_train_test_split
from meta_automl.data_preparation.meta_features_extractors import PymfeExtractor
@@ -37,8 +37,8 @@
N_DATASETS = 3
TEST_SIZE = 0.33
# Evaluation timeouts
-TRAIN_TIMEOUT = 1
-TEST_TIMEOUT = 1
+TRAIN_TIMEOUT = 0.01
+TEST_TIMEOUT = 0.01
# Models & datasets
N_BEST_DATASET_MODELS_TO_MEMORIZE = 10
N_CLOSEST_DATASETS_TO_PROPOSE = 5
@@ -61,7 +61,7 @@
time_now = datetime.now()
time_now_iso = time_now.isoformat(timespec="minutes")
time_now_for_path = time_now_iso.replace(":", ".")
-save_dir = DataManager.get_data_dir(). \
+save_dir = get_data_dir(). \
joinpath('experiments').joinpath('fedot_warm_start').joinpath(f'run_{time_now_for_path}')
save_dir.mkdir(parents=True)
log_file = save_dir.joinpath('log.txt')
@@ -75,18 +75,23 @@
)
-def prepare_data() -> Tuple[List[int], Dict[str, DatasetCache]]:
+def prepare_data() -> Tuple[pd.DataFrame, pd.DataFrame, Dict[int, OpenMLDataset]]:
"""Returns dictionary with dataset names and cached datasets downloaded from OpenML."""
dataset_ids = openml.study.get_suite(99).data
if N_DATASETS is not None:
dataset_ids = pd.Series(dataset_ids)
dataset_ids = dataset_ids.sample(n=N_DATASETS, random_state=SEED)
- dataset_ids = list(dataset_ids)
- return dataset_ids, {cache.name: cache for cache in OpenMLDatasetsLoader().load(dataset_ids)}
+ df_split_datasets = openml_datasets_train_test_split(dataset_ids, seed=SEED)
+ df_datasets_train = df_split_datasets[df_split_datasets['is_train'] == 1]
+ df_datasets_test = df_split_datasets[df_split_datasets['is_train'] == 0]
+
+ datasets = {dataset.id_: dataset for dataset in OpenMLDatasetsLoader().load(dataset_ids)}
+ return df_datasets_train, df_datasets_test, datasets
-def transform_data_for_fedot(data: Dataset) -> (np.array, np.array):
+
+def transform_data_for_fedot(data: DatasetData) -> (np.array, np.array):
x = data.x
y = data.y
if len(y.shape) == 1:
@@ -127,8 +132,8 @@ def prepare_extractor_and_assessor(datasets_train: List[str]):
return data_similarity_assessor, extractor
-def fit_fedot(data: Dataset, timeout: float, run_label: str, initial_assumption=None):
- x, y = transform_data_for_fedot(data)
+def fit_fedot(dataset: OpenMLDataset, timeout: float, run_label: str, initial_assumption=None):
+ x, y = transform_data_for_fedot(dataset.get_data(dataset_format='array'))
time_start = timeit.default_timer()
fedot = Fedot(timeout=timeout, initial_assumption=initial_assumption, **COMMON_FEDOT_PARAMS)
@@ -137,14 +142,14 @@ def fit_fedot(data: Dataset, timeout: float, run_label: str, initial_assumption=
metrics = get_pipeline_metrics(fedot.current_pipeline, fedot.train_data)
pipeline = fedot.current_pipeline
- run_results = get_result_data_row(dataset=data, run_label=run_label, pipeline=pipeline, automl_time_sec=automl_time,
+ run_results = get_result_data_row(dataset=dataset, run_label=run_label, pipeline=pipeline, automl_time_sec=automl_time,
automl_timeout_min=fedot.params.timeout, history_obj=fedot.history, **metrics)
return fedot, run_results
-def get_result_data_row(dataset, run_label: str, pipeline, history_obj=None, automl_time_sec=0., automl_timeout_min=0.,
- **metrics):
- run_results = dict(dataset_id=dataset.id,
+def get_result_data_row(dataset: OpenMLDataset, run_label: str, pipeline, history_obj=None, automl_time_sec=0.,
+ automl_timeout_min=0., **metrics):
+ run_results = dict(dataset_id=dataset.id_,
dataset_name=dataset.name,
run_label=run_label,
model_obj=pipeline,
@@ -157,7 +162,7 @@ def get_result_data_row(dataset, run_label: str, pipeline, history_obj=None, aut
return run_results
-def extract_best_history_models(dataset_cache, history):
+def extract_best_history_models(dataset, history):
best_individuals = sorted(chain(*history.individuals),
key=lambda ind: ind.fitness,
reverse=True)
@@ -165,7 +170,7 @@ def extract_best_history_models(dataset_cache, history):
best_models = []
for individual in best_individuals[:N_BEST_DATASET_MODELS_TO_MEMORIZE]:
pipeline = PipelineAdapter().restore(individual.graph)
- model = Model(pipeline, individual.fitness, history.objective.metric_names[0], dataset_cache)
+ model = Model(pipeline, individual.fitness, history.objective.metric_names[0], dataset)
best_models.append(model)
return best_models
@@ -173,22 +178,19 @@ def extract_best_history_models(dataset_cache, history):
def main():
baseline_pipeline = PipelineBuilder().add_node('rf').build()
- dataset_ids, datasets_cache = prepare_data()
+ df_datasets_train, df_datasets_test, datasets = prepare_data()
- split_datasets = openml_datasets_train_test_split(dataset_ids, seed=SEED)
- datasets_train = split_datasets[split_datasets['is_train'] == 1]['dataset_name'].to_list()
- datasets_test = split_datasets[~split_datasets['is_train'] == 0]['dataset_name'].to_list()
+ dataset_ids_train = df_datasets_train.index.to_list()
+ dataset_ids_test = df_datasets_test.index.to_list()
evaluation_results = []
best_models_per_dataset = {}
progress_file = open(save_dir.joinpath('progress.txt'), 'a')
- for name in tqdm(datasets_cache.keys(), 'FEDOT, all datasets', file=progress_file):
+ for dataset_id in tqdm(datasets.keys(), 'FEDOT, all datasets', file=progress_file):
try:
- cache = datasets_cache[name]
- data = cache.from_cache()
-
- timeout = TRAIN_TIMEOUT if name in datasets_train else TEST_TIMEOUT
- fedot, run_results = fit_fedot(data=data, timeout=timeout, run_label='FEDOT')
+ dataset = datasets[dataset_id]
+ timeout = TRAIN_TIMEOUT if dataset_id in dataset_ids_train else TEST_TIMEOUT
+ fedot, run_results = fit_fedot(dataset=dataset, timeout=timeout, run_label='FEDOT')
evaluation_results.append(run_results)
# TODO:
# x Turn the tuned pipeline into a model (evaluate its fitness on the data)
@@ -197,38 +199,37 @@ def main():
# Filter out unique individuals with the best fitness
history = fedot.history
- best_models = extract_best_history_models(cache, history)
- best_models_per_dataset[name] = best_models
+ best_models = extract_best_history_models(dataset, history)
+ best_models_per_dataset[dataset_id] = best_models
except Exception:
- logging.exception(f'Train dataset "{name}"')
+ logging.exception(f'Train dataset "{dataset_id}"')
- data_similarity_assessor, extractor = prepare_extractor_and_assessor(datasets_train)
+ data_similarity_assessor, extractor = prepare_extractor_and_assessor(dataset_ids_train)
model_advisor = DiverseFEDOTPipelineAdvisor(data_similarity_assessor, n_best_to_advise=N_BEST_MODELS_TO_ADVISE,
minimal_distance=MINIMAL_DISTANCE_BETWEEN_ADVISED_MODELS)
model_advisor.fit(best_models_per_dataset)
- for name in tqdm(datasets_test, 'MetaFEDOT, Test datasets', file=progress_file):
+ for dataset_id in tqdm(dataset_ids_test, 'MetaFEDOT, Test datasets', file=progress_file):
try:
- cache = datasets_cache[name]
- data = cache.from_cache()
+ dataset = datasets[dataset_id]
# Run meta AutoML
# 1
time_start = timeit.default_timer()
- meta_features = extractor.extract([cache], fill_input_nans=True, use_cached=False, update_cached=True)
+ meta_features = extractor.extract([dataset], fill_input_nans=True, use_cached=False, update_cached=True)
meta_features = meta_features.fillna(0)
meta_learning_time_sec = timeit.default_timer() - time_start
initial_assumptions = model_advisor.predict(meta_features)[0]
assumption_pipelines = [model.predictor for model in initial_assumptions]
# 2
- fedot_meta, fedot_meta_results = fit_fedot(data=data, timeout=TEST_TIMEOUT, run_label='MetaFEDOT',
+ fedot_meta, fedot_meta_results = fit_fedot(dataset=dataset, timeout=TEST_TIMEOUT, run_label='MetaFEDOT',
initial_assumption=assumption_pipelines)
fedot_meta_results['meta_learning_time_sec'] = meta_learning_time_sec
evaluation_results.append(fedot_meta_results)
# Fit & evaluate simple baseline
baseline_metrics = get_pipeline_metrics(baseline_pipeline, fedot_meta.train_data)
- baseline_res = get_result_data_row(dataset=data, run_label='simple baseline', pipeline=baseline_pipeline,
+ baseline_res = get_result_data_row(dataset=dataset, run_label='simple baseline', pipeline=baseline_pipeline,
**baseline_metrics)
evaluation_results.append(baseline_res)
@@ -236,11 +237,11 @@ def main():
for i, assumption in enumerate(initial_assumptions):
pipeline = assumption.predictor
assumption_metrics = get_pipeline_metrics(pipeline, fedot_meta.train_data)
- assumption_res = get_result_data_row(dataset=data, run_label=f'MetaFEDOT - initial assumption {i}',
+ assumption_res = get_result_data_row(dataset=dataset, run_label=f'MetaFEDOT - initial assumption {i}',
pipeline=pipeline, **assumption_metrics)
evaluation_results.append(assumption_res)
except Exception:
- logging.exception(f'Test dataset "{name}"')
+ logging.exception(f'Test dataset "{dataset_id}"')
progress_file.close()
# Save the accumulated results
@@ -250,11 +251,11 @@ def main():
for res in evaluation_results:
try:
res['run_date'] = time_now
- dataset_name = res['dataset_name']
+ dataset_id = res['dataset_id']
run_label = res['run_label']
# define saving paths
- model_path = models_dir.joinpath(f'{dataset_name}_{run_label}')
- history_path = history_dir.joinpath(f'{dataset_name}_{run_label}_history.json')
+ model_path = models_dir.joinpath(f'{dataset_id}_{run_label}')
+ history_path = history_dir.joinpath(f'{dataset_id}_{run_label}_history.json')
# replace objects with export paths for csv
res['model_path'] = str(model_path)
res.pop('model_obj').save(res['model_path'])
@@ -271,12 +272,13 @@ def main():
params = {
'run_date': time_now_iso,
'seed': SEED,
- 'n_datasets': N_DATASETS or len(dataset_ids),
+ 'n_datasets': N_DATASETS or len(datasets),
'test_size': TEST_SIZE,
- 'dataset_ids': dataset_ids,
- 'dataset_names': list(datasets_cache.keys()),
- 'dataset_names_train': datasets_train,
- 'dataset_names_test': datasets_test,
+ 'dataset_ids': list(datasets.keys()),
+ 'dataset_ids_train': dataset_ids_train,
+ 'dataset_ids_test': dataset_ids_test,
+ 'dataset_names_train': df_datasets_train['dataset_name'].to_list(),
+ 'dataset_names_test': df_datasets_test['dataset_name'].to_list(),
'train_timeout': TRAIN_TIMEOUT,
'test_timeout': TEST_TIMEOUT,
'n_best_dataset_models_to_memorize': N_BEST_DATASET_MODELS_TO_MEMORIZE,
diff --git a/meta_automl/data_preparation/data_manager.py b/meta_automl/data_preparation/data_manager.py
deleted file mode 100644
index 0a743e28..00000000
--- a/meta_automl/data_preparation/data_manager.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from __future__ import annotations
-
-import pickle
-from os import PathLike
-from pathlib import Path
-from typing import Dict, Any, Union
-
-PathType = Union[PathLike, str]
-DEFAULT_CACHE_EXTENSION = '.pkl'
-
-
-class DataManager:
-
- @classmethod
- def get_dataset_cache_path(cls, dataset_name: str) -> Path:
- return cls.get_datasets_dir().joinpath(dataset_name).with_suffix(DEFAULT_CACHE_EXTENSION)
-
- @classmethod
- def get_datasets_dir(cls) -> Path:
- datasets_dir = cls.get_data_dir().joinpath('datasets')
- return cls.ensure_dir_exists(datasets_dir)
-
- @classmethod
- def get_data_dir(cls) -> Path:
- data_dir = cls.get_project_root().joinpath('data')
- return cls.ensure_dir_exists(data_dir)
-
- @classmethod
- def ensure_dir_exists(cls, dir_: Path) -> Path:
- if not dir_.exists():
- dir_.mkdir()
- return dir_
-
- @classmethod
- def get_project_root(cls) -> Path:
- """Returns project root folder."""
- return Path(__file__).parents[2]
-
- @classmethod
- def get_meta_features_cache_path(cls, dataset_name: str, source_name: str):
- meta_features_dir = cls.ensure_dir_exists(cls.get_data_dir().joinpath(source_name))
- return meta_features_dir.joinpath(dataset_name).with_suffix('.pkl')
-
- @classmethod
- def get_meta_features_dict(cls, dataset_name: str, source_name: str) -> Dict[str, Any]:
- meta_features_file = cls.get_meta_features_cache_path(dataset_name, source_name)
- if not meta_features_file.exists():
- return {}
- with open(meta_features_file, 'rb') as f:
- meta_features = pickle.load(f)
- return meta_features
-
- @classmethod
- def update_meta_features_dict(cls, dataset_name: str, source_name: str, meta_features: Dict[str, Any]):
- meta_features_file = cls.get_meta_features_cache_path(dataset_name, source_name)
- meta_features_old = cls.get_meta_features_dict(dataset_name, source_name)
- with open(meta_features_file, 'wb') as f:
- meta_features_old.update(meta_features)
- pickle.dump(meta_features, f)
diff --git a/meta_automl/data_preparation/dataset.py b/meta_automl/data_preparation/dataset.py
deleted file mode 100644
index 23dda83c..00000000
--- a/meta_automl/data_preparation/dataset.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from __future__ import annotations
-
-import pickle
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Union, Optional, List
-
-import numpy as np
-import pandas as pd
-import scipy as sp
-
-from meta_automl.data_preparation.data_manager import DataManager
-
-
-class NoCacheError(FileNotFoundError):
- pass
-
-
-@dataclass
-class DatasetCache:
- name: str
- _cache_path: Optional[Path] = None
- _id: Optional[int] = None
-
- @property
- def id(self):
- return self._id or self.name
-
- @property
- def cache_path(self):
- return self._cache_path or DataManager.get_dataset_cache_path(self.name)
-
- @cache_path.setter
- def cache_path(self, val):
- self._cache_path = val
-
- def from_cache(self) -> Dataset:
- if not self.cache_path.exists():
- raise NoCacheError(f'Dataset {self.name} not found!')
- with open(self.cache_path, 'rb') as f:
- dataset = pickle.load(f)
- dataset.cache_path = self.cache_path
- return dataset
-
-
-@dataclass
-class Dataset:
- name: str
- x: Union[np.ndarray, pd.DataFrame, sp.sparse.csr_matrix]
- y: Optional[Union[np.ndarray, pd.DataFrame]] = None
- categorical_indicator: Optional[List[bool]] = None
- attribute_names: Optional[List[str]] = None
- cache_path: Optional[Path] = None
- _id: Optional[int] = None
-
- def dump_to_cache(self, cache_path: Optional[Path] = None) -> DatasetCache:
- cache_path = cache_path or self.cache_path
- with open(cache_path, 'wb') as f:
- pickle.dump(self, f)
- return DatasetCache(self.name, cache_path, self.id)
-
- @property
- def id(self):
- return self._id or self.name
diff --git a/meta_automl/data_preparation/dataset/__init__.py b/meta_automl/data_preparation/dataset/__init__.py
new file mode 100644
index 00000000..62c0a37d
--- /dev/null
+++ b/meta_automl/data_preparation/dataset/__init__.py
@@ -0,0 +1,3 @@
+from .dataset_base import DatasetBase, DatasetData, DatasetIDType
+from .custom_dataset import DataNotFoundError, CustomDataset
+from .openml_dataset import OpenMLDataset, OpenMLDatasetIDType
diff --git a/meta_automl/data_preparation/dataset/custom_dataset.py b/meta_automl/data_preparation/dataset/custom_dataset.py
new file mode 100644
index 00000000..505868f6
--- /dev/null
+++ b/meta_automl/data_preparation/dataset/custom_dataset.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+import pickle
+from pathlib import Path
+from typing import Optional
+
+from meta_automl.data_preparation.dataset import DatasetBase
+from meta_automl.data_preparation.dataset.dataset_base import DatasetData
+
+
+class DataNotFoundError(FileNotFoundError):
+ pass
+
+
+class CustomDataset(DatasetBase):
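+    """Dataset that keeps its data in a local pickle file resolved via `cache_path`.
+
+    A minimal usage sketch (the id 'my_dataset' and the arrays `x`, `y` are illustrative):
+        dataset = CustomDataset('my_dataset')
+        dataset.dump_data(DatasetData(x, y))
+        data = dataset.get_data()
+    """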
+
+ def get_data(self, cache_path: Optional[Path] = None) -> DatasetData:
+ cache_path = cache_path or self.cache_path
+ if not cache_path.exists():
+            raise DataNotFoundError(f'Dataset {self} was not found at the path "{cache_path}".')
+ with open(cache_path, 'rb') as f:
+ dataset_data = pickle.load(f)
+ return dataset_data
+
+ def dump_data(self, dataset_data: DatasetData, cache_path: Optional[Path] = None) -> CustomDataset:
+ cache_path = cache_path or self.cache_path
+ with open(cache_path, 'wb') as f:
+ pickle.dump(dataset_data, f)
+ return self
diff --git a/meta_automl/data_preparation/dataset/dataset_base.py b/meta_automl/data_preparation/dataset/dataset_base.py
new file mode 100644
index 00000000..fd84dee5
--- /dev/null
+++ b/meta_automl/data_preparation/dataset/dataset_base.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from abc import abstractmethod, ABC
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Union, Optional, List, Any
+
+import numpy as np
+import pandas as pd
+import scipy as sp
+
+from meta_automl.data_preparation.file_system import CacheOperator, get_dataset_cache_path
+
+DatasetIDType = Any
+
+
+@dataclass
+class DatasetData:
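+    """Container for the actual dataset payload: features `x`, target `y`, and column metadata."""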
+ x: Union[np.ndarray, pd.DataFrame, sp.sparse.csr_matrix]
+ y: Optional[Union[np.ndarray, pd.DataFrame]] = None
+ categorical_indicator: Optional[List[bool]] = None
+ attribute_names: Optional[List[str]] = None
+
+
+class DatasetBase(ABC, CacheOperator):
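+    """Base class for datasets addressed by `id_`; subclasses define how the data is obtained via `get_data()`."""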
+
+ def __init__(self, id_: DatasetIDType, name: Optional[str] = None):
+ self.id_ = id_
+ self.name = name
+
+ def __repr__(self):
+ return f'{self.__class__.__name__}(id_={self.id_}, name={self.name})'
+
+ @abstractmethod
+ def get_data(self) -> DatasetData:
+ raise NotImplementedError()
+
+ @property
+ def cache_path(self) -> Path:
+ return get_dataset_cache_path(self)
diff --git a/meta_automl/data_preparation/dataset/openml_dataset.py b/meta_automl/data_preparation/dataset/openml_dataset.py
new file mode 100644
index 00000000..08fc5c1d
--- /dev/null
+++ b/meta_automl/data_preparation/dataset/openml_dataset.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Union
+
+import openml
+
+from meta_automl.data_preparation.dataset import DatasetBase
+from meta_automl.data_preparation.dataset.dataset_base import DatasetData
+from meta_automl.data_preparation.file_system import update_openml_cache_dir
+
+OpenMLDatasetIDType = int
+
+update_openml_cache_dir()
+
+
+class OpenMLDataset(DatasetBase):
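+    """Dataset backed by OpenML, constructed from a numeric OpenML dataset id (use `from_search` to resolve names)."""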
+
+ def __init__(self, id_: OpenMLDatasetIDType):
+ if isinstance(id_, str):
+            raise ValueError('Creating OpenMLDataset by dataset name is ambiguous. Please use the dataset id. '
+                             f'Alternatively, you can search by name via {self.__class__.__name__}.from_search().')
+ self._openml_dataset = openml.datasets.get_dataset(id_, download_data=False, download_qualities=False,
+ error_if_multiple=True)
+ id_ = self._openml_dataset.id
+ name = self._openml_dataset.name
+ super().__init__(id_, name)
+
+ @classmethod
+ def from_search(cls, id_: Union[OpenMLDatasetIDType, str], **get_dataset_kwargs) -> OpenMLDataset:
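+        """Resolve a dataset by name or id through an OpenML lookup and build an `OpenMLDataset` from the resolved id."""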
+ openml_dataset = openml.datasets.get_dataset(id_, download_data=False, download_qualities=False,
+ **get_dataset_kwargs)
+ return cls(openml_dataset.id)
+
+ def get_data(self, dataset_format: str = 'dataframe') -> DatasetData:
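+        """Fetch the dataset data from OpenML (or its local cache) and wrap it into a `DatasetData`."""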
+ X, y, categorical_indicator, attribute_names = self._openml_dataset.get_data(
+ target=self._openml_dataset.default_target_attribute,
+ dataset_format=dataset_format
+ )
+ return DatasetData(X, y, categorical_indicator, attribute_names)
diff --git a/meta_automl/data_preparation/datasets_loaders/__init__.py b/meta_automl/data_preparation/datasets_loaders/__init__.py
index 3908c8e0..4b91c8aa 100644
--- a/meta_automl/data_preparation/datasets_loaders/__init__.py
+++ b/meta_automl/data_preparation/datasets_loaders/__init__.py
@@ -1,2 +1,2 @@
from .datasets_loader import DatasetsLoader
-from .openml_datasets_loader import OpenMLDatasetsLoader, OpenMLDatasetID
+from .openml_datasets_loader import OpenMLDatasetsLoader
diff --git a/meta_automl/data_preparation/datasets_loaders/datasets_loader.py b/meta_automl/data_preparation/datasets_loaders/datasets_loader.py
index 8faba6d0..ab6ffa6c 100644
--- a/meta_automl/data_preparation/datasets_loaders/datasets_loader.py
+++ b/meta_automl/data_preparation/datasets_loaders/datasets_loader.py
@@ -1,25 +1,17 @@
from __future__ import annotations
from abc import abstractmethod
-from typing import List, Type
+from typing import List
-from meta_automl.data_preparation.data_manager import DataManager
-from meta_automl.data_preparation.dataset import Dataset, DatasetCache, NoCacheError
+from meta_automl.data_preparation.dataset import DatasetBase
class DatasetsLoader:
- data_manager: Type[DataManager] = DataManager
@abstractmethod
- def load(self, *args, **kwargs) -> List[DatasetCache]:
+ def load(self, *args, **kwargs) -> List[DatasetBase]:
raise NotImplementedError()
@abstractmethod
- def load_single(self, *args, **kwargs) -> DatasetCache:
+ def load_single(self, *args, **kwargs) -> DatasetBase:
raise NotImplementedError()
-
- def cache_to_memory(self, dataset: DatasetCache) -> Dataset:
- try:
- return dataset.from_cache()
- except NoCacheError:
- return self.load_single(dataset.id).from_cache()
diff --git a/meta_automl/data_preparation/datasets_loaders/openml_datasets_loader.py b/meta_automl/data_preparation/datasets_loaders/openml_datasets_loader.py
index 7959ca61..11294c45 100644
--- a/meta_automl/data_preparation/datasets_loaders/openml_datasets_loader.py
+++ b/meta_automl/data_preparation/datasets_loaders/openml_datasets_loader.py
@@ -1,57 +1,43 @@
from __future__ import annotations
-import shutil
-from pathlib import Path
-from typing import List, Union
+from typing import List, Union, Optional
-import openml
+from golem.core.log import default_log
-from meta_automl.data_preparation.dataset import DatasetCache, Dataset
+from meta_automl.data_preparation.dataset import OpenMLDataset, OpenMLDatasetIDType
from meta_automl.data_preparation.datasets_loaders import DatasetsLoader
-OpenMLDatasetID = Union[str, int]
-
-
-def _clear_openml_cache():
- cache_dir = openml.config.get_cache_directory()
- cache_dir = Path(cache_dir)
- shutil.rmtree(cache_dir)
-
class OpenMLDatasetsLoader(DatasetsLoader):
+ def __init__(self, allow_names: bool = False):
+ self.dataset_ids = []
+ self._allow_names = allow_names
- def __init__(self):
- self.dataset_sources = []
-
- def load(self, dataset_sources: List[OpenMLDatasetID]) -> List[DatasetCache]:
- self.dataset_sources = dataset_sources
+ def load(self, dataset_ids: List[Union[OpenMLDatasetIDType, str]],
+ allow_names: Optional[bool] = None) -> List[OpenMLDataset]:
+ self.dataset_ids += dataset_ids
+ allow_names = self._allow_names if allow_names is None else allow_names
datasets = []
# TODO: Optimize like this
# https://github.com/openml/automlbenchmark/commit/a09dc8aee96178dd14837d9e1cd519d1ec63f804
- for source in self.dataset_sources:
- dataset = self.load_single(source)
+        for dataset_id in dataset_ids:
+ dataset = self.load_single(dataset_id, allow_name=allow_names)
datasets.append(dataset)
return datasets
- def load_single(self, source: OpenMLDatasetID):
- try:
- return self.get_openml_dataset(source)
- finally:
- _clear_openml_cache()
-
- def get_openml_dataset(self, dataset_id: OpenMLDatasetID, force_download: bool = False) -> DatasetCache:
- openml_dataset = openml.datasets.get_dataset(dataset_id, download_data=False, download_qualities=False)
- name = openml_dataset.name.lower()
- dataset_cache_path = self.data_manager.get_dataset_cache_path(name)
- if dataset_cache_path.exists() and not force_download:
- dataset_cache = DatasetCache(name, dataset_cache_path)
+ def load_single(self, dataset_id: Union[OpenMLDatasetIDType, str],
+ allow_name: Optional[bool] = None) -> OpenMLDataset:
+ allow_name = self._allow_names if allow_name is None else allow_name
+
+ if allow_name:
+ dataset = OpenMLDataset.from_search(dataset_id)
else:
- dataset_id = openml_dataset.id
- X, y, categorical_indicator, attribute_names = openml_dataset.get_data(
- target=openml_dataset.default_target_attribute,
- dataset_format='array'
- )
- dataset = Dataset(name, X, y, categorical_indicator, attribute_names, _id=dataset_id)
- dataset_cache = dataset.dump_to_cache(dataset_cache_path)
- return dataset_cache
+ dataset = OpenMLDataset(dataset_id)
+
+ self.dataset_ids.append(dataset.id_)
+ return dataset
+
+ @property
+ def _log(self):
+ return default_log(self)
diff --git a/meta_automl/data_preparation/file_system/__init__.py b/meta_automl/data_preparation/file_system/__init__.py
new file mode 100644
index 00000000..a228da6e
--- /dev/null
+++ b/meta_automl/data_preparation/file_system/__init__.py
@@ -0,0 +1,5 @@
+from meta_automl.data_preparation.file_system.file_system import PathType, get_project_root, get_data_dir
+from meta_automl.data_preparation.file_system.cache import (CacheOperator, get_dataset_cache_path,
+ get_dataset_cache_path_by_id, get_meta_features_cache_path,
+ get_local_meta_features, update_local_meta_features,
+ get_openml_cache_dir, update_openml_cache_dir)
diff --git a/meta_automl/data_preparation/file_system/cache.py b/meta_automl/data_preparation/file_system/cache.py
new file mode 100644
index 00000000..99daf965
--- /dev/null
+++ b/meta_automl/data_preparation/file_system/cache.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+import pickle
+from pathlib import Path
+
+from typing import Type, Any, Dict, TYPE_CHECKING
+
+import openml
+
+from meta_automl.data_preparation.file_system.cache_properties import CacheProperties, CacheType
+from meta_automl.data_preparation.file_system.file_system import get_data_dir, ensure_dir_exists
+
+if TYPE_CHECKING:
+ from meta_automl.data_preparation.dataset import DatasetBase
+ from meta_automl.data_preparation.meta_features_extractors import MetaFeaturesExtractor
+
+
+class CacheOperator:
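+    """Marker class for objects whose cache locations are resolved via `get_cache_properties`."""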
+ pass
+
+
+def get_openml_cache_dir() -> Path:
+ return get_data_dir().joinpath('openml_cache')
+
+
+def get_full_openml_cache_dir() -> Path:
+ return get_data_dir().joinpath('openml_cache/org/openml/www')
+
+
+def update_openml_cache_dir():
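+    """Point the `openml` library cache to the `openml_cache` subdirectory of the project data dir."""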
+ openml_cache_path = str(get_openml_cache_dir())
+ openml.config.set_cache_directory(openml_cache_path)
+
+
+def _get_cache_path(object_class: Type[CacheOperator], object_id: str, _create_parent_dir: bool = True) -> Path:
+ cache_properties = get_cache_properties(object_class.__name__)
+ directory = cache_properties.dir_
+ path = cache_properties.template.format(id_=object_id)
+ path = directory.joinpath(path)
+ if _create_parent_dir:
+ ensure_dir_exists(directory)
+ return path
+
+
+def get_dataset_cache_path(dataset: DatasetBase) -> Path:
+ class_ = dataset.__class__
+ id_ = dataset.id_
+ return _get_cache_path(class_, str(id_))
+
+
+def get_dataset_cache_path_by_id(class_: Type[DatasetBase], id_: Any) -> Path:
+ return _get_cache_path(class_, str(id_))
+
+
+def get_meta_features_cache_path(extractor_class: Type[MetaFeaturesExtractor], dataset_id: Any) -> Path:
+ return _get_cache_path(extractor_class, str(dataset_id))
+
+
+def get_local_meta_features(extractor_class: Type[MetaFeaturesExtractor], dataset_id: Any) -> Dict[str, Any]:
+ meta_features_file = get_meta_features_cache_path(extractor_class, dataset_id)
+ if not meta_features_file.exists():
+ return {}
+ with open(meta_features_file, 'rb') as f:
+ meta_features = pickle.load(f)
+ return meta_features
+
+
+def update_local_meta_features(extractor_class: Type[MetaFeaturesExtractor],
+ dataset_id: Any, meta_features: Dict[str, Any]):
+ meta_features_file = get_meta_features_cache_path(extractor_class, dataset_id)
+ meta_features_old = get_local_meta_features(extractor_class, dataset_id)
+ with open(meta_features_file, 'wb') as f:
+ meta_features_old.update(meta_features)
+ pickle.dump(meta_features_old, f)
+
+
+def get_cache_properties(class_name: str) -> CacheProperties:
+ cache_properties_by_class_name = {
+ 'OpenMLDataset': CacheProperties(
+ type_=CacheType.directory,
+ dir_=get_full_openml_cache_dir().joinpath('datasets'),
+ template='{id_}'),
+ 'CustomDataset': CacheProperties(
+ type_=CacheType.file,
+ dir_=get_data_dir().joinpath('datasets/custom_dataset'),
+ template='{id_}.pkl'),
+ 'PymfeExtractor': CacheProperties(
+ type_=CacheType.file,
+ dir_=get_data_dir().joinpath('metafeatures/pymfe'),
+ template='{id_}.pkl'),
+ }
+ try:
+ return cache_properties_by_class_name[class_name]
+ except KeyError as e:
+ raise KeyError(f'Cache properties for the class {class_name} are not defined.').with_traceback(e.__traceback__)
diff --git a/meta_automl/data_preparation/file_system/cache_properties.py b/meta_automl/data_preparation/file_system/cache_properties.py
new file mode 100644
index 00000000..7374df08
--- /dev/null
+++ b/meta_automl/data_preparation/file_system/cache_properties.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from meta_automl.data_preparation.file_system import PathType
+
+
+class CacheType(Enum):
+ file = 'file'
+ directory = 'directory'
+
+
+@dataclass
+class CacheProperties:
+ type_: Optional[CacheType] = None
+ dir_: Optional[Path] = None
+ template: Optional[PathType] = None
diff --git a/meta_automl/data_preparation/file_system/file_system.py b/meta_automl/data_preparation/file_system/file_system.py
new file mode 100644
index 00000000..ff2c3743
--- /dev/null
+++ b/meta_automl/data_preparation/file_system/file_system.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+PathType = Union[PathLike, str]
+
+DATA_SUBDIR = 'data'
+
+
+def ensure_dir_exists(dir_: Path) -> Path:
+ if dir_.is_file():
+ dir_ = dir_.parent
+ if not dir_.exists():
+ dir_.mkdir(parents=True)
+ return dir_
+
+
+def get_project_root() -> Path:
+ """Returns project root folder."""
+ return Path(__file__).parents[3]
+
+
+def get_data_dir() -> Path:
+ data_dir = get_project_root().joinpath(DATA_SUBDIR)
+ return data_dir
diff --git a/meta_automl/data_preparation/meta_features_extractors/meta_features_extractor.py b/meta_automl/data_preparation/meta_features_extractors/meta_features_extractor.py
index dc7ccf5a..d81e8cbd 100644
--- a/meta_automl/data_preparation/meta_features_extractors/meta_features_extractor.py
+++ b/meta_automl/data_preparation/meta_features_extractors/meta_features_extractor.py
@@ -1,28 +1,28 @@
from __future__ import annotations
-from abc import abstractmethod
-from typing import Optional, Iterable, Dict, Any, Type
+from abc import abstractmethod, ABC
+from typing import Optional, Iterable, Dict, Any
import pandas as pd
-from meta_automl.data_preparation.data_manager import DataManager
+from meta_automl.data_preparation.dataset import DatasetIDType
+from meta_automl.data_preparation.file_system import (CacheOperator, get_local_meta_features,
+ update_local_meta_features)
-class MetaFeaturesExtractor:
- DEFAULT_PARAMS: Optional[Dict[str, Any]] = None
- SOURCE: Optional[str] = None
- data_manager: Type[DataManager] = DataManager
+class MetaFeaturesExtractor(ABC, CacheOperator):
+ default_params: Optional[Dict[str, Any]] = None
@abstractmethod
def extract(self, datasets) -> pd.DataFrame:
raise NotImplementedError()
- def _get_meta_features_cache(self, dataset_name: str, meta_feature_names: Iterable[str]):
- cache = self.data_manager.get_meta_features_dict(dataset_name, self.SOURCE)
+ def _get_meta_features_cache(self, dataset_id: DatasetIDType, meta_feature_names: Iterable[str]):
+ cache = get_local_meta_features(self.__class__, str(dataset_id))
if set(meta_feature_names) ^ cache.keys():
return None
else:
return {mf_name: cache[mf_name] for mf_name in meta_feature_names}
- def _update_meta_features_cache(self, dataset_name: str, meta_features_dict: Dict[str, Any]):
- self.data_manager.update_meta_features_dict(dataset_name, self.SOURCE, meta_features_dict)
+ def _update_meta_features_cache(self, dataset_id: DatasetIDType, meta_features_dict: Dict[str, Any]):
+ update_local_meta_features(self.__class__, dataset_id, meta_features_dict)
diff --git a/meta_automl/data_preparation/meta_features_extractors/pymfe_extractor.py b/meta_automl/data_preparation/meta_features_extractors/pymfe_extractor.py
index 8dbc728f..edfa6925 100644
--- a/meta_automl/data_preparation/meta_features_extractors/pymfe_extractor.py
+++ b/meta_automl/data_preparation/meta_features_extractors/pymfe_extractor.py
@@ -6,17 +6,16 @@
from golem.core.log import default_log
from pymfe.mfe import MFE
-from meta_automl.data_preparation.dataset import DatasetCache
+from meta_automl.data_preparation.dataset import DatasetBase, DatasetIDType
from meta_automl.data_preparation.datasets_loaders import DatasetsLoader, OpenMLDatasetsLoader
from meta_automl.data_preparation.meta_features_extractors import MetaFeaturesExtractor
class PymfeExtractor(MetaFeaturesExtractor):
- DEFAULT_PARAMS = {'groups': 'default'}
- SOURCE = 'pymfe'
+ default_params = {'groups': 'default'}
def __init__(self, extractor_params: Dict[str, Any] = None, datasets_loader: DatasetsLoader = None):
- self.extractor_params = extractor_params if extractor_params is not None else self.DEFAULT_PARAMS
+ self.extractor_params = extractor_params if extractor_params is not None else self.default_params
self._datasets_loader = datasets_loader or OpenMLDatasetsLoader()
self._extractor = MFE(**self.extractor_params)
self._logger = default_log(self)
@@ -27,21 +26,21 @@ def datasets_loader(self) -> DatasetsLoader:
raise ValueError("Datasets loader not provided!")
return self._datasets_loader
- def extract(self, datasets: List[Union[DatasetCache, str]], fill_input_nans: bool = False,
- use_cached: bool = True, update_cached: bool = True) -> pd.DataFrame:
+ def extract(self, datasets_or_ids: List[Union[DatasetBase, DatasetIDType]],
+ fill_input_nans: bool = False, use_cached: bool = True, update_cached: bool = True) -> pd.DataFrame:
meta_features = {}
meta_feature_names = self._extractor.extract_metafeature_names()
- load_dataset = self.datasets_loader.cache_to_memory
- for dataset in datasets:
- if isinstance(dataset, str):
- dataset = DatasetCache(dataset)
- self._logger.info(f'Extracting meta features of the dataset {dataset.name}...')
+ for dataset in datasets_or_ids:
+ if not isinstance(dataset, DatasetBase):
+ dataset = self._datasets_loader.load_single(dataset)
+
+ self._logger.info(f'Extracting meta features of the dataset {dataset}...')
if (use_cached and
- (mfs := self._get_meta_features_cache(dataset.name, meta_feature_names))):
- meta_features[dataset.name] = mfs
+ (mfs := self._get_meta_features_cache(dataset.id_, meta_feature_names))):
+ meta_features[dataset.id_] = mfs
else:
- loaded_dataset = load_dataset(dataset)
+ loaded_dataset = dataset.get_data(dataset_format='array')
cat_cols = [i for i, val in enumerate(loaded_dataset.categorical_indicator) if val]
x = loaded_dataset.x
y = loaded_dataset.y
@@ -51,8 +50,8 @@ def extract(self, datasets: List[Union[DatasetCache, str]], fill_input_nans: boo
feature_names, dataset_features = mfe.extract(out_type=tuple)
mfs = dict(zip(feature_names, dataset_features))
if update_cached:
- self._update_meta_features_cache(dataset.name, mfs)
- meta_features[dataset.name] = mfs
+ self._update_meta_features_cache(dataset.id_, mfs)
+ meta_features[dataset.id_] = mfs
meta_features = pd.DataFrame.from_dict(meta_features, orient='index')
return meta_features
diff --git a/meta_automl/data_preparation/model.py b/meta_automl/data_preparation/model.py
index 25de781c..d437ea24 100644
--- a/meta_automl/data_preparation/model.py
+++ b/meta_automl/data_preparation/model.py
@@ -3,13 +3,16 @@
from golem.core.optimisers.fitness import Fitness
-from meta_automl.data_preparation.dataset import DatasetCache
+from meta_automl.data_preparation.dataset import DatasetBase
+
+
+PredictorType = Any
@dataclass
class Model:
- predictor: Any
+ predictor: PredictorType
fitness: Fitness
fitness_metric_name: str
- dataset_cache: DatasetCache
+ dataset: DatasetBase
metadata: Dict[str, Any] = field(default_factory=dict)
diff --git a/meta_automl/data_preparation/models_loaders/fedot_pipelines_loader.py b/meta_automl/data_preparation/models_loaders/fedot_pipelines_loader.py
index ae7f0b38..599056fa 100644
--- a/meta_automl/data_preparation/models_loaders/fedot_pipelines_loader.py
+++ b/meta_automl/data_preparation/models_loaders/fedot_pipelines_loader.py
@@ -14,8 +14,8 @@
from golem.core.log import default_log
from tqdm import tqdm
-from meta_automl.data_preparation.data_manager import PathType
-from meta_automl.data_preparation.dataset import DatasetCache
+from meta_automl.data_preparation.file_system import PathType
+from meta_automl.data_preparation.dataset import DatasetBase
from meta_automl.data_preparation.datasets_loaders import DatasetsLoader, OpenMLDatasetsLoader
from meta_automl.data_preparation.model import Model
from meta_automl.data_preparation.models_loaders import ModelsLoader
@@ -29,10 +29,9 @@ def evaluate_classification_fedot_pipeline(pipeline, input_data):
return fitness
-def get_n_best_fedot_performers(dataset_cache: DatasetCache, pipelines: List[Pipeline], datasets_loader: DatasetsLoader,
- n_best: int = 1) -> List[Model]:
- loaded_dataset = datasets_loader.cache_to_memory(dataset_cache)
- X, y_test = loaded_dataset.x, loaded_dataset.y
+def get_n_best_fedot_performers(dataset: DatasetBase, pipelines: List[Pipeline], n_best: int = 1) -> List[Model]:
+ data = dataset.get_data()
+ X, y_test = data.x, data.y
input_data = InputData(idx=np.arange(0, len(X)), features=X, target=y_test, data_type=DataTypesEnum.table,
task=Task(TaskTypesEnum.classification))
fitnesses = []
@@ -41,14 +40,14 @@ def get_n_best_fedot_performers(dataset_cache: DatasetCache, pipelines: List[Pip
for pipeline in tqdm(pipelines, desc='Evaluating pipelines'):
fitness = evaluate_classification_fedot_pipeline(pipeline, input_data)
fitnesses.append(fitness)
- models.append(Model(pipeline, fitness, metric_name, dataset_cache))
+ models.append(Model(pipeline, fitness, metric_name, dataset))
best_models = [models.pop(np.argmax(fitnesses)) for _ in range(min(n_best, len(pipelines)))]
return best_models
class FEDOTPipelinesLoader(ModelsLoader):
- def __init__(self, datasets_to_load: Union[List[Union[DatasetCache, str]], Literal['auto']] = 'auto',
+ def __init__(self, datasets_to_load: Union[List[Union[DatasetBase, str]], Literal['auto']] = 'auto',
candidate_pipelines: Optional[List[List[Pipeline]]] = None,
candidate_pipeline_paths: Optional[List[List[PathType]]] = None,
launch_dir: Optional[PathType] = None,
@@ -56,12 +55,12 @@ def __init__(self, datasets_to_load: Union[List[Union[DatasetCache, str]], Liter
self.log = default_log(self)
- self.datasets_loader = datasets_loader or OpenMLDatasetsLoader()
+ self.datasets_loader = datasets_loader or OpenMLDatasetsLoader(allow_names=True)
self.launch_dir: Path = Path(launch_dir) if isinstance(launch_dir, str) else launch_dir
- self._datasets: List[DatasetCache] = (self._define_datasets() if datasets_to_load == 'auto'
- else self._dataset_names_to_cache(datasets_to_load))
+ self._datasets: List[DatasetBase] = (self._define_datasets() if datasets_to_load == 'auto'
+ else self._get_datasets_from_names(datasets_to_load))
self.candidate_pipelines = candidate_pipelines
@@ -71,8 +70,8 @@ def __init__(self, datasets_to_load: Union[List[Union[DatasetCache, str]], Liter
def load(self, datasets: Union[List[str], Literal['auto']] = 'auto', n_best: int = 1) -> List[List[Model]]:
if datasets != 'auto':
- datasets = self._dataset_names_to_cache(datasets)
- difference = set(d.name for d in datasets) - set(self.dataset_names)
+ datasets = self._get_datasets_from_names(datasets)
+ difference = set(d.name for d in datasets) - set(self.dataset_ids)
if difference:
raise ValueError(f'Results for these datasets are not available: {difference}.')
else:
@@ -89,10 +88,10 @@ def _define_pipeline_paths(self) -> List[List[Path]]:
if not self.launch_dir:
raise ValueError('Launch dir or model paths must be provided!')
- dataset_names = self.dataset_names
- datasets_models_paths = dict(zip(dataset_names, [[]] * len(dataset_names)))
+ dataset_ids = self.dataset_ids
+ datasets_models_paths = dict(zip(dataset_ids, [[]] * len(dataset_ids)))
- for dataset_name in tqdm(dataset_names, desc='Defining model paths', unit='dataset'):
+ for dataset_name in tqdm(dataset_ids, desc='Defining model paths', unit='dataset'):
for model_path in self.launch_dir.joinpath(dataset_name).glob(r'FEDOT*\*\*\launch_*.json'):
datasets_models_paths[dataset_name].append(model_path)
@@ -104,28 +103,27 @@ def _import_pipelines(self, candidate_pipeline_paths: List[List[PathType]]):
desc='Importing pipelines', unit='dataset'):
candidates_for_dataset = [Pipeline.from_serialized(str(p)) for p in paths]
if not candidates_for_dataset:
- self.log.warning(f'No pipelines found for the dataset "{dataset.name}".')
+ self.log.warning(f'No pipelines found for the dataset "{dataset}".')
candidate_pipelines.append(candidates_for_dataset)
self.candidate_pipelines = candidate_pipelines
- def _define_datasets(self) -> List[DatasetCache]:
+ def _define_datasets(self) -> List[DatasetBase]:
if not self.launch_dir:
raise ValueError('Launch dir or datasets must be provided!')
datasets = list({p.parents[2].name for p in self.launch_dir.glob(r'*\FEDOT*\*\launch_0')})
datasets.sort()
- datasets = self._dataset_names_to_cache(datasets)
+ datasets = self._get_datasets_from_names(datasets)
return datasets
@property
- def dataset_names(self):
- return [d.name if isinstance(d, DatasetCache) else d for d in self._datasets]
+ def dataset_ids(self):
+ return [d.name if isinstance(d, DatasetBase) else d for d in self._datasets]
- @staticmethod
- def _dataset_names_to_cache(datasets: List[Union[str, DatasetCache]]) -> List[DatasetCache]:
+ def _get_datasets_from_names(self, datasets: List[Union[str, DatasetBase]]) -> List[DatasetBase]:
new_list = []
for dataset in datasets:
- if isinstance(dataset, str):
- dataset = DatasetCache(dataset)
+ if not isinstance(dataset, DatasetBase):
+ dataset = self.datasets_loader.load_single(dataset)
new_list.append(dataset)
return new_list
diff --git a/meta_automl/data_preparation/models_loaders/knowledge_base_models_loader.py b/meta_automl/data_preparation/models_loaders/knowledge_base_models_loader.py
index e26b896e..7c38b9d8 100644
--- a/meta_automl/data_preparation/models_loaders/knowledge_base_models_loader.py
+++ b/meta_automl/data_preparation/models_loaders/knowledge_base_models_loader.py
@@ -7,12 +7,13 @@
from fedot.core.pipelines.pipeline import Pipeline
from golem.core.optimisers.fitness import SingleObjFitness
-from meta_automl.data_preparation.data_manager import DataManager
-from meta_automl.data_preparation.dataset import DatasetCache
+
+from meta_automl.data_preparation.dataset import OpenMLDataset
+from meta_automl.data_preparation.file_system import get_data_dir
from meta_automl.data_preparation.model import Model
from meta_automl.data_preparation.models_loaders import ModelsLoader
-DEFAULT_KNOWLEDGE_BASE_PATH = DataManager.get_data_dir().joinpath('knowledge_base_0')
+DEFAULT_KNOWLEDGE_BASE_PATH = get_data_dir().joinpath('knowledge_base_0')
class KnowledgeBaseModelsLoader(ModelsLoader):
@@ -21,21 +22,21 @@ def __init__(self, knowledge_base_path: Union[str, PathLike] = DEFAULT_KNOWLEDGE
self.df_knowledge_base: Optional[pd.DataFrame] = None
self.df_datasets: Optional[pd.DataFrame] = None
- def load(self, dataset_names: Optional[Sequence[str]] = None,
+ def load(self, dataset_ids: Optional[Sequence[str]] = None,
fitness_metric: str = 'f1') -> List[Model]:
if self.df_knowledge_base is None:
knowledge_base_split_file = self.knowledge_base_path.joinpath('knowledge_base.csv')
self.df_knowledge_base = pd.read_csv(knowledge_base_split_file)
- if dataset_names is None:
- dataset_names = self.parse_datasets()['dataset_name']
+ if dataset_ids is None:
+ dataset_ids = self.parse_datasets()['dataset_id']
df_knowledge_base = self.df_knowledge_base
- df_knowledge_base = df_knowledge_base[df_knowledge_base['dataset_name'].isin(dataset_names)]
+ df_knowledge_base = df_knowledge_base[df_knowledge_base['dataset_id'].isin(dataset_ids)]
cached_datasets = {}
- for name in dataset_names:
- cached_datasets[name] = DatasetCache(name)
+ for id_ in dataset_ids:
+ cached_datasets[id_] = OpenMLDataset(id_)
models = []
for _, row in df_knowledge_base.iterrows():
@@ -45,7 +46,7 @@ def load(self, dataset_names: Optional[Sequence[str]] = None,
metric_value = row[fitness_metric]
fitness = SingleObjFitness(metric_value)
metadata = dict(row)
- dataset_cache = cached_datasets[row['dataset_name']]
+ dataset_cache = cached_datasets[row['dataset_id']]
model = Model(predictor, fitness, fitness_metric, dataset_cache, metadata)
models.append(model)
return models
diff --git a/meta_automl/meta_algorithm/datasets_similarity_assessors/model_based_similarity_assessors.py b/meta_automl/meta_algorithm/datasets_similarity_assessors/model_based_similarity_assessors.py
index 09720a1e..40008d00 100644
--- a/meta_automl/meta_algorithm/datasets_similarity_assessors/model_based_similarity_assessors.py
+++ b/meta_automl/meta_algorithm/datasets_similarity_assessors/model_based_similarity_assessors.py
@@ -1,10 +1,11 @@
from abc import ABC
-from typing import Optional, Dict, Any, List, Iterable
+from typing import Optional, List, Iterable
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
+from meta_automl.data_preparation.dataset import DatasetIDType
from meta_automl.meta_algorithm.datasets_similarity_assessors.datasets_similarity_assessor import \
DatasetsSimilarityAssessor
@@ -13,7 +14,7 @@ class ModelBasedSimilarityAssessor(ABC, DatasetsSimilarityAssessor):
def __init__(self, model, n_best: int = 1):
self._inner_model = model
self.n_best = n_best
- self._datasets: Optional[Iterable[str]] = None
+ self._datasets: Optional[Iterable[DatasetIDType]] = None
class KNeighborsBasedSimilarityAssessor(ModelBasedSimilarityAssessor):
@@ -21,7 +22,7 @@ def __init__(self, n_neighbors: int = 1, **model_params):
model = NearestNeighbors(n_neighbors=n_neighbors, **model_params)
super().__init__(model, n_neighbors)
- def fit(self, meta_features: pd.DataFrame, datasets: Iterable[str]):
+ def fit(self, meta_features: pd.DataFrame, datasets: Iterable[DatasetIDType]):
meta_features = self.preprocess_meta_features(meta_features)
self._datasets = np.array(datasets)
self._inner_model.fit(meta_features)
@@ -30,7 +31,7 @@ def fit(self, meta_features: pd.DataFrame, datasets: Iterable[str]):
def preprocess_meta_features(meta_features: pd.DataFrame) -> pd.DataFrame:
return meta_features.dropna(axis=1, how='any')
- def predict(self, meta_features: pd.DataFrame, return_distance: bool = False) -> Iterable[Iterable[str]]:
+ def predict(self, meta_features: pd.DataFrame, return_distance: bool = False) -> Iterable[Iterable[DatasetIDType]]:
dataset_indexes = self._inner_model.kneighbors(meta_features, return_distance=return_distance)
if return_distance:
distances, dataset_indexes = dataset_indexes
diff --git a/meta_automl/meta_algorithm/model_advisors/model_advisor.py b/meta_automl/meta_algorithm/model_advisors/model_advisor.py
index a9ca0d97..c653a173 100644
--- a/meta_automl/meta_algorithm/model_advisors/model_advisor.py
+++ b/meta_automl/meta_algorithm/model_advisors/model_advisor.py
@@ -1,8 +1,9 @@
from abc import abstractmethod
-from typing import List, Dict, Iterable, Optional
+from typing import List, Dict, Iterable
import pandas as pd
+from meta_automl.data_preparation.dataset import DatasetIDType
from meta_automl.data_preparation.model import Model
from meta_automl.meta_algorithm.datasets_similarity_assessors import DatasetsSimilarityAssessor
@@ -17,13 +18,13 @@ def predict(self, *args, **kwargs) -> List[List[Model]]:
class SimpleSimilarityModelAdvisor(ModelAdvisor):
def __init__(self, fitted_similarity_assessor: DatasetsSimilarityAssessor):
self.similarity_assessor = fitted_similarity_assessor
- self.best_models: Dict[str, List[Model]] = {}
+ self.best_models: Dict[DatasetIDType, List[Model]] = {}
@property
def datasets(self):
return self.similarity_assessor.datasets
- def fit(self, dataset_names_to_best_pipelines: Dict[str, List[Model]]):
+ def fit(self, dataset_names_to_best_pipelines: Dict[DatasetIDType, List[Model]]):
self.best_models.update(dataset_names_to_best_pipelines)
return self
diff --git a/requirements.txt b/requirements.txt
index eca13d85..ad0a2233 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 00000000..77cbabd1
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,40 @@
+import time
+from pathlib import Path
+
+import pytest
+
+from test import constants
+from meta_automl.data_preparation.file_system import file_system, get_data_dir, get_project_root
+from meta_automl.data_preparation.file_system import update_openml_cache_dir
+
+
+def pytest_configure():
+ # Crucial setup & checks to avoid misplacing data during the tests
+ check_project_root()
+ set_data_dir()
+ check_data_dir()
+ update_openml_cache_dir()
+
+
+def check_project_root():
+ actual_root = Path(__file__).parents[1]
+ root = get_project_root()
+ if root != actual_root:
+ pytest.exit(f'The function `get_project_root()` should point to "{actual_root}". '
+ f'Got "{root}" instead', 1)
+
+
+def set_data_dir():
+ file_system.DATA_SUBDIR = constants.TEST_DATA_SUBDIR
+
+
+def check_data_dir():
+ data_dir = get_data_dir()
+ if data_dir.relative_to(get_project_root()) != Path(constants.TEST_DATA_SUBDIR):
+ pytest.exit(f'The function `get_data_dir()` should point to "test/data" (relative to project root). '
+ f'Got "{data_dir}" instead', 1)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def set_test_start_timestamp():
+ constants.TEST_START_TIMESTAMP = time.time()
diff --git a/test/constants.py b/test/constants.py
index 2eea547c..9a9f9b12 100644
--- a/test/constants.py
+++ b/test/constants.py
@@ -1,2 +1,5 @@
-CACHED_DATASETS = ['australian', 'monks-problems-1']
-DATASETS_WITH_CACHED_META_FEATURES = ['australian', 'monks-problems-2']
+OPENML_CACHED_DATASETS = [40981, 333] # australian, monks-problems-1
+DATASETS_WITH_CACHED_META_FEATURES = [40981, 334] # australian, monks-problems-2
+OPENML_DATASET_IDS_TO_LOAD = [40981, 1464] # australian, blood-transfusion-service-center
+TEST_DATA_SUBDIR = 'test/data'
+TEST_START_TIMESTAMP = None
diff --git a/test/data/datasets/australian.pkl b/test/data/datasets/australian.pkl
deleted file mode 100644
index be8a9ae3..00000000
Binary files a/test/data/datasets/australian.pkl and /dev/null differ
diff --git a/test/data/datasets/monks-problems-1.pkl b/test/data/datasets/monks-problems-1.pkl
deleted file mode 100644
index 58061c48..00000000
Binary files a/test/data/datasets/monks-problems-1.pkl and /dev/null differ
diff --git a/test/data/pymfe/monks-problems-2.pkl b/test/data/metafeatures/pymfe/334.pkl
similarity index 100%
rename from test/data/pymfe/monks-problems-2.pkl
rename to test/data/metafeatures/pymfe/334.pkl
diff --git a/test/data/pymfe/australian.pkl b/test/data/metafeatures/pymfe/40981.pkl
similarity index 100%
rename from test/data/pymfe/australian.pkl
rename to test/data/metafeatures/pymfe/40981.pkl
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/dataset.arff b/test/data/openml_cache/org/openml/www/datasets/333/dataset.arff
new file mode 100644
index 00000000..44600f16
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/333/dataset.arff
@@ -0,0 +1,651 @@
+%
+% 1. Title: The Monk's Problems
+%
+% 2. Sources:
+% (a) Donor: Sebastian Thrun
+% School of Computer Science
+% Carnegie Mellon University
+% Pittsburgh, PA 15213, USA
+%
+% E-mail: thrun@cs.cmu.edu
+%
+% (b) Date: October 1992
+%
+% 3. Past Usage:
+%
+% - See File: thrun.comparison.ps.Z
+%
+% - Wnek, J., "Hypothesis-driven Constructive Induction," PhD dissertation,
+% School of Information Technology and Engineering, Reports of Machine
+% Learning and Inference Laboratory, MLI 93-2, Center for Artificial
+% Intelligence, George Mason University, March 1993.
+%
+% - Wnek, J. and Michalski, R.S., "Comparing Symbolic and
+% Subsymbolic Learning: Three Studies," in Machine Learning: A
+% Multistrategy Approach, Vol. 4., R.S. Michalski and G. Tecuci (Eds.),
+% Morgan Kaufmann, San Mateo, CA, 1993.
+%
+% 4. Relevant Information:
+%
+% The MONK's problem were the basis of a first international comparison
+% of learning algorithms. The result of this comparison is summarized in
+% "The MONK's Problems - A Performance Comparison of Different Learning
+% algorithms" by S.B. Thrun, J. Bala, E. Bloedorn, I. Bratko, B.
+% Cestnik, J. Cheng, K. De Jong, S. Dzeroski, S.E. Fahlman, D. Fisher,
+% R. Hamann, K. Kaufman, S. Keller, I. Kononenko, J. Kreuziger, R.S.
+% Michalski, T. Mitchell, P. Pachowicz, Y. Reich H. Vafaie, W. Van de
+% Welde, W. Wenzel, J. Wnek, and J. Zhang has been published as
+% Technical Report CS-CMU-91-197, Carnegie Mellon University in Dec.
+% 1991.
+%
+% One significant characteristic of this comparison is that it was
+% performed by a collection of researchers, each of whom was an advocate
+% of the technique they tested (often they were the creators of the
+% various methods). In this sense, the results are less biased than in
+% comparisons performed by a single person advocating a specific
+% learning method, and more accurately reflect the generalization
+% behavior of the learning techniques as applied by knowledgeable users.
+%
+% There are three MONK's problems. The domains for all MONK's problems
+% are the same (described below). One of the MONK's problems has noise
+% added. For each problem, the domain has been partitioned into a train
+% and test set.
+%
+% 5. Number of Instances: 432
+%
+% 6. Number of Attributes: 8 (including class attribute)
+%
+% 7. Attribute information:
+% 1. class: 0, 1
+% 2. a1: 1, 2, 3
+% 3. a2: 1, 2, 3
+% 4. a3: 1, 2
+% 5. a4: 1, 2, 3
+% 6. a5: 1, 2, 3, 4
+% 7. a6: 1, 2
+% 8. Id: (A unique symbol for each instance)
+%
+% 8. Missing Attribute Values: None
+%
+% 9. Target Concepts associated to the MONK's problem:
+%
+% MONK-1: (a1 = a2) or (a5 = 1)
+%
+% MONK-2: EXACTLY TWO of {a1 = 1, a2 = 1, a3 = 1, a4 = 1, a5 = 1, a6 = 1}
+%
+% MONK-3: (a5 = 3 and a4 = 1) or (a5 /= 4 and a2 /= 3)
+% (5% class noise added to the training set)
+%
+%
+% Information about the dataset
+% CLASSTYPE: nominal
+% CLASSINDEX: first
+%
+
+@relation monks-problems-1
+
+@attribute 'class' {0,1}
+@attribute 'attr1' {1,2,3}
+@attribute 'attr2' {1,2,3}
+@attribute 'attr3' {1,2}
+@attribute 'attr4' {1,2,3}
+@attribute 'attr5' {1,2,3,4}
+@attribute 'attr6' {1,2}
+
+@data
+1,1,1,1,1,3,1
+1,1,1,1,1,3,2
+1,1,1,1,3,2,1
+1,1,1,1,3,3,2
+1,1,1,2,1,2,1
+1,1,1,2,1,2,2
+1,1,1,2,2,3,1
+1,1,1,2,2,4,1
+1,1,1,2,3,1,2
+1,1,2,1,1,1,2
+0,1,2,1,1,2,1
+0,1,2,1,1,3,1
+0,1,2,1,1,4,2
+1,1,2,1,2,1,1
+0,1,2,1,2,3,1
+0,1,2,1,2,3,2
+0,1,2,1,2,4,2
+0,1,2,1,3,2,1
+0,1,2,1,3,4,2
+0,1,2,2,1,2,2
+0,1,2,2,2,3,2
+0,1,2,2,2,4,1
+0,1,2,2,2,4,2
+0,1,2,2,3,2,2
+0,1,2,2,3,3,1
+0,1,2,2,3,3,2
+0,1,3,1,1,2,1
+0,1,3,1,1,4,1
+0,1,3,1,2,2,1
+0,1,3,1,2,4,1
+1,1,3,1,3,1,2
+0,1,3,1,3,2,2
+0,1,3,1,3,3,1
+0,1,3,1,3,4,1
+0,1,3,1,3,4,2
+0,1,3,2,1,2,2
+1,1,3,2,2,1,2
+0,1,3,2,2,2,2
+0,1,3,2,2,3,2
+0,1,3,2,2,4,1
+0,1,3,2,2,4,2
+1,1,3,2,3,1,1
+0,1,3,2,3,2,1
+0,1,3,2,3,4,1
+0,1,3,2,3,4,2
+0,2,1,1,1,3,1
+0,2,1,1,1,3,2
+1,2,1,1,2,1,1
+1,2,1,1,2,1,2
+0,2,1,1,2,2,2
+0,2,1,1,2,3,1
+0,2,1,1,2,4,1
+0,2,1,1,2,4,2
+0,2,1,1,3,4,1
+0,2,1,2,1,2,2
+0,2,1,2,1,3,1
+0,2,1,2,1,4,2
+0,2,1,2,2,3,1
+0,2,1,2,2,4,2
+0,2,1,2,3,2,2
+0,2,1,2,3,4,1
+1,2,2,1,1,2,1
+1,2,2,1,1,2,2
+1,2,2,1,1,3,1
+1,2,2,1,2,3,2
+1,2,2,1,3,1,1
+1,2,2,1,3,1,2
+1,2,2,1,3,2,2
+1,2,2,1,3,3,2
+1,2,2,1,3,4,2
+1,2,2,2,1,1,1
+1,2,2,2,1,3,2
+1,2,2,2,1,4,1
+1,2,2,2,1,4,2
+1,2,2,2,2,2,1
+1,2,2,2,3,4,1
+1,2,3,1,1,1,1
+1,2,3,1,2,1,1
+0,2,3,1,2,3,1
+1,2,3,1,3,1,2
+0,2,3,1,3,3,1
+0,2,3,1,3,4,2
+0,2,3,2,1,3,2
+1,2,3,2,2,1,1
+1,2,3,2,2,1,2
+0,2,3,2,2,2,1
+0,2,3,2,3,3,2
+1,3,1,1,1,1,1
+1,3,1,1,1,1,2
+1,3,1,1,2,1,1
+0,3,1,1,2,2,2
+0,3,1,1,3,2,2
+1,3,1,2,1,1,1
+0,3,1,2,1,2,2
+0,3,1,2,2,2,2
+0,3,1,2,2,3,2
+0,3,1,2,3,2,2
+1,3,2,1,1,1,1
+0,3,2,1,1,4,2
+1,3,2,1,2,1,2
+0,3,2,1,2,4,2
+1,3,2,2,1,1,1
+1,3,2,2,1,1,2
+0,3,2,2,1,3,2
+1,3,2,2,3,1,1
+0,3,2,2,3,2,1
+0,3,2,2,3,4,1
+1,3,3,1,1,1,1
+1,3,3,1,1,2,1
+1,3,3,1,1,4,2
+1,3,3,1,2,3,2
+1,3,3,1,2,4,2
+1,3,3,1,3,1,2
+1,3,3,1,3,2,1
+1,3,3,1,3,2,2
+1,3,3,1,3,4,2
+1,3,3,2,1,1,1
+1,3,3,2,1,3,2
+1,3,3,2,1,4,1
+1,3,3,2,1,4,2
+1,3,3,2,3,1,2
+1,3,3,2,3,2,2
+1,3,3,2,3,3,2
+1,3,3,2,3,4,2
+1,1,1,1,1,1,1
+1,1,1,1,1,1,2
+1,1,1,1,1,2,1
+1,1,1,1,1,2,2
+1,1,1,1,1,3,1
+1,1,1,1,1,3,2
+1,1,1,1,1,4,1
+1,1,1,1,1,4,2
+1,1,1,1,2,1,1
+1,1,1,1,2,1,2
+1,1,1,1,2,2,1
+1,1,1,1,2,2,2
+1,1,1,1,2,3,1
+1,1,1,1,2,3,2
+1,1,1,1,2,4,1
+1,1,1,1,2,4,2
+1,1,1,1,3,1,1
+1,1,1,1,3,1,2
+1,1,1,1,3,2,1
+1,1,1,1,3,2,2
+1,1,1,1,3,3,1
+1,1,1,1,3,3,2
+1,1,1,1,3,4,1
+1,1,1,1,3,4,2
+1,1,1,2,1,1,1
+1,1,1,2,1,1,2
+1,1,1,2,1,2,1
+1,1,1,2,1,2,2
+1,1,1,2,1,3,1
+1,1,1,2,1,3,2
+1,1,1,2,1,4,1
+1,1,1,2,1,4,2
+1,1,1,2,2,1,1
+1,1,1,2,2,1,2
+1,1,1,2,2,2,1
+1,1,1,2,2,2,2
+1,1,1,2,2,3,1
+1,1,1,2,2,3,2
+1,1,1,2,2,4,1
+1,1,1,2,2,4,2
+1,1,1,2,3,1,1
+1,1,1,2,3,1,2
+1,1,1,2,3,2,1
+1,1,1,2,3,2,2
+1,1,1,2,3,3,1
+1,1,1,2,3,3,2
+1,1,1,2,3,4,1
+1,1,1,2,3,4,2
+1,1,2,1,1,1,1
+1,1,2,1,1,1,2
+0,1,2,1,1,2,1
+0,1,2,1,1,2,2
+0,1,2,1,1,3,1
+0,1,2,1,1,3,2
+0,1,2,1,1,4,1
+0,1,2,1,1,4,2
+1,1,2,1,2,1,1
+1,1,2,1,2,1,2
+0,1,2,1,2,2,1
+0,1,2,1,2,2,2
+0,1,2,1,2,3,1
+0,1,2,1,2,3,2
+0,1,2,1,2,4,1
+0,1,2,1,2,4,2
+1,1,2,1,3,1,1
+1,1,2,1,3,1,2
+0,1,2,1,3,2,1
+0,1,2,1,3,2,2
+0,1,2,1,3,3,1
+0,1,2,1,3,3,2
+0,1,2,1,3,4,1
+0,1,2,1,3,4,2
+1,1,2,2,1,1,1
+1,1,2,2,1,1,2
+0,1,2,2,1,2,1
+0,1,2,2,1,2,2
+0,1,2,2,1,3,1
+0,1,2,2,1,3,2
+0,1,2,2,1,4,1
+0,1,2,2,1,4,2
+1,1,2,2,2,1,1
+1,1,2,2,2,1,2
+0,1,2,2,2,2,1
+0,1,2,2,2,2,2
+0,1,2,2,2,3,1
+0,1,2,2,2,3,2
+0,1,2,2,2,4,1
+0,1,2,2,2,4,2
+1,1,2,2,3,1,1
+1,1,2,2,3,1,2
+0,1,2,2,3,2,1
+0,1,2,2,3,2,2
+0,1,2,2,3,3,1
+0,1,2,2,3,3,2
+0,1,2,2,3,4,1
+0,1,2,2,3,4,2
+1,1,3,1,1,1,1
+1,1,3,1,1,1,2
+0,1,3,1,1,2,1
+0,1,3,1,1,2,2
+0,1,3,1,1,3,1
+0,1,3,1,1,3,2
+0,1,3,1,1,4,1
+0,1,3,1,1,4,2
+1,1,3,1,2,1,1
+1,1,3,1,2,1,2
+0,1,3,1,2,2,1
+0,1,3,1,2,2,2
+0,1,3,1,2,3,1
+0,1,3,1,2,3,2
+0,1,3,1,2,4,1
+0,1,3,1,2,4,2
+1,1,3,1,3,1,1
+1,1,3,1,3,1,2
+0,1,3,1,3,2,1
+0,1,3,1,3,2,2
+0,1,3,1,3,3,1
+0,1,3,1,3,3,2
+0,1,3,1,3,4,1
+0,1,3,1,3,4,2
+1,1,3,2,1,1,1
+1,1,3,2,1,1,2
+0,1,3,2,1,2,1
+0,1,3,2,1,2,2
+0,1,3,2,1,3,1
+0,1,3,2,1,3,2
+0,1,3,2,1,4,1
+0,1,3,2,1,4,2
+1,1,3,2,2,1,1
+1,1,3,2,2,1,2
+0,1,3,2,2,2,1
+0,1,3,2,2,2,2
+0,1,3,2,2,3,1
+0,1,3,2,2,3,2
+0,1,3,2,2,4,1
+0,1,3,2,2,4,2
+1,1,3,2,3,1,1
+1,1,3,2,3,1,2
+0,1,3,2,3,2,1
+0,1,3,2,3,2,2
+0,1,3,2,3,3,1
+0,1,3,2,3,3,2
+0,1,3,2,3,4,1
+0,1,3,2,3,4,2
+1,2,1,1,1,1,1
+1,2,1,1,1,1,2
+0,2,1,1,1,2,1
+0,2,1,1,1,2,2
+0,2,1,1,1,3,1
+0,2,1,1,1,3,2
+0,2,1,1,1,4,1
+0,2,1,1,1,4,2
+1,2,1,1,2,1,1
+1,2,1,1,2,1,2
+0,2,1,1,2,2,1
+0,2,1,1,2,2,2
+0,2,1,1,2,3,1
+0,2,1,1,2,3,2
+0,2,1,1,2,4,1
+0,2,1,1,2,4,2
+1,2,1,1,3,1,1
+1,2,1,1,3,1,2
+0,2,1,1,3,2,1
+0,2,1,1,3,2,2
+0,2,1,1,3,3,1
+0,2,1,1,3,3,2
+0,2,1,1,3,4,1
+0,2,1,1,3,4,2
+1,2,1,2,1,1,1
+1,2,1,2,1,1,2
+0,2,1,2,1,2,1
+0,2,1,2,1,2,2
+0,2,1,2,1,3,1
+0,2,1,2,1,3,2
+0,2,1,2,1,4,1
+0,2,1,2,1,4,2
+1,2,1,2,2,1,1
+1,2,1,2,2,1,2
+0,2,1,2,2,2,1
+0,2,1,2,2,2,2
+0,2,1,2,2,3,1
+0,2,1,2,2,3,2
+0,2,1,2,2,4,1
+0,2,1,2,2,4,2
+1,2,1,2,3,1,1
+1,2,1,2,3,1,2
+0,2,1,2,3,2,1
+0,2,1,2,3,2,2
+0,2,1,2,3,3,1
+0,2,1,2,3,3,2
+0,2,1,2,3,4,1
+0,2,1,2,3,4,2
+1,2,2,1,1,1,1
+1,2,2,1,1,1,2
+1,2,2,1,1,2,1
+1,2,2,1,1,2,2
+1,2,2,1,1,3,1
+1,2,2,1,1,3,2
+1,2,2,1,1,4,1
+1,2,2,1,1,4,2
+1,2,2,1,2,1,1
+1,2,2,1,2,1,2
+1,2,2,1,2,2,1
+1,2,2,1,2,2,2
+1,2,2,1,2,3,1
+1,2,2,1,2,3,2
+1,2,2,1,2,4,1
+1,2,2,1,2,4,2
+1,2,2,1,3,1,1
+1,2,2,1,3,1,2
+1,2,2,1,3,2,1
+1,2,2,1,3,2,2
+1,2,2,1,3,3,1
+1,2,2,1,3,3,2
+1,2,2,1,3,4,1
+1,2,2,1,3,4,2
+1,2,2,2,1,1,1
+1,2,2,2,1,1,2
+1,2,2,2,1,2,1
+1,2,2,2,1,2,2
+1,2,2,2,1,3,1
+1,2,2,2,1,3,2
+1,2,2,2,1,4,1
+1,2,2,2,1,4,2
+1,2,2,2,2,1,1
+1,2,2,2,2,1,2
+1,2,2,2,2,2,1
+1,2,2,2,2,2,2
+1,2,2,2,2,3,1
+1,2,2,2,2,3,2
+1,2,2,2,2,4,1
+1,2,2,2,2,4,2
+1,2,2,2,3,1,1
+1,2,2,2,3,1,2
+1,2,2,2,3,2,1
+1,2,2,2,3,2,2
+1,2,2,2,3,3,1
+1,2,2,2,3,3,2
+1,2,2,2,3,4,1
+1,2,2,2,3,4,2
+1,2,3,1,1,1,1
+1,2,3,1,1,1,2
+0,2,3,1,1,2,1
+0,2,3,1,1,2,2
+0,2,3,1,1,3,1
+0,2,3,1,1,3,2
+0,2,3,1,1,4,1
+0,2,3,1,1,4,2
+1,2,3,1,2,1,1
+1,2,3,1,2,1,2
+0,2,3,1,2,2,1
+0,2,3,1,2,2,2
+0,2,3,1,2,3,1
+0,2,3,1,2,3,2
+0,2,3,1,2,4,1
+0,2,3,1,2,4,2
+1,2,3,1,3,1,1
+1,2,3,1,3,1,2
+0,2,3,1,3,2,1
+0,2,3,1,3,2,2
+0,2,3,1,3,3,1
+0,2,3,1,3,3,2
+0,2,3,1,3,4,1
+0,2,3,1,3,4,2
+1,2,3,2,1,1,1
+1,2,3,2,1,1,2
+0,2,3,2,1,2,1
+0,2,3,2,1,2,2
+0,2,3,2,1,3,1
+0,2,3,2,1,3,2
+0,2,3,2,1,4,1
+0,2,3,2,1,4,2
+1,2,3,2,2,1,1
+1,2,3,2,2,1,2
+0,2,3,2,2,2,1
+0,2,3,2,2,2,2
+0,2,3,2,2,3,1
+0,2,3,2,2,3,2
+0,2,3,2,2,4,1
+0,2,3,2,2,4,2
+1,2,3,2,3,1,1
+1,2,3,2,3,1,2
+0,2,3,2,3,2,1
+0,2,3,2,3,2,2
+0,2,3,2,3,3,1
+0,2,3,2,3,3,2
+0,2,3,2,3,4,1
+0,2,3,2,3,4,2
+1,3,1,1,1,1,1
+1,3,1,1,1,1,2
+0,3,1,1,1,2,1
+0,3,1,1,1,2,2
+0,3,1,1,1,3,1
+0,3,1,1,1,3,2
+0,3,1,1,1,4,1
+0,3,1,1,1,4,2
+1,3,1,1,2,1,1
+1,3,1,1,2,1,2
+0,3,1,1,2,2,1
+0,3,1,1,2,2,2
+0,3,1,1,2,3,1
+0,3,1,1,2,3,2
+0,3,1,1,2,4,1
+0,3,1,1,2,4,2
+1,3,1,1,3,1,1
+1,3,1,1,3,1,2
+0,3,1,1,3,2,1
+0,3,1,1,3,2,2
+0,3,1,1,3,3,1
+0,3,1,1,3,3,2
+0,3,1,1,3,4,1
+0,3,1,1,3,4,2
+1,3,1,2,1,1,1
+1,3,1,2,1,1,2
+0,3,1,2,1,2,1
+0,3,1,2,1,2,2
+0,3,1,2,1,3,1
+0,3,1,2,1,3,2
+0,3,1,2,1,4,1
+0,3,1,2,1,4,2
+1,3,1,2,2,1,1
+1,3,1,2,2,1,2
+0,3,1,2,2,2,1
+0,3,1,2,2,2,2
+0,3,1,2,2,3,1
+0,3,1,2,2,3,2
+0,3,1,2,2,4,1
+0,3,1,2,2,4,2
+1,3,1,2,3,1,1
+1,3,1,2,3,1,2
+0,3,1,2,3,2,1
+0,3,1,2,3,2,2
+0,3,1,2,3,3,1
+0,3,1,2,3,3,2
+0,3,1,2,3,4,1
+0,3,1,2,3,4,2
+1,3,2,1,1,1,1
+1,3,2,1,1,1,2
+0,3,2,1,1,2,1
+0,3,2,1,1,2,2
+0,3,2,1,1,3,1
+0,3,2,1,1,3,2
+0,3,2,1,1,4,1
+0,3,2,1,1,4,2
+1,3,2,1,2,1,1
+1,3,2,1,2,1,2
+0,3,2,1,2,2,1
+0,3,2,1,2,2,2
+0,3,2,1,2,3,1
+0,3,2,1,2,3,2
+0,3,2,1,2,4,1
+0,3,2,1,2,4,2
+1,3,2,1,3,1,1
+1,3,2,1,3,1,2
+0,3,2,1,3,2,1
+0,3,2,1,3,2,2
+0,3,2,1,3,3,1
+0,3,2,1,3,3,2
+0,3,2,1,3,4,1
+0,3,2,1,3,4,2
+1,3,2,2,1,1,1
+1,3,2,2,1,1,2
+0,3,2,2,1,2,1
+0,3,2,2,1,2,2
+0,3,2,2,1,3,1
+0,3,2,2,1,3,2
+0,3,2,2,1,4,1
+0,3,2,2,1,4,2
+1,3,2,2,2,1,1
+1,3,2,2,2,1,2
+0,3,2,2,2,2,1
+0,3,2,2,2,2,2
+0,3,2,2,2,3,1
+0,3,2,2,2,3,2
+0,3,2,2,2,4,1
+0,3,2,2,2,4,2
+1,3,2,2,3,1,1
+1,3,2,2,3,1,2
+0,3,2,2,3,2,1
+0,3,2,2,3,2,2
+0,3,2,2,3,3,1
+0,3,2,2,3,3,2
+0,3,2,2,3,4,1
+0,3,2,2,3,4,2
+1,3,3,1,1,1,1
+1,3,3,1,1,1,2
+1,3,3,1,1,2,1
+1,3,3,1,1,2,2
+1,3,3,1,1,3,1
+1,3,3,1,1,3,2
+1,3,3,1,1,4,1
+1,3,3,1,1,4,2
+1,3,3,1,2,1,1
+1,3,3,1,2,1,2
+1,3,3,1,2,2,1
+1,3,3,1,2,2,2
+1,3,3,1,2,3,1
+1,3,3,1,2,3,2
+1,3,3,1,2,4,1
+1,3,3,1,2,4,2
+1,3,3,1,3,1,1
+1,3,3,1,3,1,2
+1,3,3,1,3,2,1
+1,3,3,1,3,2,2
+1,3,3,1,3,3,1
+1,3,3,1,3,3,2
+1,3,3,1,3,4,1
+1,3,3,1,3,4,2
+1,3,3,2,1,1,1
+1,3,3,2,1,1,2
+1,3,3,2,1,2,1
+1,3,3,2,1,2,2
+1,3,3,2,1,3,1
+1,3,3,2,1,3,2
+1,3,3,2,1,4,1
+1,3,3,2,1,4,2
+1,3,3,2,2,1,1
+1,3,3,2,2,1,2
+1,3,3,2,2,2,1
+1,3,3,2,2,2,2
+1,3,3,2,2,3,1
+1,3,3,2,2,3,2
+1,3,3,2,2,4,1
+1,3,3,2,2,4,2
+1,3,3,2,3,1,1
+1,3,3,2,3,1,2
+1,3,3,2,3,2,1
+1,3,3,2,3,2,2
+1,3,3,2,3,3,1
+1,3,3,2,3,3,2
+1,3,3,2,3,4,1
+1,3,3,2,3,4,2
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pkl.py3 b/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pkl.py3
new file mode 100644
index 00000000..5c9a3b2e
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pkl.py3 differ
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pq b/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pq
new file mode 100644
index 00000000..34aeff80
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/333/dataset_333.pq differ
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/description.xml b/test/data/openml_cache/org/openml/www/datasets/333/description.xml
new file mode 100644
index 00000000..4c00296e
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/333/description.xml
@@ -0,0 +1,33 @@
+
+ 333
+ monks-problems-1
+ 1
+ **Author**: Sebastian Thrun (Carnegie Mellon University)
+**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/MONK's+Problems) - October 1992
+**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)
+
+**The Monk's Problems: Problem 1**
+Once upon a time, in July 1991, the monks of Corsendonk Priory were faced with a school held in their priory, namely the 2nd European Summer School on Machine Learning. After listening more than one week to a wide variety of learning algorithms, they felt rather confused: Which algorithm would be optimal? And which one to avoid? As a consequence of this dilemma, they created a simple task on which all learning algorithms ought to be compared: the three MONK's problems.
+
+The target concept associated with the 1st Monk's problem is the binary outcome of the logical formula:
+MONK-1: (a1 == a2) or (a5 == 1)
+
+In this dataset, the original train and test sets were merged to allow other sampling procedures. However, the original train-test splits can be found as one of the OpenML tasks.
+
+### Attribute information:
+* attr1: 1, 2, 3
+* attr2: 1, 2, 3
+* attr3: 1, 2
+* attr4: 1, 2, 3
+* attr5: 1, 2, 3, 4
+* attr6: 1, 2
+
+### Relevant papers
+The MONK's Problems - A Performance Comparison of Different Learning Algorithms, by S.B. Thrun, J. Bala, E. Bloedorn, I. Bratko, B. Cestnik, J. Cheng, K. De Jong, S. Dzeroski, S.E. Fahlman, D. Fisher, R. Hamann, K. Kaufman, S. Keller, I. Kononenko, J. Kreuziger, R.S. Michalski, T. Mitchell, P. Pachowicz, Y. Reich H. Vafaie, W. Van de Welde, W. Wenzel, J. Wnek, and J. Zhang. Technical Report CS-CMU-91-197, Carnegie Mellon University, Dec. 1991.
+ 1
+ ARFF
+ Sebastian Thrun 1992-10-01 2014-08-26T17:11:18
+ English Public https://api.openml.org/data/v1/download/52236/monks-problems-1.arff
+ http://openml1.win.tue.nl/dataset333/dataset_333.pq 52236 class https://archive.ics.uci.edu/ml/citation_policy.html artificialmythbusting_1OpenML100study_1study_123study_135study_14study_144study_15study_20study_34study_41study_50study_52study_7uci public https://archive.ics.uci.edu/ml/datasets/MONK's+Problems https://link.springer.com/article/10.1023/A:1022622132310 http://openml1.win.tue.nl/dataset333/dataset_333.pq active
+ 2020-11-20 18:58:56 6cd008dccee6a34420c091dfe7cdb457
+
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/features.xml b/test/data/openml_cache/org/openml/www/datasets/333/features.xml
new file mode 100644
index 00000000..6cca4738
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/333/features.xml
@@ -0,0 +1,84 @@
+
+
+ 0
+ class
+ nominal
+ 0
+ 1
+ true
+ false
+ false
+ 0
+
+
+ 1
+ attr1
+ nominal
+ 1
+ 2
+ 3
+ false
+ false
+ false
+ 0
+
+
+ 2
+ attr2
+ nominal
+ 1
+ 2
+ 3
+ false
+ false
+ false
+ 0
+
+
+ 3
+ attr3
+ nominal
+ 1
+ 2
+ false
+ false
+ false
+ 0
+
+
+ 4
+ attr4
+ nominal
+ 1
+ 2
+ 3
+ false
+ false
+ false
+ 0
+
+
+ 5
+ attr5
+ nominal
+ 1
+ 2
+ 3
+ 4
+ false
+ false
+ false
+ 0
+
+
+ 6
+ attr6
+ nominal
+ 1
+ 2
+ false
+ false
+ false
+ 0
+
+
diff --git a/test/data/openml_cache/org/openml/www/datasets/333/features.xml.pkl b/test/data/openml_cache/org/openml/www/datasets/333/features.xml.pkl
new file mode 100644
index 00000000..03189bf8
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/333/features.xml.pkl differ
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/dataset.arff b/test/data/openml_cache/org/openml/www/datasets/40981/dataset.arff
new file mode 100644
index 00000000..f86981d5
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/40981/dataset.arff
@@ -0,0 +1,707 @@
+@relation 'aus4'
+@attribute 'A1' {'0','1'}
+@attribute 'A2' numeric
+@attribute 'A3' numeric
+@attribute 'A4' {'1','2','3'}
+@attribute 'A5' {'1','2','3','4','5','6','7','8','9','10','11','12','13','14'}
+@attribute 'A6' {'1','2','3','4','5','7','8','9'}
+@attribute 'A7' numeric
+@attribute 'A8' {'0','1'}
+@attribute 'A9' {'0','1'}
+@attribute 'A10' numeric
+@attribute 'A11' {'0','1'}
+@attribute 'A12' {'1','2','3'}
+@attribute 'A13' numeric
+@attribute 'A14' numeric
+@attribute 'A15' {'0','1'}
+@data
+"1",65,168,"2","4","4",39,"0","0",1,"1","2",32,161,"0"
+"0",72,123,"2","8","4",5,"0","0",1,"0","2",53,1,"0"
+"0",142,52,"1","4","4",31,"0","0",1,"1","2",98,1,"0"
+"0",60,169,"1","5","3",1,"1","1",12,"1","2",1,1,"1"
+"1",44,134,"2","6","4",46,"1","1",15,"0","2",18,68,"1"
+"0",4,20,"2","8","8",37,"1","1",3,"0","2",32,1,"1"
+"1",16,119,"2","3","4",4,"0","0",1,"0","2",18,50,"0"
+"0",330,94,"2","11","8",66,"1","1",7,"0","2",12,120,"1"
+"1",126,33,"1","2","8",65,"0","0",1,"0","2",61,117,"0"
+"0",315,125,"2","4","8",100,"1","1",4,"1","2",32,36,"0"
+"1",183,52,"2","14","8",81,"1","1",5,"1","2",89,144,"1"
+"1",248,102,"2","11","8",85,"1","1",7,"1","2",148,1,"1"
+"1",49,40,"1","8","8",34,"1","1",4,"1","2",45,79,"0"
+"1",198,102,"2","14","8",104,"1","1",7,"1","2",1,150,"1"
+"1",329,69,"2","8","4",56,"0","0",1,"1","2",107,1,"0"
+"1",284,116,"2","4","4",2,"0","0",1,"0","2",1,196,"1"
+"1",142,95,"2","9","4",104,"1","1",3,"1","2",109,1,"1"
+"0",33,139,"2","6","4",20,"1","1",3,"0","2",26,124,"1"
+"1",42,40,"1","4","4",4,"0","0",1,"0","2",45,5,"0"
+"0",69,111,"2","11","4",60,"1","1",8,"0","2",41,202,"1"
+"0",129,20,"2","6","4",2,"0","0",1,"0","2",92,151,"0"
+"0",35,20,"1","6","4",16,"1","0",1,"1","2",53,1,"0"
+"1",246,42,"2","2","4",5,"0","0",1,"0","2",57,1,"0"
+"1",250,52,"2","4","4",6,"1","0",1,"0","2",53,1,"0"
+"1",38,146,"2","6","4",21,"0","0",1,"0","2",24,101,"0"
+"1",176,46,"2","13","8",93,"1","1",4,"1","2",1,1,"1"
+"1",70,5,"1","4","4",4,"0","0",1,"0","2",69,44,"0"
+"1",181,74,"1","8","8",48,"1","1",2,"1","2",63,235,"1"
+"0",155,175,"2","8","4",47,"1","1",2,"0","2",77,20,"1"
+"1",77,66,"2","8","4",28,"1","1",12,"1","2",18,186,"1"
+"1",118,26,"2","8","8",78,"1","1",4,"1","2",106,66,"1"
+"0",47,160,"1","14","8",1,"0","0",1,"1","2",51,28,"0"
+"1",305,43,"1","8","8",114,"1","0",1,"1","2",69,50,"0"
+"1",77,169,"2","9","8",50,"1","1",12,"1","2",100,92,"1"
+"1",259,40,"2","7","4",123,"0","1",2,"1","2",115,54,"0"
+"1",348,204,"1","1","1",2,"0","1",3,"0","2",1,102,"0"
+"1",95,180,"2","6","4",65,"1","0",1,"1","1",3,1,"1"
+"1",236,191,"2","9","4",111,"1","1",7,"1","2",20,1,"1"
+"0",282,132,"2","8","4",106,"1","1",7,"1","2",1,164,"1"
+"0",280,73,"2","14","4",123,"1","1",3,"1","2",155,178,"1"
+"1",78,1,"2","13","4",3,"1","0",1,"0","2",1,1,"1"
+"1",71,46,"1","6","4",15,"0","0",1,"1","2",38,42,"0"
+"1",115,37,"2","14","8",31,"1","0",1,"0","2",1,216,"1"
+"1",338,18,"2","8","4",16,"1","1",4,"1","2",63,1,"0"
+"1",84,14,"1","8","4",2,"0","1",3,"0","2",40,7,"0"
+"0",50,156,"2","11","4",19,"1","1",3,"1","2",14,1,"1"
+"0",90,52,"1","8","4",5,"0","0",1,"0","2",43,1,"0"
+"1",8,2,"2","8","4",2,"0","0",1,"0","2",1,1,"1"
+"0",141,55,"1","10","8",47,"0","0",1,"0","2",91,18,"0"
+"0",308,196,"2","8","4",93,"1","1",15,"0","2",1,188,"1"
+"1",173,82,"2","4","4",14,"0","0",1,"1","2",84,1,"0"
+"1",53,90,"1","3","8",2,"0","0",1,"0","2",45,50,"0"
+"1",129,5,"1","4","4",3,"0","0",1,"0","2",76,185,"0"
+"1",34,52,"1","8","4",54,"0","0",1,"1","2",36,7,"0"
+"1",123,78,"1","11","8",87,"0","1",3,"1","2",119,2,"0"
+"1",126,46,"2","9","4",47,"1","1",12,"1","2",140,30,"1"
+"1",239,119,"2","6","5",73,"1","1",2,"0","2",1,115,"1"
+"0",216,66,"2","3","8",6,"0","0",1,"0","2",92,86,"0"
+"1",257,101,"1","9","4",69,"1","0",1,"1","2",16,173,"1"
+"1",321,177,"2","7","4",31,"1","1",5,"1","2",69,1,"1"
+"1",262,102,"2","3","5",52,"0","0",1,"1","2",46,1,"0"
+"0",84,25,"2","9","4",7,"0","1",2,"1","2",86,5,"0"
+"1",28,55,"2","3","4",37,"1","1",3,"0","2",38,94,"1"
+"0",243,82,"2","3","5",14,"0","0",1,"0","1",170,1,"0"
+"0",90,17,"2","11","8",37,"1","0",1,"0","2",98,141,"1"
+"1",253,149,"2","14","8",107,"1","1",9,"0","2",1,1,"1"
+"0",38,6,"2","11","4",2,"0","0",1,"1","2",124,1,"0"
+"1",58,169,"2","3","4",14,"1","0",1,"1","2",32,43,"0"
+"1",160,71,"2","1","1",1,"0","1",6,"0","2",61,64,"0"
+"1",120,48,"2","13","8",44,"1","1",13,"1","2",160,131,"1"
+"1",290,214,"1","1","1",1,"1","0",1,"1","2",1,1,"0"
+"0",152,43,"2","9","8",2,"0","1",4,"0","2",1,29,"0"
+"1",140,40,"2","9","4",43,"0","0",1,"0","2",69,1,"0"
+"1",130,103,"1","8","5",37,"1","1",9,"1","2",47,8,"1"
+"1",240,208,"2","10","9",131,"1","1",12,"0","2",1,158,"1"
+"1",211,92,"2","11","4",73,"0","0",1,"0","2",144,36,"0"
+"1",102,12,"2","4","8",73,"0","0",1,"1","2",111,1,"0"
+"1",145,82,"2","8","4",5,"0","0",1,"0","2",76,1,"0"
+"1",77,1,"2","4","4",26,"0","1",12,"0","1",1,1,"0"
+"0",171,45,"2","9","4",28,"1","1",17,"0","2",38,184,"1"
+"1",97,82,"2","13","4",17,"1","1",8,"0","2",1,223,"1"
+"0",200,85,"2","1","1",1,"0","1",7,"0","2",1,77,"0"
+"0",29,151,"2","2","4",11,"0","0",1,"0","2",24,34,"0"
+"1",238,102,"2","3","5",6,"0","0",1,"0","2",158,1,"0"
+"1",81,33,"2","8","4",14,"0","0",1,"1","1",98,1,"0"
+"1",219,1,"2","8","4",1,"0","0",1,"0","3",65,1,"1"
+"0",92,189,"2","11","8",37,"0","0",1,"0","2",98,2,"0"
+"0",276,185,"2","3","5",89,"1","1",10,"1","2",1,1,"1"
+"1",190,106,"2","9","4",3,"0","0",1,"1","2",100,7,"1"
+"1",66,20,"1","1","1",1,"0","0",1,"0","2",32,1,"0"
+"1",125,41,"2","4","8",7,"0","0",1,"1","1",45,1,"0"
+"1",258,89,"2","6","4",2,"0","0",1,"0","2",34,50,"0"
+"1",135,193,"2","2","4",4,"0","0",1,"0","2",1,93,"0"
+"1",208,102,"2","8","5",58,"1","1",7,"0","2",1,104,"1"
+"0",24,151,"1","11","8",5,"0","0",1,"0","2",111,1,"0"
+"1",55,46,"2","9","4",37,"0","0",1,"0","2",49,9,"1"
+"0",228,50,"2","6","4",7,"0","0",1,"0","2",1,106,"0"
+"1",167,2,"1","7","4",2,"0","0",1,"0","2",1,1,"0"
+"0",15,144,"2","6","4",43,"0","1",11,"1","2",1,11,"0"
+"0",47,29,"2","11","4",39,"1","1",2,"0","2",1,1,"1"
+"0",230,87,"2","8","4",65,"0","0",1,"0","2",149,1,"0"
+"1",226,9,"2","4","4",3,"1","0",1,"1","2",98,1,"1"
+"1",124,190,"2","9","4",95,"1","0",1,"1","2",150,115,"1"
+"1",125,20,"1","13","4",7,"1","1",3,"0","2",92,115,"1"
+"1",34,1,"1","1","1",1,"0","1",5,"0","2",13,2,"0"
+"0",95,30,"2","14","8",27,"1","0",1,"1","2",53,220,"1"
+"1",124,55,"2","14","8",26,"1","1",5,"0","2",45,224,"1"
+"1",67,139,"2","6","4",3,"0","0",1,"0","2",1,1,"0"
+"0",295,188,"2","4","8",110,"1","0",1,"1","2",1,1,"1"
+"1",173,66,"2","8","4",31,"0","0",1,"1","2",98,1,"0"
+"1",222,154,"1","4","4",4,"0","0",1,"0","2",53,1,"0"
+"1",279,119,"2","8","4",26,"0","0",1,"1","2",1,82,"0"
+"1",126,87,"1","3","8",95,"1","1",3,"1","2",22,1,"0"
+"0",204,31,"2","6","4",20,"1","1",5,"0","2",1,175,"1"
+"1",184,10,"2","3","5",76,"0","0",1,"1","1",136,1,"0"
+"1",189,3,"1","7","5",2,"1","1",2,"1","2",98,180,"1"
+"0",50,158,"2","13","8",9,"1","1",2,"1","2",24,36,"1"
+"1",181,33,"2","14","4",20,"1","1",8,"1","2",111,207,"1"
+"1",73,164,"2","11","4",58,"1","1",8,"1","2",32,140,"1"
+"1",290,136,"2","8","8",119,"1","1",10,"0","2",64,177,"1"
+"1",242,102,"2","8","4",85,"1","1",8,"0","2",1,201,"1"
+"1",49,29,"1","8","4",47,"0","0",1,"1","1",86,1,"0"
+"0",227,46,"2","1","1",1,"0","0",1,"0","2",23,1,"0"
+"0",323,205,"2","8","4",93,"1","1",8,"0","2",1,200,"1"
+"0",160,85,"2","13","8",17,"1","1",10,"1","2",64,1,"1"
+"1",64,27,"2","9","4",8,"0","1",2,"0","2",136,91,"0"
+"0",327,151,"2","11","4",76,"1","1",15,"0","2",1,176,"1"
+"1",137,13,"2","8","4",8,"0","0",1,"0","2",77,62,"0"
+"0",273,87,"2","5","3",1,"1","0",1,"0","2",32,148,"1"
+"1",52,73,"1","2","4",28,"1","1",9,"1","2",53,2,"1"
+"0",92,180,"2","6","4",37,"1","1",13,"1","2",38,121,"1"
+"0",51,17,"1","10","2",26,"0","0",1,"0","2",92,1,"0"
+"1",91,187,"1","1","1",1,"0","0",1,"0","2",65,1,"0"
+"0",112,69,"1","8","4",3,"0","0",1,"0","1",24,1,"0"
+"1",244,29,"2","1","1",1,"1","0",1,"0","2",42,2,"0"
+"0",223,93,"2","8","4",4,"0","0",1,"0","2",53,1,"0"
+"1",39,20,"2","1","1",1,"0","1",4,"0","2",114,137,"0"
+"1",232,144,"2","7","4",99,"1","1",15,"0","2",86,211,"1"
+"1",104,17,"2","8","4",36,"1","1",6,"1","2",106,1,"1"
+"0",274,73,"2","8","4",55,"1","1",9,"1","2",129,208,"1"
+"1",40,151,"1","4","8",23,"1","0",1,"1","2",45,1,"0"
+"0",67,40,"1","1","1",71,"0","0",1,"0","2",98,1,"0"
+"1",32,83,"1","1","1",1,"0","0",1,"1","2",63,2,"0"
+"1",339,206,"2","14","8",129,"1","1",10,"1","2",1,150,"1"
+"0",11,40,"2","11","4",7,"0","1",2,"0","2",34,48,"0"
+"0",343,196,"2","10","9",1,"1","1",15,"0","2",1,204,"1"
+"1",349,210,"2","10","9",120,"1","1",2,"1","2",1,53,"1"
+"0",5,72,"2","11","4",3,"0","0",1,"0","2",38,1,"0"
+"1",197,87,"2","2","5",119,"1","0",1,"1","2",65,1,"0"
+"0",280,132,"2","10","5",99,"1","1",7,"0","2",123,239,"1"
+"1",78,1,"2","8","4",1,"0","0",1,"0","3",65,1,"1"
+"1",270,46,"2","8","4",58,"1","0",1,"1","2",45,1,"0"
+"1",2,123,"2","10","4",26,"0","0",1,"0","2",161,1,"0"
+"1",32,14,"1","8","4",5,"0","1",2,"0","2",69,2,"0"
+"1",307,119,"2","4","4",98,"1","1",16,"0","2",1,231,"1"
+"1",35,1,"1","7","5",1,"0","0",1,"1","1",152,2,"1"
+"1",22,6,"2","11","7",6,"0","0",1,"0","2",69,32,"1"
+"1",218,29,"2","10","4",2,"0","0",1,"0","2",38,6,"0"
+"1",72,48,"1","9","4",67,"1","1",7,"0","2",24,1,"1"
+"1",283,91,"2","14","5",3,"0","0",1,"1","2",156,1,"0"
+"1",189,66,"2","8","4",26,"0","0",1,"0","2",146,17,"0"
+"0",180,49,"2","2","4",15,"0","0",1,"1","2",1,1,"0"
+"1",261,13,"1","8","4",10,"1","1",9,"1","2",102,70,"1"
+"1",193,88,"1","3","5",110,"1","1",8,"1","2",68,1,"1"
+"1",258,73,"2","3","5",26,"1","0",1,"0","2",1,77,"0"
+"1",25,6,"2","2","4",7,"0","0",1,"1","1",98,1,"0"
+"1",77,66,"2","1","1",3,"0","0",1,"1","2",32,209,"0"
+"0",70,135,"1","14","4",57,"0","0",1,"0","2",55,1,"1"
+"1",21,8,"2","6","4",2,"0","0",1,"0","2",98,135,"0"
+"1",27,159,"1","6","4",4,"1","0",1,"0","2",38,105,"0"
+"0",124,46,"2","7","4",47,"1","0",1,"0","1",118,1,"0"
+"0",33,141,"1","8","4",26,"1","1",5,"1","2",24,115,"1"
+"1",72,26,"2","3","4",39,"0","1",2,"1","2",131,10,"0"
+"1",336,183,"1","8","8",85,"1","0",1,"0","2",36,1,"0"
+"0",82,139,"2","11","4",110,"1","1",6,"1","2",38,1,"1"
+"1",201,77,"2","14","8",74,"1","0",1,"1","2",163,1,"0"
+"0",322,92,"1","1","1",85,"0","0",1,"1","2",1,5,"0"
+"0",309,6,"2","1","1",1,"0","0",1,"1","1",19,26,"0"
+"1",246,88,"2","13","8",101,"1","1",9,"0","2",107,1,"1"
+"1",255,103,"2","11","8",120,"1","0",1,"1","2",27,1,"1"
+"1",246,40,"1","9","4",7,"0","0",1,"0","2",1,75,"0"
+"0",186,26,"2","4","5",26,"1","1",4,"1","2",75,1,"0"
+"1",103,180,"2","13","4",30,"1","1",23,"1","2",45,89,"1"
+"1",88,120,"1","2","4",93,"1","0",1,"1","1",32,1,"1"
+"1",80,171,"1","9","4",23,"1","0",1,"1","2",53,94,"1"
+"1",155,66,"2","13","8",52,"0","0",1,"1","1",111,1,"0"
+"1",215,87,"2","8","5",85,"1","0",1,"1","1",98,1,"0"
+"1",110,47,"2","9","4",4,"0","0",1,"0","2",32,1,"0"
+"0",144,22,"2","9","4",7,"0","0",1,"1","2",102,1,"0"
+"0",76,54,"2","5","3",1,"0","1",2,"0","2",69,37,"0"
+"1",14,73,"2","4","4",2,"0","0",1,"1","2",53,32,"0"
+"1",139,195,"2","6","4",86,"1","1",6,"1","2",57,1,"1"
+"0",134,83,"2","3","5",14,"1","0",1,"1","2",59,1,"0"
+"1",194,1,"2","8","4",1,"0","0",1,"0","3",65,1,"0"
+"1",81,20,"2","8","8",3,"1","0",1,"0","2",63,1,"0"
+"1",95,167,"2","8","4",58,"1","1",18,"0","2",69,159,"1"
+"0",31,127,"2","11","4",62,"1","1",6,"0","2",65,236,"1"
+"1",13,10,"2","11","4",9,"0","1",5,"0","2",53,9,"0"
+"0",7,12,"2","1","1",1,"0","1",2,"0","2",53,59,"0"
+"1",178,66,"2","6","4",43,"0","1",3,"1","2",165,1,"0"
+"0",46,151,"2","8","8",26,"1","1",5,"0","2",15,174,"1"
+"0",61,52,"1","5","3",1,"0","0",1,"0","2",53,1,"0"
+"1",181,34,"2","12","8",99,"1","0",1,"1","2",55,237,"1"
+"0",99,57,"2","8","8",63,"1","0",1,"1","2",117,2,"0"
+"0",92,73,"2","11","8",44,"1","1",19,"0","2",1,115,"1"
+"1",157,1,"2","9","4",31,"1","1",2,"0","2",70,1,"1"
+"1",50,104,"1","5","4",8,"0","0",1,"0","2",45,74,"0"
+"0",241,133,"1","4","4",5,"0","1",3,"0","2",65,19,"0"
+"1",140,40,"2","8","8",7,"0","1",3,"1","2",131,52,"0"
+"1",244,17,"1","7","4",14,"0","0",1,"1","2",42,1,"0"
+"0",141,16,"2","4","4",15,"1","1",5,"0","2",124,115,"1"
+"1",312,17,"1","4","8",75,"1","0",1,"0","2",63,95,"1"
+"1",188,109,"1","8","4",37,"0","0",1,"1","2",18,1,"0"
+"1",95,175,"2","4","4",52,"1","1",3,"1","2",38,6,"0"
+"0",113,67,"1","1","1",1,"0","0",1,"1","2",63,41,"0"
+"0",180,97,"2","11","8",40,"1","1",3,"0","2",1,1,"1"
+"1",185,59,"2","8","4",37,"0","0",1,"0","3",38,1,"0"
+"1",207,109,"2","3","5",85,"0","0",1,"0","2",73,129,"0"
+"1",38,11,"2","4","4",8,"0","0",1,"0","2",98,103,"0"
+"1",87,30,"2","11","4",83,"1","1",3,"1","2",156,180,"1"
+"0",153,119,"2","8","5",76,"1","1",8,"1","2",1,201,"1"
+"1",42,123,"2","8","4",14,"0","0",1,"0","2",1,1,"0"
+"1",99,19,"2","8","4",8,"1","1",8,"1","2",30,214,"1"
+"1",142,99,"2","7","4",47,"0","1",2,"1","2",146,43,"0"
+"1",262,61,"2","3","5",20,"1","0",1,"0","2",159,1,"0"
+"1",136,38,"2","4","4",14,"1","0",1,"0","1",98,1,"0"
+"1",163,73,"1","3","5",101,"0","0",1,"0","2",1,2,"0"
+"1",45,150,"2","10","2",1,"1","0",1,"0","2",1,1,"1"
+"1",269,136,"2","13","8",124,"1","1",2,"1","2",26,180,"1"
+"0",74,62,"2","11","8",53,"1","1",8,"1","2",45,193,"1"
+"0",49,73,"2","11","4",5,"1","1",4,"0","2",32,7,"1"
+"1",126,46,"2","9","4",52,"0","1",2,"1","2",32,4,"0"
+"1",244,61,"1","14","8",115,"1","0",1,"1","2",61,1,"0"
+"1",318,215,"1","8","4",132,"1","1",22,"0","2",1,16,"1"
+"0",339,6,"2","1","1",1,"1","1",2,"0","2",84,50,"1"
+"0",69,167,"1","14","8",20,"1","1",5,"0","2",1,97,"1"
+"0",88,66,"1","3","5",81,"0","0",1,"0","2",69,111,"0"
+"0",345,115,"2","1","1",1,"0","0",1,"0","1",1,1,"0"
+"1",203,26,"2","4","4",37,"0","0",1,"1","2",83,1,"0"
+"1",287,175,"2","7","4",128,"1","0",1,"0","1",35,1,"1"
+"0",128,196,"1","10","9",1,"1","0",1,"0","2",1,232,"1"
+"1",200,66,"2","4","4",81,"1","1",8,"0","2",49,165,"1"
+"1",293,129,"2","3","5",105,"1","1",16,"1","2",1,213,"1"
+"1",350,109,"2","8","4",15,"1","0",1,"0","2",1,99,"0"
+"1",139,185,"2","2","8",14,"0","0",1,"0","2",81,1,"0"
+"0",12,17,"2","3","4",5,"0","1",7,"1","2",86,30,"0"
+"0",6,6,"2","6","4",26,"0","1",3,"1","2",107,2,"0"
+"1",333,193,"2","1","1",130,"1","1",16,"1","2",1,150,"1"
+"1",127,55,"2","4","8",77,"1","1",3,"1","2",64,1,"1"
+"1",249,47,"2","3","5",73,"0","0",1,"0","2",76,1,"1"
+"1",86,139,"2","6","4",7,"0","0",1,"1","2",1,1,"0"
+"1",90,186,"1","6","4",2,"0","0",1,"1","2",38,113,"0"
+"1",196,196,"2","12","7",92,"1","1",10,"1","2",1,61,"1"
+"1",185,40,"2","9","4",29,"0","0",1,"0","2",38,1,"0"
+"0",243,151,"2","11","8",43,"1","0",1,"0","2",8,143,"1"
+"1",47,54,"2","8","4",52,"1","1",2,"0","2",32,66,"1"
+"1",218,37,"1","2","4",37,"0","0",1,"1","2",138,1,"1"
+"1",288,92,"2","7","4",4,"1","0",1,"1","2",80,1,"1"
+"1",76,26,"2","7","4",14,"0","0",1,"1","1",107,1,"0"
+"1",199,81,"2","8","8",109,"0","0",1,"1","2",1,1,"0"
+"1",10,70,"2","6","4",18,"0","1",2,"0","2",24,22,"0"
+"1",72,160,"2","11","8",33,"1","0",1,"0","2",32,1,"1"
+"0",61,173,"2","8","4",7,"0","0",1,"1","2",63,1,"0"
+"0",96,67,"1","6","4",7,"1","0",1,"1","2",120,1,"1"
+"1",209,58,"1","9","4",3,"1","1",2,"0","2",15,157,"1"
+"0",134,85,"2","8","4",7,"0","1",2,"1","2",11,67,"0"
+"1",66,176,"2","8","4",72,"0","1",3,"1","2",63,72,"0"
+"1",190,47,"2","13","4",38,"1","1",2,"1","2",156,238,"1"
+"1",199,66,"2","3","4",26,"0","0",1,"1","2",73,1,"0"
+"1",134,84,"2","6","4",7,"0","0",1,"1","2",32,1,"0"
+"0",35,108,"2","3","8",8,"0","0",1,"0","2",24,114,"0"
+"1",79,33,"2","8","4",23,"1","0",1,"0","1",102,1,"1"
+"1",11,5,"2","8","4",5,"0","0",1,"0","2",43,1,"0"
+"0",43,5,"2","11","4",26,"0","1",2,"0","2",86,136,"1"
+"1",68,164,"2","9","4",47,"1","1",2,"0","2",24,90,"1"
+"1",188,104,"1","3","5",28,"0","0",1,"1","2",149,1,"0"
+"0",323,12,"2","3","5",26,"1","0",1,"1","2",88,187,"0"
+"1",9,29,"2","7","4",3,"1","0",1,"0","1",69,1,"0"
+"1",177,66,"2","13","8",63,"1","1",7,"0","2",53,183,"1"
+"1",286,211,"2","9","4",43,"1","1",4,"0","2",38,15,"1"
+"1",190,70,"2","3","5",58,"0","0",1,"1","2",84,77,"0"
+"1",26,39,"1","10","2",1,"0","0",1,"0","2",32,1,"0"
+"1",266,17,"2","7","4",116,"1","0",1,"0","1",131,1,"0"
+"1",228,52,"2","4","4",14,"0","0",1,"1","2",102,3,"0"
+"1",164,26,"1","6","4",73,"0","0",1,"1","2",107,1,"0"
+"1",96,51,"2","14","4",41,"1","1",2,"1","2",128,21,"1"
+"1",341,164,"2","10","9",131,"1","1",8,"1","2",5,1,"1"
+"1",247,1,"2","8","5",126,"1","0",1,"0","2",1,1,"1"
+"1",160,37,"2","1","1",1,"0","1",2,"0","2",30,20,"0"
+"0",214,98,"2","1","1",1,"0","0",1,"0","2",53,1,"0"
+"1",173,127,"2","10","5",39,"1","0",1,"1","1",136,1,"0"
+"0",168,76,"2","1","1",66,"0","1",3,"1","2",69,5,"0"
+"1",93,70,"2","8","4",52,"1","1",7,"0","2",65,125,"1"
+"1",63,18,"1","14","4",2,"1","1",2,"1","2",167,40,"1"
+"1",191,73,"2","13","8",103,"1","0",1,"1","2",1,1,"1"
+"1",302,73,"1","1","1",37,"0","0",1,"0","2",63,5,"0"
+"0",163,46,"2","1","1",1,"0","1",3,"1","2",69,51,"0"
+"1",224,24,"2","8","4",10,"0","1",3,"0","2",80,115,"0"
+"1",231,51,"2","14","4",4,"1","1",6,"1","2",149,1,"1"
+"1",24,65,"2","8","7",25,"0","1",3,"1","2",53,123,"0"
+"1",229,102,"2","13","4",73,"1","1",11,"1","2",1,1,"1"
+"1",42,165,"2","8","4",47,"0","0",1,"1","2",44,1,"0"
+"1",176,63,"2","2","8",95,"0","0",1,"1","2",101,1,"0"
+"1",29,112,"2","2","4",15,"0","0",1,"0","2",38,1,"0"
+"0",4,130,"2","11","4",4,"0","1",2,"1","2",1,69,"0"
+"1",233,114,"2","13","8",115,"1","1",15,"1","2",130,1,"1"
+"0",39,22,"2","9","4",41,"0","0",1,"0","2",77,6,"0"
+"1",56,160,"2","8","4",65,"1","0",1,"1","2",1,1,"1"
+"0",72,26,"2","8","4",47,"0","1",3,"1","2",69,107,"0"
+"1",103,78,"2","8","8",53,"0","1",2,"1","2",135,22,"0"
+"1",53,153,"1","10","8",31,"0","0",1,"0","2",92,1,"0"
+"0",291,46,"2","5","3",1,"1","0",1,"1","2",32,26,"0"
+"1",49,107,"2","11","4",10,"1","1",2,"0","2",53,1,"0"
+"0",40,9,"2","11","8",8,"1","1",12,"0","2",24,49,"1"
+"1",62,10,"2","2","8",18,"1","0",1,"1","2",1,1,"1"
+"1",116,18,"2","4","1",1,"0","0",1,"0","2",32,1,"0"
+"0",225,102,"2","13","4",122,"1","0",1,"1","2",169,1,"0"
+"0",296,181,"2","6","4",53,"1","1",4,"1","2",52,1,"1"
+"0",3,13,"2","8","4",26,"0","0",1,"0","2",38,19,"0"
+"0",90,183,"2","8","5",84,"1","1",3,"0","2",21,110,"1"
+"1",52,100,"1","9","4",52,"1","1",2,"1","2",24,94,"1"
+"1",65,164,"2","13","4",18,"1","0",1,"0","2",32,1,"1"
+"1",313,143,"2","1","1",125,"1","1",12,"1","2",9,94,"1"
+"1",23,109,"2","4","4",14,"1","0",1,"0","2",24,1,"1"
+"1",213,93,"1","4","4",7,"1","1",11,"1","2",107,1,"1"
+"1",221,154,"2","14","4",58,"1","1",7,"0","2",156,76,"1"
+"1",62,47,"2","4","4",3,"0","0",1,"1","2",116,1,"0"
+"0",303,119,"2","3","5",67,"0","0",1,"1","2",21,1,"0"
+"1",66,61,"2","3","4",4,"0","0",1,"0","2",53,11,"0"
+"1",306,46,"2","2","4",74,"0","0",1,"1","2",1,101,"0"
+"0",335,102,"2","6","4",76,"1","1",5,"0","2",1,49,"1"
+"1",44,110,"2","9","4",42,"1","0",1,"0","1",38,1,"1"
+"1",79,87,"2","8","5",7,"1","0",1,"1","2",53,1,"1"
+"1",207,15,"1","9","4",8,"0","0",1,"1","2",104,3,"0"
+"0",256,52,"1","8","4",1,"0","0",1,"1","2",49,2,"0"
+"1",83,29,"2","3","8",3,"0","0",1,"1","2",77,6,"0"
+"0",205,33,"2","8","4",47,"1","1",12,"0","2",1,111,"1"
+"1",236,102,"2","1","1",1,"0","1",3,"0","2",2,2,"0"
+"0",32,93,"1","8","8",65,"1","0",1,"0","2",86,1,"1"
+"1",120,21,"2","6","4",12,"1","0",1,"1","2",69,1,"0"
+"1",174,73,"2","2","4",5,"0","0",1,"1","2",38,1,"0"
+"1",37,46,"1","13","4",47,"1","0",1,"1","2",32,21,"0"
+"0",71,162,"2","11","4",11,"1","1",6,"1","2",1,120,"1"
+"1",238,118,"2","11","4",2,"1","1",2,"0","2",69,94,"1"
+"1",72,6,"2","8","3",52,"0","0",1,"1","1",1,1,"1"
+"0",81,27,"1","11","4",37,"1","1",3,"1","2",24,109,"1"
+"0",147,107,"2","10","2",52,"1","1",6,"1","2",31,115,"1"
+"0",241,128,"1","11","8",108,"1","1",15,"0","2",1,192,"1"
+"0",214,105,"2","10","4",85,"1","0",1,"1","2",1,206,"1"
+"1",158,57,"2","8","4",3,"0","0",1,"0","2",102,1,"0"
+"0",97,72,"2","14","8",24,"1","0",1,"0","2",117,1,"1"
+"1",122,180,"2","6","5",7,"0","0",1,"1","2",165,1,"0"
+"1",163,87,"2","14","4",85,"1","1",4,"1","2",100,190,"1"
+"1",189,119,"2","6","4",4,"1","0",1,"1","2",142,1,"0"
+"1",65,63,"2","4","4",20,"0","0",1,"0","2",63,1,"0"
+"1",159,75,"2","8","4",58,"0","1",3,"1","2",53,33,"0"
+"1",163,17,"2","8","5",23,"1","0",1,"1","1",107,1,"0"
+"1",6,76,"2","9","4",3,"0","1",2,"0","2",1,7,"0"
+"1",120,50,"2","13","8",87,"1","1",10,"0","2",130,142,"1"
+"1",97,73,"2","8","4",31,"0","1",2,"0","2",1,23,"0"
+"1",342,109,"2","10","9",121,"1","1",2,"1","2",1,1,"1"
+"0",32,144,"2","9","4",40,"1","1",7,"1","2",11,125,"1"
+"0",120,11,"2","7","8",4,"0","1",2,"1","2",95,52,"0"
+"0",39,22,"1","8","4",26,"0","1",2,"0","2",171,3,"0"
+"1",156,48,"2","2","4",16,"0","0",1,"1","1",1,1,"0"
+"0",297,29,"2","6","4",14,"0","0",1,"1","2",86,56,"0"
+"1",159,46,"1","9","4",2,"0","0",1,"0","1",53,1,"0"
+"1",301,196,"2","8","4",110,"1","1",10,"0","2",1,1,"1"
+"0",212,11,"2","1","1",1,"0","1",11,"0","2",69,19,"0"
+"1",184,70,"2","7","4",78,"1","1",7,"0","2",71,1,"1"
+"0",51,73,"2","6","4",2,"1","0",1,"0","2",32,1,"1"
+"1",207,202,"2","9","4",3,"0","0",1,"0","2",107,205,"0"
+"1",298,20,"2","1","1",1,"0","0",1,"0","2",48,1,"0"
+"0",83,28,"2","11","4",11,"0","1",2,"1","2",69,12,"0"
+"1",200,95,"2","14","8",95,"0","0",1,"1","1",164,1,"1"
+"0",328,207,"2","3","5",115,"1","1",14,"0","2",1,222,"1"
+"1",130,30,"2","7","4",25,"1","1",4,"1","2",129,1,"1"
+"1",163,21,"2","4","4",7,"0","0",1,"0","2",124,181,"0"
+"0",166,73,"1","5","3",1,"0","0",1,"0","2",53,21,"0"
+"1",37,119,"2","9","8",36,"1","1",8,"0","2",24,199,"1"
+"1",230,115,"2","7","4",32,"1","1",6,"1","2",34,155,"1"
+"1",13,79,"2","3","4",9,"0","0",1,"1","2",45,3,"0"
+"1",268,123,"1","8","4",40,"0","0",1,"0","2",53,3,"0"
+"1",9,1,"1","6","4",7,"0","0",1,"0","2",18,1,"0"
+"0",246,119,"2","11","4",14,"1","1",4,"1","2",48,1,"1"
+"1",137,196,"2","8","8",91,"1","1",12,"0","2",1,191,"1"
+"0",109,55,"2","5","3",1,"0","0",1,"1","2",97,2,"0"
+"1",225,80,"2","9","4",76,"1","1",15,"0","2",126,168,"1"
+"1",97,115,"2","8","4",26,"1","1",4,"0","2",1,1,"1"
+"0",129,13,"2","11","4",16,"1","1",5,"0","2",24,1,"1"
+"1",347,201,"2","1","1",1,"1","0",1,"1","2",1,1,"1"
+"1",70,169,"1","7","4",37,"0","0",1,"1","2",1,206,"0"
+"0",119,40,"2","1","1",1,"0","1",2,"0","2",27,94,"0"
+"0",35,137,"2","13","8",20,"1","1",8,"0","2",30,1,"1"
+"0",102,1,"2","8","4",1,"0","0",1,"0","3",65,1,"1"
+"1",163,2,"1","2","4",78,"0","0",1,"1","2",146,1,"0"
+"0",70,136,"2","11","4",43,"1","1",11,"0","2",24,149,"0"
+"0",75,40,"2","11","4",7,"0","0",1,"1","2",38,140,"0"
+"1",79,182,"2","8","4",4,"0","1",3,"0","2",1,217,"0"
+"0",141,36,"1","14","4",26,"0","0",1,"0","2",98,14,"0"
+"0",169,115,"2","2","4",31,"0","0",1,"0","2",95,1,"0"
+"1",118,46,"1","9","4",10,"1","0",1,"1","2",92,154,"1"
+"1",36,163,"2","8","5",16,"0","1",3,"1","2",69,8,"0"
+"1",204,64,"2","9","4",4,"0","1",3,"0","2",77,2,"0"
+"1",32,1,"2","11","4",18,"0","0",1,"0","2",53,2,"0"
+"0",136,85,"2","8","4",28,"1","1",2,"1","2",121,1,"1"
+"1",192,92,"2","3","5",71,"1","1",3,"0","2",96,126,"1"
+"0",182,73,"1","6","4",47,"0","0",1,"0","2",63,1,"0"
+"1",310,147,"2","10","4",112,"1","1",6,"0","2",1,1,"1"
+"1",98,33,"2","6","4",14,"0","0",1,"0","2",69,1,"0"
+"1",174,59,"1","4","1",1,"0","0",1,"0","2",38,1,"0"
+"0",53,102,"1","1","1",1,"0","0",1,"0","2",1,1,"0"
+"0",319,198,"2","5","1",1,"1","1",16,"0","2",1,87,"1"
+"1",172,6,"1","8","8",71,"1","1",2,"1","2",139,225,"1"
+"0",48,174,"2","8","8",96,"1","0",1,"0","2",111,1,"1"
+"0",150,109,"2","4","4",93,"0","0",1,"1","1",32,1,"0"
+"0",91,23,"2","6","8",43,"1","0",1,"0","2",131,1,"0"
+"1",78,166,"2","14","8",13,"1","1",2,"0","2",32,1,"1"
+"0",163,167,"2","1","1",1,"0","0",1,"0","2",65,215,"0"
+"0",173,18,"2","13","4",2,"1","0",1,"0","2",141,230,"1"
+"1",85,46,"2","2","8",45,"1","1",7,"0","2",69,98,"1"
+"1",214,5,"1","8","4",37,"0","0",1,"1","2",84,55,"1"
+"1",145,40,"1","4","4",7,"0","0",1,"0","2",79,1,"0"
+"0",86,17,"2","11","8",31,"1","1",2,"0","2",1,128,"1"
+"1",54,10,"1","8","8",7,"0","0",1,"0","2",98,78,"0"
+"1",162,197,"2","8","4",14,"1","0",1,"0","2",38,1,"0"
+"1",74,73,"2","7","4",32,"1","1",2,"0","2",92,139,"1"
+"1",218,52,"1","8","5",7,"1","0",1,"1","2",55,109,"0"
+"0",93,95,"2","9","4",26,"0","0",1,"1","2",117,7,"0"
+"1",209,86,"2","9","4",29,"1","0",1,"1","2",69,1,"0"
+"0",278,26,"2","11","8",63,"1","1",2,"0","2",110,145,"1"
+"1",170,87,"1","13","4",37,"0","0",1,"1","2",38,1,"0"
+"0",154,161,"2","11","8",3,"0","1",13,"1","2",41,4,"0"
+"1",126,47,"2","9","4",74,"1","1",6,"1","2",32,4,"1"
+"1",314,197,"2","10","9",1,"1","1",20,"0","2",50,60,"0"
+"1",7,26,"2","8","4",43,"1","1",6,"1","2",115,130,"1"
+"1",91,180,"2","9","4",24,"1","0",1,"1","2",92,1,"0"
+"1",175,109,"2","11","8",93,"1","1",13,"1","2",132,150,"1"
+"1",149,17,"2","8","4",43,"1","1",12,"0","2",10,118,"1"
+"1",67,16,"2","4","4",4,"0","0",1,"1","2",98,38,"0"
+"1",251,32,"2","14","4",58,"1","0",1,"0","2",153,125,"1"
+"1",213,55,"2","3","4",7,"0","0",1,"1","2",78,1,"0"
+"1",105,184,"2","13","4",14,"0","0",1,"0","2",1,3,"0"
+"1",146,54,"2","8","8",80,"1","0",1,"0","2",92,77,"1"
+"1",43,10,"2","11","4",4,"0","0",1,"0","2",69,1,"0"
+"1",40,13,"2","11","4",47,"1","1",3,"1","2",24,1,"1"
+"1",300,2,"2","14","8",2,"1","0",1,"0","2",1,200,"1"
+"1",337,123,"2","10","9",1,"0","0",1,"0","2",1,13,"0"
+"1",242,79,"2","7","4",73,"0","0",1,"1","1",131,1,"0"
+"0",10,9,"2","6","4",4,"0","0",1,"0","2",69,2,"0"
+"1",141,19,"2","9","4",8,"0","1",2,"0","2",111,198,"0"
+"1",188,95,"2","6","4",26,"1","0",1,"1","2",86,1,"0"
+"0",346,1,"2","8","4",1,"0","0",1,"0","3",65,1,"1"
+"1",253,8,"2","3","8",88,"1","0",1,"0","2",131,1,"1"
+"0",70,164,"1","11","4",65,"1","0",1,"1","2",94,1,"0"
+"1",89,55,"2","10","2",5,"0","1",3,"0","2",107,166,"0"
+"1",267,17,"2","3","8",85,"1","0",1,"1","2",107,1,"1"
+"0",76,173,"2","14","8",14,"1","1",3,"1","2",102,119,"1"
+"1",192,42,"2","3","5",4,"0","0",1,"1","2",141,210,"0"
+"1",77,169,"2","3","4",73,"1","1",10,"0","2",17,134,"1"
+"1",106,30,"2","4","4",10,"0","1",3,"1","2",60,4,"0"
+"0",18,139,"2","6","4",34,"1","0",1,"1","2",1,1,"1"
+"1",47,36,"2","11","4",37,"0","0",1,"0","2",34,8,"0"
+"1",165,199,"2","2","4",65,"1","1",10,"0","2",87,133,"1"
+"0",325,55,"2","1","1",99,"0","1",2,"0","2",1,11,"0"
+"1",332,47,"2","11","4",4,"1","0",1,"1","2",92,1,"1"
+"0",340,192,"2","1","1",1,"1","1",12,"1","2",1,171,"1"
+"1",148,34,"1","3","5",14,"1","1",11,"1","2",43,27,"0"
+"1",289,119,"2","11","8",96,"1","0",1,"1","2",114,1,"1"
+"0",83,169,"1","4","8",65,"0","0",1,"1","2",3,17,"0"
+"1",44,141,"2","8","4",41,"1","1",4,"1","2",11,27,"1"
+"1",285,130,"2","9","8",127,"1","1",13,"0","2",1,138,"1"
+"1",79,46,"2","11","4",55,"1","1",4,"1","2",1,122,"1"
+"1",172,192,"1","1","1",1,"0","1",3,"0","2",53,2,"0"
+"1",184,12,"1","13","4",3,"0","0",1,"0","2",63,1,"0"
+"1",83,16,"1","9","4",61,"1","1",7,"1","2",72,100,"0"
+"1",12,12,"1","4","4",8,"0","0",1,"0","1",69,1,"0"
+"0",51,136,"2","8","4",5,"0","0",1,"0","2",1,102,"0"
+"0",334,200,"2","11","4",117,"1","0",1,"1","2",4,229,"1"
+"1",163,102,"1","6","4",110,"1","0",1,"0","2",1,1,"0"
+"0",95,178,"2","13","8",73,"1","1",7,"0","2",131,112,"1"
+"1",58,148,"2","8","4",7,"1","0",1,"0","2",45,1,"0"
+"1",47,1,"2","8","4",1,"0","0",1,"0","3",65,1,"0"
+"0",98,187,"1","1","1",47,"0","1",2,"1","2",69,2,"0"
+"1",259,97,"2","11","4",82,"1","0",1,"0","1",1,1,"1"
+"1",114,52,"1","8","4",26,"1","1",6,"1","2",53,218,"1"
+"1",326,124,"2","7","4",124,"1","1",7,"1","2",117,167,"1"
+"1",316,169,"2","1","1",85,"1","1",6,"0","2",1,226,"1"
+"1",299,151,"2","3","5",1,"1","1",12,"0","2",1,163,"1"
+"1",237,17,"2","7","4",7,"1","0",1,"0","1",99,1,"0"
+"1",123,56,"1","6","4",47,"1","1",4,"1","2",120,120,"1"
+"1",149,119,"2","13","4",68,"1","1",9,"0","2",109,158,"1"
+"1",302,56,"1","1","1",37,"0","0",1,"0","2",38,2,"0"
+"0",95,164,"1","6","4",81,"1","0",1,"0","2",38,1,"0"
+"1",143,26,"1","8","4",2,"0","0",1,"0","2",86,1,"0"
+"0",185,13,"2","13","4",10,"0","0",1,"0","2",102,35,"1"
+"1",104,26,"2","8","5",7,"1","0",1,"0","2",113,24,"1"
+"0",135,34,"2","8","4",58,"1","1",6,"1","2",102,172,"1"
+"0",23,13,"3","13","1",115,"0","0",1,"1","1",102,1,"1"
+"1",47,123,"2","8","4",40,"1","1",4,"0","2",69,170,"1"
+"1",17,209,"3","1","7",1,"0","0",1,"1","3",143,240,"1"
+"1",23,122,"1","7","4",2,"0","0",1,"0","2",45,1,"0"
+"1",281,66,"2","7","5",58,"1","1",13,"1","2",133,195,"1"
+"1",324,136,"2","10","8",101,"1","1",4,"0","2",1,1,"1"
+"1",1,87,"1","9","4",43,"1","1",3,"1","2",38,150,"1"
+"1",36,144,"2","11","4",26,"1","0",1,"1","2",18,109,"1"
+"0",63,172,"2","4","8",3,"0","0",1,"0","2",107,6,"0"
+"1",18,151,"2","9","8",5,"0","1",2,"0","2",38,2,"0"
+"1",197,66,"1","9","4",65,"0","0",1,"0","1",69,1,"0"
+"1",138,82,"2","9","4",73,"1","1",4,"1","2",108,1,"0"
+"1",57,26,"1","12","7",20,"0","0",1,"1","2",43,3,"0"
+"1",24,156,"2","8","8",28,"0","0",1,"0","2",107,14,"0"
+"1",127,73,"2","9","4",20,"0","0",1,"1","2",102,42,"0"
+"0",272,160,"2","11","4",85,"1","1",8,"1","2",1,1,"1"
+"1",73,169,"2","3","4",11,"0","0",1,"0","2",1,1,"0"
+"0",163,82,"2","2","4",65,"1","0",1,"1","2",102,1,"0"
+"0",47,127,"2","4","4",37,"1","1",2,"0","2",53,83,"1"
+"0",55,63,"2","3","5",14,"1","1",5,"0","1",24,1,"1"
+"1",114,92,"2","13","4",79,"1","1",2,"1","2",38,1,"1"
+"0",121,50,"2","1","1",1,"0","0",1,"0","2",111,2,"0"
+"1",254,34,"2","9","4",85,"1","1",7,"1","2",152,228,"1"
+"0",25,151,"2","9","4",26,"0","1",2,"0","2",38,2,"0"
+"0",133,33,"2","11","4",26,"1","1",3,"1","2",56,115,"0"
+"0",58,115,"2","6","4",58,"1","1",4,"0","2",24,146,"1"
+"1",62,164,"2","14","4",8,"1","1",7,"0","2",39,1,"1"
+"1",63,17,"2","8","4",4,"0","0",1,"0","2",117,1,"0"
+"1",175,139,"1","9","8",90,"1","0",1,"1","2",51,1,"1"
+"0",271,33,"2","11","4",4,"0","0",1,"1","2",93,1,"0"
+"0",83,20,"1","1","1",4,"0","0",1,"0","2",38,46,"0"
+"0",21,18,"2","8","4",43,"0","1",2,"1","2",24,6,"0"
+"1",179,66,"1","9","4",101,"0","0",1,"1","2",98,1,"0"
+"0",72,12,"2","11","4",20,"0","0",1,"0","1",53,1,"0"
+"1",195,35,"2","7","4",29,"0","0",1,"0","1",7,1,"0"
+"1",143,44,"2","9","8",20,"1","1",2,"0","2",86,50,"1"
+"1",167,66,"2","6","4",104,"1","0",1,"1","2",157,1,"0"
+"0",49,54,"2","11","4",49,"1","1",6,"0","2",77,194,"1"
+"1",186,70,"2","3","5",1,"0","0",1,"0","2",63,1,"0"
+"1",65,28,"2","8","8",51,"0","0",1,"1","2",40,1,"1"
+"0",202,175,"2","11","8",124,"1","1",9,"0","2",1,221,"1"
+"1",168,94,"2","13","8",97,"1","1",4,"0","2",105,94,"1"
+"0",72,27,"2","3","4",3,"0","0",1,"0","2",47,1,"0"
+"1",114,194,"2","3","5",1,"0","0",1,"1","2",62,1,"0"
+"1",264,55,"2","7","4",43,"1","1",3,"1","2",1,16,"1"
+"1",149,36,"1","8","4",2,"0","0",1,"0","2",58,73,"0"
+"1",220,123,"2","11","8",118,"1","1",8,"1","2",102,6,"0"
+"1",13,4,"1","8","4",2,"0","0",1,"0","2",45,132,"0"
+"1",292,62,"2","1","1",8,"0","0",1,"0","2",69,4,"0"
+"1",75,7,"2","7","4",3,"0","0",1,"0","1",1,1,"0"
+"0",197,40,"1","3","8",14,"0","0",1,"1","2",53,1,"0"
+"1",266,164,"1","2","4",37,"1","0",1,"0","1",1,1,"0"
+"1",331,70,"2","9","4",43,"1","1",6,"1","2",18,39,"1"
+"1",101,13,"2","7","4",7,"1","1",4,"0","2",92,234,"1"
+"0",250,34,"2","6","4",18,"0","0",1,"0","2",86,84,"0"
+"1",294,204,"2","1","1",1,"1","1",2,"0","2",29,1,"0"
+"0",187,48,"1","1","1",1,"1","0",1,"0","2",107,1,"0"
+"1",35,87,"1","3","4",26,"0","0",1,"1","2",117,150,"0"
+"0",90,64,"1","8","4",1,"0","0",1,"0","2",38,1,"0"
+"1",80,46,"2","8","8",35,"1","0",1,"0","2",137,77,"1"
+"1",344,139,"2","1","1",76,"0","1",2,"0","2",20,7,"0"
+"1",281,11,"2","8","5",126,"1","1",20,"0","2",1,233,"1"
+"0",182,66,"1","8","4",58,"0","0",1,"1","2",1,3,"0"
+"1",263,73,"2","11","8",96,"1","1",12,"0","2",24,1,"1"
+"1",76,21,"1","6","4",4,"1","0",1,"0","2",63,2,"0"
+"1",196,66,"2","13","5",14,"0","0",1,"0","2",112,1,"0"
+"1",320,203,"2","2","5",126,"1","1",18,"1","2",1,1,"1"
+"0",265,121,"2","11","4",102,"1","1",4,"1","2",1,1,"1"
+"0",267,1,"2","8","4",58,"1","0",1,"0","2",1,1,"1"
+"1",191,52,"2","9","5",7,"1","0",1,"1","2",54,1,"0"
+"0",269,96,"2","4","8",26,"0","0",1,"1","1",86,1,"0"
+"1",154,69,"1","7","4",4,"0","0",1,"1","1",24,1,"0"
+"1",124,26,"2","11","8",5,"0","0",1,"1","2",77,88,"0"
+"1",37,126,"2","7","4",2,"0","1",2,"0","2",32,2,"0"
+"0",247,33,"2","3","5",52,"1","0",1,"1","2",1,94,"1"
+"0",82,46,"2","9","4",24,"0","0",1,"1","2",53,1,"0"
+"1",165,28,"2","14","4",33,"1","1",9,"1","2",103,203,"1"
+"0",20,164,"2","14","8",26,"1","1",12,"0","2",1,200,"1"
+"0",181,61,"1","13","4",73,"0","0",1,"1","2",69,63,"0"
+"0",307,123,"2","6","8",65,"0","0",1,"0","2",1,1,"0"
+"0",100,37,"2","11","4",32,"1","1",3,"0","2",69,1,"0"
+"1",242,46,"2","3","5",1,"0","0",1,"0","1",102,1,"0"
+"1",85,20,"1","13","8",4,"0","0",1,"0","2",86,2,"0"
+"1",317,180,"2","4","8",108,"1","0",1,"1","2",6,182,"1"
+"0",133,74,"1","14","8",59,"1","1",2,"0","2",20,1,"1"
+"1",170,87,"2","7","4",58,"1","0",1,"1","2",117,1,"1"
+"1",54,30,"1","8","8",7,"0","0",1,"0","2",98,78,"0"
+"1",190,140,"2","8","4",81,"1","1",13,"1","2",1,80,"1"
+"1",263,213,"2","11","8",6,"1","1",2,"0","2",166,47,"0"
+"0",222,115,"2","4","4",26,"1","0",1,"1","2",1,1,"1"
+"1",217,56,"2","9","4",2,"1","0",1,"1","2",131,219,"1"
+"1",145,56,"1","14","8",2,"0","0",1,"0","2",40,2,"0"
+"1",216,68,"2","13","4",5,"0","0",1,"1","2",1,116,"0"
+"1",161,205,"2","8","4",101,"1","1",17,"0","2",1,213,"1"
+"1",206,67,"2","1","1",1,"0","0",1,"0","2",1,150,"0"
+"1",48,64,"2","8","4",47,"1","1",12,"1","2",69,200,"1"
+"1",277,113,"2","9","4",93,"0","0",1,"0","2",147,66,"0"
+"1",64,131,"1","3","5",5,"0","0",1,"1","2",65,1,"0"
+"1",48,151,"1","8","4",58,"1","0",1,"0","1",11,1,"1"
+"1",198,66,"2","9","4",1,"1","0",1,"1","2",85,77,"1"
+"1",86,30,"2","7","4",3,"0","1",5,"0","2",90,179,"0"
+"1",216,119,"2","7","8",78,"1","1",13,"1","2",28,1,"1"
+"0",50,145,"2","3","4",2,"0","0",1,"0","2",69,150,"0"
+"1",213,78,"2","11","8",113,"1","0",1,"1","2",33,127,"1"
+"1",71,152,"2","14","4",2,"1","1",10,"0","2",18,108,"1"
+"0",117,187,"2","11","8",85,"1","1",3,"0","2",1,213,"1"
+"0",275,16,"2","13","8",11,"1","1",12,"1","2",141,7,"1"
+"0",304,1,"1","1","1",1,"0","0",1,"0","2",1,1,"0"
+"1",231,66,"1","3","8",115,"0","0",1,"1","1",69,1,"0"
+"1",231,49,"2","8","4",37,"1","1",11,"0","2",66,212,"1"
+"0",124,56,"2","9","4",7,"0","0",1,"1","2",63,36,"0"
+"1",35,144,"2","9","4",37,"1","0",1,"0","2",38,189,"1"
+"1",235,92,"2","8","5",99,"1","1",17,"0","2",37,160,"1"
+"1",210,26,"1","2","4",16,"0","0",1,"0","2",86,4,"0"
+"1",109,10,"2","3","5",1,"1","0",1,"1","2",1,1,"1"
+"1",234,51,"1","7","4",5,"0","0",1,"0","1",131,1,"0"
+"1",252,15,"2","8","8",6,"1","1",7,"0","2",77,147,"1"
+"1",56,127,"2","6","4",35,"1","1",2,"0","2",24,227,"1"
+"0",43,40,"2","8","4",1,"0","0",1,"0","2",1,1,"0"
+"0",123,73,"2","7","4",64,"0","1",2,"1","2",98,11,"0"
+"1",59,27,"1","13","4",18,"0","0",1,"0","2",53,1,"0"
+"1",130,105,"2","14","4",84,"1","1",3,"0","2",136,8,"1"
+"1",117,61,"2","3","5",14,"0","0",1,"1","2",162,206,"0"
+"0",27,141,"2","11","4",30,"1","1",5,"0","2",18,118,"1"
+"0",79,114,"2","11","4",70,"1","1",11,"0","2",38,85,"1"
+"1",83,53,"2","8","4",15,"0","0",1,"1","2",44,2,"0"
+"1",131,102,"2","9","4",117,"1","0",1,"1","2",20,1,"1"
+"1",103,60,"1","6","4",76,"1","0",1,"0","2",67,1,"0"
+"0",134,50,"2","11","4",56,"1","0",1,"1","2",141,1,"0"
+"1",75,77,"1","8","4",5,"0","0",1,"0","2",53,152,"0"
+"0",90,34,"1","1","1",14,"1","1",4,"0","2",63,65,"0"
+"1",92,18,"2","7","4",26,"0","0",1,"1","2",38,2,"0"
+"1",169,52,"1","10","8",2,"1","0",1,"1","2",127,1,"1"
+"1",284,85,"2","3","5",26,"0","0",1,"0","2",32,3,"0"
+"0",132,82,"2","9","4",23,"1","0",1,"0","1",98,1,"1"
+"1",100,18,"2","9","4",5,"0","1",2,"0","2",95,110,"0"
+"1",29,157,"2","1","1",11,"0","0",1,"0","2",24,1,"0"
+"1",60,38,"1","4","4",58,"1","1",2,"0","2",63,21,"0"
+"1",91,40,"2","8","4",7,"0","0",1,"0","2",35,1,"0"
+"1",10,89,"2","3","8",11,"0","0",1,"1","2",38,1,"0"
+"1",115,55,"2","2","4",20,"0","0",1,"1","2",24,1,"0"
+"1",285,82,"2","6","4",73,"1","0",1,"0","1",82,1,"1"
+"1",82,70,"2","1","1",81,"0","0",1,"0","2",53,25,"0"
+"0",245,56,"1","11","8",4,"1","1",21,"1","2",145,162,"1"
+"1",200,212,"2","14","8",40,"1","1",2,"1","2",154,115,"1"
+"1",84,175,"2","8","4",49,"0","0",1,"0","1",24,1,"0"
+"1",30,102,"2","11","4",10,"1","1",3,"0","2",1,31,"0"
+"1",151,17,"2","2","8",3,"0","0",1,"1","1",88,1,"0"
+"0",41,26,"2","8","4",22,"1","1",6,"1","2",45,6,"0"
+"1",109,29,"2","13","4",29,"0","0",1,"0","2",32,1,"0"
+"1",136,46,"1","8","4",37,"1","0",1,"1","2",1,81,"0"
+"1",135,142,"2","11","8",94,"1","1",7,"0","2",125,71,"1"
+"1",107,33,"2","11","4",43,"1","0",1,"1","2",98,1,"1"
+"1",82,77,"1","4","4",11,"0","1",2,"1","2",98,45,"0"
+"1",111,185,"2","10","2",1,"0","0",1,"1","2",45,156,"0"
+"1",311,122,"2","8","8",61,"1","1",12,"1","2",1,92,"1"
+"1",163,13,"2","2","4",24,"1","0",1,"1","1",168,1,"0"
+"0",98,180,"2","2","4",26,"0","0",1,"1","2",63,153,"0"
+"1",181,77,"1","14","4",69,"1","1",4,"1","2",124,1,"1"
+"1",235,49,"2","8","4",37,"0","0",1,"0","2",1,96,"0"
+"0",19,1,"1","5","1",1,"0","0",1,"0","2",25,1,"0"
+"1",85,22,"2","8","4",5,"0","0",1,"0","2",32,1,"1"
+"1",114,69,"1","13","4",90,"1","1",2,"0","2",74,1,"1"
+"0",70,14,"2","3","4",9,"0","0",1,"1","1",47,1,"0"
+"1",238,18,"1","6","4",14,"1","1",4,"0","2",69,150,"1"
+"0",108,138,"2","6","4",35,"1","0",1,"0","2",53,66,"1"
+"1",42,1,"2","2","4",14,"0","0",1,"0","2",47,1,"0"
+"1",163,87,"1","3","4",3,"0","0",1,"1","2",134,1,"0"
+"1",115,95,"1","8","5",58,"0","0",1,"0","2",69,160,"0"
+"0",94,40,"2","1","1",1,"1","0",1,"0","2",24,1,"0"
+"0",172,46,"2","8","4",7,"0","0",1,"1","2",122,58,"0"
+"1",19,94,"2","8","4",7,"0","0",1,"0","1",24,1,"0"
+"0",220,109,"2","11","4",4,"1","0",1,"1","2",81,1,"1"
+"1",72,67,"1","8","8",60,"1","0",1,"0","2",1,1,"1"
+"0",21,155,"2","1","1",1,"0","0",1,"0","2",1,36,"0"
+"1",89,179,"2","11","8",39,"1","0",1,"1","2",38,1,"1"
+"0",104,17,"2","8","8",24,"1","0",1,"1","2",151,1,"1"
+"1",109,180,"1","4","8",31,"0","0",1,"1","2",1,18,"0"
+"0",73,117,"2","6","4",5,"0","0",1,"0","2",77,150,"0"
+"1",76,26,"2","7","4",14,"1","0",1,"1","1",107,1,"0"
+"1",103,11,"1","8","4",37,"0","0",1,"1","2",53,1,"0"
+"1",289,8,"1","4","4",7,"1","1",12,"0","2",124,197,"1"
+"1",54,1,"2","8","4",14,"0","0",1,"1","1",1,1,"0"
+"1",201,200,"1","8","4",76,"1","0",1,"0","2",24,1,"0"
+"0",75,170,"2","13","4",2,"1","0",1,"0","2",24,169,"1"
+"0",285,42,"2","3","7",9,"0","0",1,"0","2",1,57,"0"
+"1",260,11,"1","13","8",43,"1","1",9,"0","2",32,105,"1"
+"1",163,160,"2","14","4",99,"1","0",1,"0","2",1,1,"1"
+"1",49,14,"2","8","4",4,"0","0",1,"0","2",1,35,"0"
+"0",32,145,"2","6","4",3,"1","0",1,"0","2",32,1,"1"
+"0",122,193,"2","14","8",67,"1","1",2,"0","2",38,12,"1"
+"1",245,2,"2","10","4",2,"0","1",2,"0","1",159,1,"1"
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pkl.py3 b/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pkl.py3
new file mode 100644
index 00000000..b217d55f
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pkl.py3 differ
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pq b/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pq
new file mode 100644
index 00000000..fe69f0e0
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/40981/dataset_40981.pq differ
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/description.xml b/test/data/openml_cache/org/openml/www/datasets/40981/description.xml
new file mode 100644
index 00000000..70843ade
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/40981/description.xml
@@ -0,0 +1,49 @@
+
+ 40981
+ Australian
+ 4
+ **Author**: Confidential. Donated by Ross Quinlan
+**Source**: [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html), [UCI](https://archive.ics.uci.edu/ml/datasets/Statlog+(Australian+Credit+Approval)) - 1987
+**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)
+
+**Important note:** This dataset is derived from [credit-approval](https://www.openml.org/d/29), even though both datasets exist individually on UCI. In this version, missing values were filled in (not clear how) and a duplicate feature was removed.
+
+**Australian Credit Approval**. This is the famous Australian Credit Approval dataset, originating from the StatLog project. It concerns credit card applications. All attribute names and values have been changed to meaningless symbols to protect the confidentiality of the data.
+
+This dataset was retrieved 2014-11-14 from the UCI site and converted to the ARFF format.
+
+__Major changes w.r.t. version 3: dataset from UCI that matches description and data types__
+
+
+### Feature information
+
+There are 6 numerical and 8 categorical attributes, all normalized to [-1,1]. The original formatting was as follows:
+
+A1: 0,1 CATEGORICAL (formerly: a,b)
+A2: continuous.
+A3: continuous.
+A4: 1,2,3 CATEGORICAL (formerly: p,g,gg)
+A5: 1, 2,3,4,5, 6,7,8,9,10,11,12,13,14 CATEGORICAL (formerly: ff,d,i,k,j,aa,m,c,w, e, q, r,cc, x)
+A6: 1, 2,3, 4,5,6,7,8,9 CATEGORICAL (formerly: ff,dd,j,bb,v,n,o,h,z)
+A7: continuous.
+A8: 1, 0 CATEGORICAL (formerly: t, f)
+A9: 1, 0 CATEGORICAL (formerly: t, f)
+A10: continuous.
+A11: 1, 0 CATEGORICAL (formerly t, f)
+A12: 1, 2, 3 CATEGORICAL (formerly: s, g, p)
+A13: continuous.
+A14: continuous.
+A15: 1,2 class attribute (formerly: +,-)
+
+### Relevant Papers
+
+Ross Quinlan. "Simplifying decision trees", Int J Man-Machine Studies 27, Dec 1987, pp. 221-234.
+
+Ross Quinlan. "C4.5: Programs for Machine Learning", Morgan Kaufmann, Oct 1992
+ 2
+ ARFF
+ 2017-12-04T22:15:38
+ Public https://api.openml.org/data/v1/download/18151910/Australian.arff
+ http://openml1.win.tue.nl/dataset40981/dataset_40981.pq 18151910 A15 4 derivedOpenML100study_135study_144study_218study_98 public https://archive.ics.uci.edu/ml/datasets/Statlog+(Australian+Credit+Approval) http://openml1.win.tue.nl/dataset40981/dataset_40981.pq active
+ 2018-10-04 07:20:02 920e2419a28215109651fcc5cbd1662e
+
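
The fixture above is the OpenML description of dataset 40981 (Australian credit approval). Below is a minimal, illustrative sketch of how such a cached dataset is consumed through the classes exercised by the tests added later in this diff (OpenMLDataset, DatasetData); the exact split between the A1-A14 features and the A15 target is an assumption, not documented behaviour.

# Illustrative sketch only; relies on OpenMLDataset/DatasetData exactly as used in the new tests below.
from meta_automl.data_preparation.dataset import OpenMLDataset

dataset = OpenMLDataset(40981)           # id of the 'Australian' fixture cached above
data = dataset.get_data()                # returns a DatasetData instance
print(data.attribute_names)              # presumably A1 ... A14 (A15 is the default target and likely ends up in data.y)
print(sum(data.categorical_indicator))   # 8 of the 14 predictors are categorical per the description
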
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/features.xml b/test/data/openml_cache/org/openml/www/datasets/40981/features.xml
new file mode 100644
index 00000000..ba431ff5
--- /dev/null
+++ b/test/data/openml_cache/org/openml/www/datasets/40981/features.xml
@@ -0,0 +1,175 @@
+
+
+ 0
+ A1
+ nominal
+ 0
+ 1
+ false
+ false
+ false
+ 0
+
+
+ 1
+ A2
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 2
+ A3
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 3
+ A4
+ nominal
+ 1
+ 2
+ 3
+ false
+ false
+ false
+ 0
+
+
+ 4
+ A5
+ nominal
+ 1
+ 10
+ 11
+ 12
+ 13
+ 14
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ false
+ false
+ false
+ 0
+
+
+ 5
+ A6
+ nominal
+ 1
+ 2
+ 3
+ 4
+ 5
+ 7
+ 8
+ 9
+ false
+ false
+ false
+ 0
+
+
+ 6
+ A7
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 7
+ A8
+ nominal
+ 0
+ 1
+ false
+ false
+ false
+ 0
+
+
+ 8
+ A9
+ nominal
+ 0
+ 1
+ false
+ false
+ false
+ 0
+
+
+ 9
+ A10
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 10
+ A11
+ nominal
+ 0
+ 1
+ false
+ false
+ false
+ 0
+
+
+ 11
+ A12
+ nominal
+ 1
+ 2
+ 3
+ false
+ false
+ false
+ 0
+
+
+ 12
+ A13
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 13
+ A14
+ numeric
+ false
+ false
+ false
+ 0
+
+
+ 14
+ A15
+ nominal
+ 0
+ 1
+ true
+ false
+ false
+ 0
+
+
diff --git a/test/data/openml_cache/org/openml/www/datasets/40981/features.xml.pkl b/test/data/openml_cache/org/openml/www/datasets/40981/features.xml.pkl
new file mode 100644
index 00000000..a865af56
Binary files /dev/null and b/test/data/openml_cache/org/openml/www/datasets/40981/features.xml.pkl differ
diff --git a/test/data_manager.py b/test/data_manager.py
deleted file mode 100644
index 1bb78bc6..00000000
--- a/test/data_manager.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from pathlib import Path
-
-from meta_automl.data_preparation.data_manager import DataManager
-
-
-class TestDataManager(DataManager):
- @classmethod
- def get_data_dir(cls) -> Path:
- return cls.get_project_root().joinpath('test/data')
diff --git a/test/general_checks.py b/test/general_checks.py
deleted file mode 100644
index a1d8610d..00000000
--- a/test/general_checks.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from pathlib import Path
-from typing import Union
-
-from meta_automl.data_preparation.dataset import Dataset, DatasetCache
-from test.constants import CACHED_DATASETS
-from test.data_manager import TestDataManager
-
-
-def assert_file_unmodified_during_test(path: Path, test_start_timestamp: float):
- assert path.stat().st_mtime < test_start_timestamp, f'The file should not be modified during the test: ' \
- f'"{path.relative_to(TestDataManager.get_project_root())}".'
-
-
-def assert_cache_file_exists(path: Path):
- assert path.exists(), 'Cache not found at the path: ' \
- f'"{path.relative_to(TestDataManager.get_project_root())}".'
-
-
-def check_dataset_and_cache(dataset_or_cache: Union[Dataset, DatasetCache], desired_name: str, desired_path: Path,
- test_start_time: float):
- assert dataset_or_cache.name == desired_name
- assert dataset_or_cache.cache_path == desired_path
- assert_cache_file_exists(desired_path)
- if desired_name in CACHED_DATASETS:
- assert_file_unmodified_during_test(desired_path, test_start_time)
diff --git a/test/unit/datasets/__init__.py b/test/unit/datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/test/unit/datasets/conftest.py b/test/unit/datasets/conftest.py
new file mode 100644
index 00000000..bd43ec3e
--- /dev/null
+++ b/test/unit/datasets/conftest.py
@@ -0,0 +1,18 @@
+import shutil
+
+import pytest
+
+from meta_automl.data_preparation.dataset import OpenMLDataset
+from meta_automl.data_preparation.file_system import get_dataset_cache_path_by_id
+from test.constants import OPENML_CACHED_DATASETS, OPENML_DATASET_IDS_TO_LOAD
+
+
+@pytest.fixture
+def openml_dataset_ids():
+ ids = OPENML_DATASET_IDS_TO_LOAD
+ yield ids
+ for dataset_id in ids:
+ if dataset_id in OPENML_CACHED_DATASETS:
+ continue
+ cache_path = get_dataset_cache_path_by_id(OpenMLDataset, dataset_id)
+ shutil.rmtree(cache_path, ignore_errors=True)
diff --git a/test/unit/datasets/general_checks.py b/test/unit/datasets/general_checks.py
new file mode 100644
index 00000000..5e2f446d
--- /dev/null
+++ b/test/unit/datasets/general_checks.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+
+import test.constants
+from meta_automl.data_preparation.dataset import DatasetBase
+from meta_automl.data_preparation.file_system import get_project_root
+from meta_automl.data_preparation.file_system import get_dataset_cache_path
+
+
+def assert_file_unmodified_during_test(path: Path):
+ failure_message = ('The file should not be modified during the test: '
+ f'"{path.relative_to(get_project_root())}".')
+ assert path.stat().st_mtime < test.constants.TEST_START_TIMESTAMP, failure_message
+
+
+def assert_cache_file_exists(path: Path):
+ assert path.exists(), 'Cache not found at the path: ' \
+ f'"{path.relative_to(get_project_root())}".'
+
+
+def check_dataset_cache(dataset: DatasetBase):
+ cache_path = get_dataset_cache_path(dataset)
+ assert_cache_file_exists(cache_path)
+ if dataset.id_ in test.constants.OPENML_CACHED_DATASETS:
+ assert_file_unmodified_during_test(cache_path)
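
These helpers, and the fixtures around them, import constants from test/constants.py, which is not shown in this part of the diff. A hypothetical sketch of the module's shape, inferred from the imports, follows; every value is a placeholder except 40981, the only dataset cached under test/data/openml_cache in this change, and 1464 ('blood-transfusion-service-center') is a guess based on the datasets used by the removed tests.

# Hypothetical sketch of test/constants.py; not part of this diff.
import time

# Captured at import time, so files cached before the test session started
# can be asserted to remain unmodified (see assert_file_unmodified_during_test).
TEST_START_TIMESTAMP = time.time()

OPENML_CACHED_DATASETS = [40981]              # 'australian' fixture shipped with the repo
DATASETS_WITH_CACHED_META_FEATURES = [40981]  # placeholder
OPENML_DATASET_IDS_TO_LOAD = [40981, 1464]    # placeholder ids
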
diff --git a/test/unit/datasets/test_custom_dataset.py b/test/unit/datasets/test_custom_dataset.py
new file mode 100644
index 00000000..5f34b194
--- /dev/null
+++ b/test/unit/datasets/test_custom_dataset.py
@@ -0,0 +1,48 @@
+import numpy as np
+import pytest
+
+from meta_automl.data_preparation.dataset import DataNotFoundError, CustomDataset, DatasetData
+from test.unit.datasets.general_checks import assert_cache_file_exists
+
+
+@pytest.fixture(scope='module')
+def new_dataset_data():
+ dataset_data = DatasetData(
+ x=np.array([['a', 'b'], ['b', 'a']]),
+ y=np.array([5, 10]),
+ categorical_indicator=[True, True],
+ attribute_names=['foo', 'bar']
+ )
+ return dataset_data
+
+
+@pytest.fixture(scope='module')
+def new_dataset(new_dataset_data):
+ dataset = CustomDataset(42)
+ dataset.dump_data(new_dataset_data)
+ yield dataset
+ dataset.cache_path.unlink()
+
+
+def test_error_on_missing_dataset_cache():
+ with pytest.raises(DataNotFoundError):
+ CustomDataset('random_missing_dataset').get_data()
+
+
+def test_custom_dataset_dumping(new_dataset):
+ # Act
+ cache_path = new_dataset.cache_path
+ # Assert
+ assert_cache_file_exists(cache_path)
+
+
+def test_custom_dataset_data_loading(new_dataset_data, new_dataset):
+ # Act
+ correct_data = new_dataset_data
+ dataset = new_dataset
+ data = dataset.get_data()
+ # Assert
+ assert np.all(np.equal(data.x, correct_data.x))
+ assert np.all(np.equal(data.y, correct_data.y))
+ assert data.categorical_indicator == correct_data.categorical_indicator
+ assert data.attribute_names == correct_data.attribute_names
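
Outside pytest, the dump/load round trip exercised above reduces to a few lines. The sketch below uses only names that appear in this test file; whether CustomDataset accepts a bare integer id like 42 outside the test harness is assumed from the fixture.

# Sketch of the CustomDataset round trip covered by the tests above; illustrative only.
import numpy as np

from meta_automl.data_preparation.dataset import CustomDataset, DatasetData

data = DatasetData(
    x=np.array([['a', 'b'], ['b', 'a']]),
    y=np.array([5, 10]),
    categorical_indicator=[True, True],
    attribute_names=['foo', 'bar'],
)
dataset = CustomDataset(42)      # arbitrary id, as in the fixture above
dataset.dump_data(data)          # writes the cache file at dataset.cache_path
restored = dataset.get_data()    # reads it back as a DatasetData
assert restored.attribute_names == data.attribute_names
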
diff --git a/test/unit/datasets/test_datasets_loaders.py b/test/unit/datasets/test_datasets_loaders.py
new file mode 100644
index 00000000..0fd1ce17
--- /dev/null
+++ b/test/unit/datasets/test_datasets_loaders.py
@@ -0,0 +1,24 @@
+from meta_automl.data_preparation.datasets_loaders import OpenMLDatasetsLoader
+from test.unit.datasets.general_checks import check_dataset_cache
+
+
+def test_group_load_new_datasets(openml_dataset_ids):
+ loader = OpenMLDatasetsLoader()
+ datasets = loader.load(openml_dataset_ids)
+ assert loader.dataset_ids == openml_dataset_ids
+    for dataset in datasets:
+ check_dataset_cache(dataset)
+
+
+def test_load_single(openml_dataset_ids):
+ loader = OpenMLDatasetsLoader()
+ for dataset_id in openml_dataset_ids:
+ dataset = loader.load_single(dataset_id)
+ check_dataset_cache(dataset)
+
+
+def test_load_new_datasets_on_demand(openml_dataset_ids):
+ loader = OpenMLDatasetsLoader()
+ for dataset_id in openml_dataset_ids:
+ dataset = loader.load_single(dataset_id)
+ check_dataset_cache(dataset)
diff --git a/test/unit/datasets/test_file_dataset.py b/test/unit/datasets/test_file_dataset.py
new file mode 100644
index 00000000..125cb641
--- /dev/null
+++ b/test/unit/datasets/test_file_dataset.py
@@ -0,0 +1,48 @@
+import numpy as np
+import pytest
+
+from meta_automl.data_preparation.dataset import CacheNotFoundError, FileDataset, DatasetData
+from test.unit.datasets.general_checks import assert_cache_file_exists
+
+
+@pytest.fixture(scope='module')
+def new_dataset_data():
+ dataset_data = DatasetData(
+ x=np.array([['a', 'b'], ['b', 'a']]),
+ y=np.array([5, 10]),
+ categorical_indicator=[True, True],
+ attribute_names=['foo', 'bar']
+ )
+ return dataset_data
+
+
+@pytest.fixture(scope='module')
+def new_dataset(new_dataset_data):
+ dataset = FileDataset(42)
+ dataset.dump_data(new_dataset_data)
+ yield dataset
+ dataset.cache_path.unlink()
+
+
+def test_error_on_missing_dataset_cache():
+ with pytest.raises(CacheNotFoundError):
+ FileDataset('random_missing_dataset').get_data()
+
+
+def test_file_dataset_dumping(new_dataset):
+ # Act
+ cache_path = new_dataset.cache_path
+ # Assert
+ assert_cache_file_exists(cache_path)
+
+
+def test_file_dataset_data_loading(new_dataset_data, new_dataset):
+ # Act
+ correct_data = new_dataset_data
+ dataset = new_dataset
+ data = dataset.get_data()
+ # Assert
+ assert np.all(np.equal(data.x, correct_data.x))
+ assert np.all(np.equal(data.y, correct_data.y))
+ assert data.categorical_indicator == correct_data.categorical_indicator
+ assert data.attribute_names == correct_data.attribute_names
diff --git a/test/unit/datasets/test_openml_dataset.py b/test/unit/datasets/test_openml_dataset.py
new file mode 100644
index 00000000..81042648
--- /dev/null
+++ b/test/unit/datasets/test_openml_dataset.py
@@ -0,0 +1,27 @@
+from meta_automl.data_preparation.dataset import OpenMLDataset, DatasetData
+from meta_automl.data_preparation.file_system import get_dataset_cache_path_by_id
+from test.constants import OPENML_CACHED_DATASETS
+from test.unit.datasets.general_checks import check_dataset_cache
+
+
+def test_openml_dataset_creation(openml_dataset_ids):
+ for dataset_id in openml_dataset_ids:
+ dataset = OpenMLDataset(dataset_id)
+
+ assert dataset.id_ == dataset_id
+
+
+def test_openml_dataset_is_cached(openml_dataset_ids):
+    for dataset_id in openml_dataset_ids:
+        cache_path = get_dataset_cache_path_by_id(OpenMLDataset, dataset_id)
+
+        should_be_cached = dataset_id in OPENML_CACHED_DATASETS
+        assert should_be_cached == cache_path.exists()
+
+
+def test_openml_dataset_data_loading(openml_dataset_ids):
+ for dataset_id in openml_dataset_ids:
+ dataset = OpenMLDataset(dataset_id)
+ dataset_data = dataset.get_data()
+ assert isinstance(dataset_data, DatasetData)
+ check_dataset_cache(dataset)
diff --git a/test/unit/test_dataset.py b/test/unit/test_dataset.py
deleted file mode 100644
index 3ac46d6d..00000000
--- a/test/unit/test_dataset.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import numpy as np
-import pytest
-
-from meta_automl.data_preparation.dataset import DatasetCache, NoCacheError
-from test.constants import CACHED_DATASETS
-from test.data_manager import TestDataManager
-
-
-@pytest.fixture
-def dumped_cache_path():
- path = TestDataManager.get_dataset_cache_path('data_dumped')
- yield path
- path.unlink()
-
-
-def test_dataset_caching(dumped_cache_path):
- dataset_name = CACHED_DATASETS[0]
-
- cache_path = TestDataManager.get_dataset_cache_path(dataset_name)
-
- dataset_cache = DatasetCache(dataset_name, cache_path)
- dataset = dataset_cache.from_cache()
- dumped_cache = dataset.dump_to_cache(dumped_cache_path)
- reloaded_dataset = dumped_cache.from_cache()
- # Check data integrity.
- assert dataset.name == dataset_name
- assert reloaded_dataset.name == dataset_name
- assert dataset.id == reloaded_dataset.id
- assert np.all(np.equal(dataset.x, reloaded_dataset.x))
- assert np.all(np.equal(dataset.y, reloaded_dataset.y))
- # Check caching integrity.
- assert dataset_cache.cache_path == cache_path
- assert dataset.cache_path == cache_path
- assert dumped_cache.cache_path == dumped_cache_path
- assert reloaded_dataset.cache_path == dumped_cache_path
-
-
-def test_error_on_missing_dataset_cache():
- with pytest.raises(NoCacheError):
- DatasetCache('random_missing_cache').from_cache()
diff --git a/test/unit/test_datasets_loaders.py b/test/unit/test_datasets_loaders.py
deleted file mode 100644
index 1596e312..00000000
--- a/test/unit/test_datasets_loaders.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import time
-
-import pytest
-
-from meta_automl.data_preparation.dataset import DatasetCache
-from meta_automl.data_preparation.datasets_loaders import OpenMLDatasetsLoader
-from test.general_checks import check_dataset_and_cache
-from test.constants import CACHED_DATASETS
-from test.data_manager import TestDataManager
-
-
-@pytest.fixture
-def dataset_names():
- dataset_names = ['australian', 'blood-transfusion-service-center']
- yield dataset_names
- for dataset_name in dataset_names:
- if dataset_name not in CACHED_DATASETS:
- TestDataManager.get_dataset_cache_path(dataset_name).unlink(missing_ok=True)
-
-
-def test_group_load_new_datasets(dataset_names):
- test_start_time = time.time()
- loader = OpenMLDatasetsLoader()
- loader.data_manager = TestDataManager
-
- datasets = loader.load(dataset_names)
-
- assert loader.dataset_sources == dataset_names
-
- for dataset_name, dataset_cache in zip(dataset_names, datasets):
- check_dataset_and_cache(dataset_cache, dataset_name, dataset_cache.cache_path, test_start_time)
-
-
-def test_load_single(dataset_names):
- test_start_time = time.time()
- loader = OpenMLDatasetsLoader()
- loader.data_manager = TestDataManager
- for dataset_name in dataset_names:
- dataset_cache = loader.load_single(dataset_name)
- check_dataset_and_cache(dataset_cache, dataset_name, dataset_cache.cache_path, test_start_time)
-
-
-def test_load_new_datasets_on_demand(dataset_names):
- test_start_time = time.time()
- loader = OpenMLDatasetsLoader()
- loader.data_manager = TestDataManager
- for dataset_name in dataset_names:
- cache_path = TestDataManager.get_dataset_cache_path(dataset_name)
- dataset = loader.cache_to_memory(DatasetCache(dataset_name, cache_path))
- check_dataset_and_cache(dataset, dataset_name, cache_path, test_start_time)
diff --git a/test/unit/test_file_system.py b/test/unit/test_file_system.py
new file mode 100644
index 00000000..dba55923
--- /dev/null
+++ b/test/unit/test_file_system.py
@@ -0,0 +1,7 @@
+import pytest
+from pathlib import Path
+
+from meta_automl.data_preparation.file_system import get_data_dir, get_project_root
+
+# def test_root_dir():
+# assert get_project_root() ==
diff --git a/test/unit/test_meta_features_extractors.py b/test/unit/test_meta_features_extractors.py
index c5625f53..bd9b925b 100644
--- a/test/unit/test_meta_features_extractors.py
+++ b/test/unit/test_meta_features_extractors.py
@@ -1,38 +1,37 @@
-import time
+import shutil
import pytest
+from meta_automl.data_preparation.dataset import OpenMLDataset
+from meta_automl.data_preparation.file_system import get_dataset_cache_path_by_id, get_meta_features_cache_path
from meta_automl.data_preparation.meta_features_extractors import PymfeExtractor
-from test.general_checks import assert_file_unmodified_during_test, assert_cache_file_exists
-from test.data_manager import TestDataManager
-from test.constants import CACHED_DATASETS, DATASETS_WITH_CACHED_META_FEATURES
+from test.unit.datasets.general_checks import assert_file_unmodified_during_test, assert_cache_file_exists
+from test.constants import OPENML_DATASET_IDS_TO_LOAD, OPENML_CACHED_DATASETS, DATASETS_WITH_CACHED_META_FEATURES
@pytest.fixture
-def dataset_names():
- dataset_names = ['australian', 'monks-problems-1', 'monks-problems-2', 'blood-transfusion-service-center']
- yield dataset_names
- for dataset_name in dataset_names:
- if dataset_name not in CACHED_DATASETS + DATASETS_WITH_CACHED_META_FEATURES:
- TestDataManager.get_dataset_cache_path(dataset_name).unlink(missing_ok=True)
- if dataset_name not in DATASETS_WITH_CACHED_META_FEATURES:
- TestDataManager.get_meta_features_cache_path(dataset_name, PymfeExtractor.SOURCE).unlink(missing_ok=True)
+def dataset_ids():
+ dataset_ids = set(OPENML_CACHED_DATASETS + DATASETS_WITH_CACHED_META_FEATURES + OPENML_DATASET_IDS_TO_LOAD)
+ yield dataset_ids
+ for dataset_id in dataset_ids:
+ if dataset_id not in OPENML_CACHED_DATASETS:
+ dataset_cache_path = get_dataset_cache_path_by_id(OpenMLDataset, dataset_id)
+            shutil.rmtree(dataset_cache_path, ignore_errors=True)
+ if dataset_id not in DATASETS_WITH_CACHED_META_FEATURES:
+ mf_cache_path = get_meta_features_cache_path(PymfeExtractor, dataset_id)
+ mf_cache_path.unlink(missing_ok=True)
-def test_meta_features_extraction(dataset_names):
- test_start_time = time.time()
+def test_meta_features_extraction(dataset_ids):
extractor = PymfeExtractor(extractor_params={'groups': 'general'})
- extractor.data_manager = TestDataManager
- extractor.datasets_loader.data_manager = TestDataManager
- meta_features = extractor.extract(dataset_names)
- assert list(meta_features.index) == dataset_names
- for dataset_name in dataset_names:
- meta_features_cache_path = TestDataManager.get_meta_features_cache_path(
- dataset_name, extractor.SOURCE)
+ meta_features = extractor.extract(dataset_ids)
+ assert set(meta_features.index) == dataset_ids
+ for dataset_id in dataset_ids:
+ meta_features_cache_path = get_meta_features_cache_path(PymfeExtractor, dataset_id)
assert_cache_file_exists(meta_features_cache_path)
- if dataset_name in DATASETS_WITH_CACHED_META_FEATURES:
- assert_file_unmodified_during_test(meta_features_cache_path, test_start_time)
+ if dataset_id in DATASETS_WITH_CACHED_META_FEATURES:
+ assert_file_unmodified_during_test(meta_features_cache_path)
else:
- cache_path = TestDataManager.get_dataset_cache_path(dataset_name)
+ cache_path = get_dataset_cache_path_by_id(OpenMLDataset, dataset_id)
assert_cache_file_exists(cache_path)
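
Finally, the updated extractor test implies the following end-to-end usage. This is a sketch built only from the names imported in the test above; the return type and the per-dataset cache layout are inferred from the assertions, not from documented behaviour.

# Sketch of the meta-feature extraction flow covered by the updated test; illustrative only.
from meta_automl.data_preparation.meta_features_extractors import PymfeExtractor
from meta_automl.data_preparation.file_system import get_meta_features_cache_path

dataset_ids = [40981]  # 40981 is the cached 'australian' fixture; any OpenML id should work
extractor = PymfeExtractor(extractor_params={'groups': 'general'})
meta_features = extractor.extract(dataset_ids)  # indexed by dataset id, per the test's assertion

for dataset_id in dataset_ids:
    cache_path = get_meta_features_cache_path(PymfeExtractor, dataset_id)
    print(cache_path, cache_path.exists())      # extracted meta-features are cached per dataset
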