Skip to content

Commit

Permalink
Tony experiments (#29)
Browse files Browse the repository at this point in the history
* remove contract

* remove contract

* set n_jobs in ROCKET variants

* threaded rocket

* switch to threaded DrCIF

* switch to threaded DrCIF

* switch to threaded DrCIF

* add temp DrCIF to test Parallel options

* add temp DrCIF to test Parallel options

* revert

* set train file flag correctly

* iterate DrCIF over 30 Tiselac resamples

* Switch to DrCIF with FaceDetection

* DrCIF with FaceDetection

* DrCIF with InsectWingbeatEq

* comment on regression_experiments.py

* adjust set_regressor.py

* switch to HC2 EigenWorms

* switch to HC2 EigenWorms

* switch to HC2 EigenWorms

* tweak scripts

* switch output dir creation
  • Loading branch information
TonyBagnall authored Dec 14, 2022
1 parent 16f9bf3 commit 2af30a4
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 57 deletions.
9 changes: 6 additions & 3 deletions ada_uea_experiments/classification_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ max_folds=30
start_fold=1

# To avoid dumping 1000s of jobs in the queue we have a higher level queue
max_num_submitted=100
max_num_submitted=500

# Queue options are https://my.uea.ac.uk/divisions/it-and-computing-services/service-catalogue/research-it-services/hpc/ada-cluster/using-ada
queue="compute-64-512"
Expand Down Expand Up @@ -42,11 +42,12 @@ results_dir=$local_path"ClassificationResults/sktime/"
out_dir=$local_path"ClassificationResults/output/"

# The python script we are running
script_file_path=$local_path"Code/tsml-estimator-evaluation/tsml_eval/experiments/classification_experiments.py"
# Keep this assignment on one line: a line break inside the double quotes is
# preserved literally and would become part of the script path.
script_file_path=$local_path"Code/tsml-eval/tsml_eval/experiments/classification_experiments.py"

# Environment name, change accordingly, for set up, see https://hackmd.io/ds5IEK3oQAquD4c6AP2xzQ
# Separate environments for GPU (default python/anaconda/2020.11/3.8) and CPU (default python/anaconda/2019.10/3.7) are recommended
env_name="est-eval"
env_name="eval"

# Generating train folds is usually slower, set to false unless you need them
generate_train_files="false"
Expand Down Expand Up @@ -93,7 +94,9 @@ do
fi
done


if [ "${array_jobs}" != "" ]; then
mkdir -p ${out_dir}${classifier}/${dataset}/

# This creates the script to run the job based on the info above
echo "#!/bin/bash
Expand Down
2 changes: 1 addition & 1 deletion ada_uea_experiments/clustering_experiments.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ script_file_path=$local_path"Code/tsml-estimator-evaluation/tsml_eval/experiment

# Environment name, change accordingly, for set up, see https://hackmd.io/ds5IEK3oQAquD4c6AP2xzQ
# Separate environments for GPU (default python/anaconda/2020.11/3.8) and CPU (default python/anaconda/2019.10/3.7) are recommended
env_name="est-eval"
env_name="eval"

# todo this is currently only in for file skipping, should always be generating train files. need to rework clustering experiments more
generate_train_files="true"
Expand Down
3 changes: 2 additions & 1 deletion ada_uea_experiments/distance_clustering_experiments.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ script_file_path=$local_path"Code/tsml-estimator-evaluation/tsml_eval/experiment

# Environment name, change accordingly, for set up, see https://hackmd.io/ds5IEK3oQAquD4c6AP2xzQ
# Separate environments for GPU (default python/anaconda/2020.11/3.8) and CPU (default python/anaconda/2019.10/3.7) are recommended
env_name="est-eval"
env_name="eval"

generate_train_files="false"
clusterer="kmeans"
averaging="mean"
normalise=""

count=0
# dtw ddtw erp edr wdtw lcss twe msm dwdtw euclidean
Expand Down
7 changes: 4 additions & 3 deletions ada_uea_experiments/regression_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ max_folds=30
start_fold=1

# To avoid dumping 1000s of jobs in the queue we have a higher level queue
max_num_submitted=100
max_num_submitted=500

# Queue options are https://my.uea.ac.uk/divisions/it-and-computing-services/service-catalogue/research-it-services/hpc/ada-cluster/using-ada
queue="compute-64-512"
Expand Down Expand Up @@ -42,11 +42,12 @@ results_dir=$local_path"RegressionResults/sktime/"
out_dir=$local_path"RegressionResults/output/"

# The python script we are running
script_file_path=$local_path"Code/tsml-estimator-evaluation/tsml_eval/experiments/regression_experiments.py"
# Keep this assignment on one line: a line break inside the double quotes is
# preserved literally and would become part of the script path.
script_file_path=$local_path"Code/tsml-eval/tsml_eval/experiments/regression_experiments.py"

# Environment name, change accordingly, for set up, see https://hackmd.io/ds5IEK3oQAquD4c6AP2xzQ
# Separate environments for GPU (default python/anaconda/2020.11/3.8) and CPU (default python/anaconda/2019.10/3.7) are recommended
env_name="est-eval"
env_name="eval"

# Generating train folds is usually slower, set to false unless you need them
generate_train_files="false"
Expand Down
34 changes: 34 additions & 0 deletions tsml_eval/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Hacky area to test shit out"""
import time
from sktime.datasets import load_from_tsfile
from sktime.utils.sampling import stratified_resample
from sktime.distances import dtw_distance
import numpy as np
# Two toy 2 x 4 arrays used for a quick dtw_distance sanity check.
instance1 = np.array([[1,2,3,4], [4,3,2,1]])
instance2 = np.array([[2,3,4,5], [5,4,3,2]])
# These run at import time: print the array shape, then the distance between
# the two arrays as returned by dtw_distance.
print(" shape is [n_dimensions, series_length] = ", instance1.shape)
print(" DTW_D is = ", dtw_distance(instance1, instance2))


def time_data_load():
    """Print how long it takes to load and then resample each listed problem.

    For every problem name, the ``<name>_TRAIN.ts`` and ``<name>_TEST.ts`` files
    under ``C:/Data/<name>/`` are read with ``load_from_tsfile`` and the elapsed
    wall-clock time is printed. A single ``stratified_resample`` call on the
    loaded data is then timed and printed as well. Nothing is returned.
    """
    problems = ("InsectWingbeatEq",)
    for problem in problems:
        path_stem = f"C:/Data/{problem}/{problem}"

        # Time the two file reads together.
        tick = time.time()
        train_x, train_y = load_from_tsfile(f"{path_stem}_TRAIN.ts")
        test_x, test_y = load_from_tsfile(f"{path_stem}_TEST.ts")
        elapsed = time.time() - tick
        print(f" Load pandas for problem {problem} time taken = {elapsed}")

        # Time one resample; the trailing 1 is presumably the random seed --
        # TODO confirm against the stratified_resample signature.
        tick = time.time()
        train_x, train_y, test_x, test_y = stratified_resample(
            train_x, train_y, test_x, test_y, 1
        )
        elapsed = time.time() - tick
        print(f" resample time problem {problem} time taken = {elapsed}")

        # Disabled comparison: the same load with return_data_type="numpy3d".
        # start = time.time()
        # x, y = load_from_tsfile(f"C:/Data/{file}/{file}_TRAIN.ts",
        #                         return_data_type="numpy3d")
        # x2, y2 = load_from_tsfile(f"C:/Data/{file}/{file}_TEST.ts",
        #                           return_data_type="numpy3d")
        # end = time.time()
        # print(f" Load numpy for problem {file} time taken = {end-start}")


time_data_load()
44 changes: 25 additions & 19 deletions tsml_eval/experiments/classification_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,27 +75,33 @@ def run_experiment(args, overwrite=False):
overwrite=overwrite,
)
else: # Local run
data_dir = "../"
results_dir = "../"
cls_name = "DrCIF"
dataset = "ItalyPowerDemand"
resample = 0
data_dir = "/home/ajb/Data/"
results_dir = "/home/ajb/Results Working Area/ReduxBakeoff/sktime/"
cls_name = "HC2"
n_jobs = 92
contract_mins = 0
dataset = "EigenWorms"
print(f" Local Run of {cls_name} on dataset {dataset} with threading jobs "
f"={ n_jobs} and "
f"contract time ={contract_mins}")
train_fold = False
predefined_resample = False
classifier = set_classifier(cls_name, resample, train_fold)
print(f"Local Run of {classifier.__class__.__name__}.")

load_and_run_classification_experiment(
overwrite=False,
problem_path=data_dir,
results_path=results_dir,
cls_name=cls_name,
classifier=classifier,
dataset=dataset,
resample_id=resample,
build_train=train_fold,
predefined_resample=predefined_resample,
)
for resample in range(0, 30):
classifier = set_classifier(cls_name, resample_id=resample, n_jobs=n_jobs,
contract=contract_mins, train_file=train_fold)
print(f"Local Run of {classifier.__class__.__name__} with {classifier.n_jobs} jobs")

load_and_run_classification_experiment(
overwrite=False,
problem_path=data_dir,
results_path=results_dir,
cls_name=cls_name,
classifier=classifier,
dataset=dataset,
resample_id=resample,
build_train=train_fold,
predefined_resample=predefined_resample,
)


if __name__ == "__main__":
Expand Down
6 changes: 6 additions & 0 deletions tsml_eval/experiments/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# bool() on a string only tests for emptiness, not the text it holds, so every
# one of these non-empty strings -- including "false" and "False" -- gives True.
tf, tf2, tf3, tf4 = (bool(text) for text in ("false", "False", "true", "True"))
print(f" {tf} {tf2} {tf3} {tf4}")
52 changes: 30 additions & 22 deletions tsml_eval/experiments/distance_clustering_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,29 +86,41 @@ def _recreate_results(trainX, trainY):
clusterer = "kmeans"
chris_config = True # This is so chris doesn't have to change config each time
tune = False
if sys.argv.__len__() > 1: # cluster run, this is fragile
normalise = True
if sys.argv.__len__() > 1: # cluster run, this is fragile, requires all args atm
data_dir = sys.argv[1]
results_dir = sys.argv[2]
distance = sys.argv[3]
dataset = sys.argv[4]
resample = int(sys.argv[5]) - 1
tf = bool(sys.argv[6])
clusterer = sys.argv[7]
averaging = sys.argv[8]
# ADA starts indexing its jobs at 1, so we need to subtract 1
resample = int(args[5]) - 1
clusterer = sys.argv[6]
if len(args) > 7:
train_fold = args[7].lower() == "true"
else:
train_fold = False
if len(args) > 8:
averaging = args[8]
else:
averaging = "mean"
if len(args) > 9:
normalise = args[9].lower() == "true"
else:
normalise = False
if averaging == "dba":
results_dir = results_dir + clusterer + "_dba"

if results_present(results_dir, clusterer, dataset, resample):
print("Ignoring, results already present")


elif chris_config is True:
path = "C:/Users/chris/Documents/Masters"
data_dir = os.path.abspath(f"{path}/datasets/Multivariate_ts/")
results_dir = os.path.abspath(f"{path}/results/")
dataset = "Handwriting"
resample = 2
averaging = "mean"
tf = True
train_fold = True
distance = "dtw"

else: # Local run
Expand All @@ -118,7 +130,7 @@ def _recreate_results(trainX, trainY):
results_dir = "./temp"
resample = 0
averaging = "dba"
tf = True
train_fold = True
distance = "dtw"

if isinstance(dataset, str):
Expand All @@ -137,26 +149,22 @@ def _recreate_results(trainX, trainY):
# import sys

from sklearn.preprocessing import StandardScaler

s = StandardScaler()
train_X = s.fit_transform(train_X.T)
train_X = train_X.T
test_X = s.fit_transform(test_X.T)
test_X = test_X.T
if normalise:
s = StandardScaler()
train_X = s.fit_transform(train_X.T)
train_X = train_X.T
test_X = s.fit_transform(test_X.T)
test_X = test_X.T
w = 1.0
if tune:
w = tune_window(distance, train_X, len(set(train_Y)))
name = clusterer + "-" + distance + "-tuned"
else:
name = clusterer + "-" + distance
# w = 1.0
# if (
# distance == "wdtw"
# or distance == "dwdtw"
# or distance == "dtw"
# or distance == "wdtw"
# ):
# w = 0.2
w = 1.0
if (distance == "wdtw" or distance == "dwdtw" or distance == "dtw" or distance ==
"wdtw"):
w = 0.2
parameters = {
"window": w,
"epsilon": 0.05,
Expand Down
4 changes: 3 additions & 1 deletion tsml_eval/experiments/regression_experiments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
"""Classifier Experiments: code to run experiments as an alternative to orchestration.
"""Regressor Experiments: code to run experiments and generate results file in
standard format.
This file is configured for runs of the main method with command line arguments, or for
single debugging runs. Results are written in a standard format. It is cloned from
Expand All @@ -10,6 +11,7 @@

import os

# Remove if not running on cluster?
os.environ["MKL_NUM_THREADS"] = "1" # must be done before numpy import!!
os.environ["NUMEXPR_NUM_THREADS"] = "1" # must be done before numpy import!!
os.environ["OMP_NUM_THREADS"] = "1" # must be done before numpy import!!
Expand Down
17 changes: 10 additions & 7 deletions tsml_eval/experiments/set_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,7 @@ def set_classifier(cls, resample_id=None, train_file=False, n_jobs=1, contract=0
return HIVECOTEV1(random_state=resample_id)
elif name == "hc2" or name == "hivecotev2":
from sktime.classification.hybrid import HIVECOTEV2

return HIVECOTEV2(random_state=resample_id)
return HIVECOTEV2(random_state=resample_id, n_jobs=n_jobs)
# Interval based
elif name == "rise-500":
from sktime.classification.interval_based import RandomIntervalSpectralEnsemble
Expand Down Expand Up @@ -251,7 +250,6 @@ def set_classifier(cls, resample_id=None, train_file=False, n_jobs=1, contract=0
return SupervisedTimeSeriesForest(random_state=resample_id, n_jobs=n_jobs)
elif name == "drcif-500":
from sktime.classification.interval_based import DrCIF

return DrCIF(
random_state=resample_id,
n_estimators=500,
Expand All @@ -268,28 +266,32 @@ def set_classifier(cls, resample_id=None, train_file=False, n_jobs=1, contract=0
elif name == "rocket" or name == "rocketclassifier":
from sktime.classification.kernel_based import RocketClassifier

return RocketClassifier(random_state=resample_id)
return RocketClassifier(random_state=resample_id, n_jobs=n_jobs)
elif name == "mini-rocket":
from sktime.classification.kernel_based import RocketClassifier

return RocketClassifier(random_state=resample_id, rocket_transform="minirocket")
return RocketClassifier(random_state=resample_id,
rocket_transform="minirocket", n_jobs=n_jobs)
elif name == "multi-rocket":
from sktime.classification.kernel_based import RocketClassifier

return RocketClassifier(
random_state=resample_id, rocket_transform="multirocket"
random_state=resample_id, rocket_transform="multirocket", n_jobs=n_jobs,

)
elif name == "arsenal":
from sktime.classification.kernel_based import Arsenal

return Arsenal(random_state=resample_id, save_transformed_data=train_file)
return Arsenal(random_state=resample_id, save_transformed_data=train_file,
n_jobs=n_jobs)
elif name == "mini-arsenal":
from sktime.classification.kernel_based import Arsenal

return Arsenal(
random_state=resample_id,
save_transformed_data=train_file,
rocket_transform="minirocket",
n_jobs=n_jobs,
)
elif name == "multi-arsenal":
from sktime.classification.kernel_based import Arsenal
Expand All @@ -298,6 +300,7 @@ def set_classifier(cls, resample_id=None, train_file=False, n_jobs=1, contract=0
random_state=resample_id,
save_transformed_data=train_file,
rocket_transform="multirocket",
n_jobs=n_jobs,
)
elif name == "hydra":
from tsml_eval.sktime_estimators.classification.hydra import HYDRA
Expand Down

0 comments on commit 2af30a4

Please sign in to comment.