diff --git a/examples/mnist-keras/client/get_data.py b/examples/mnist-keras/client/data.py old mode 100755 new mode 100644 similarity index 57% rename from examples/mnist-keras/client/get_data.py rename to examples/mnist-keras/client/data.py index ed123a4a3..749722d98 --- a/examples/mnist-keras/client/get_data.py +++ b/examples/mnist-keras/client/data.py @@ -4,6 +4,52 @@ import numpy as np import tensorflow as tf +dir_path = os.path.dirname(os.path.realpath(__file__)) +abs_path = os.path.abspath(dir_path) + +NUM_CLASSES = 10 + + +def get_data(out_dir="data"): + # Make dir if necessary + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + # Download data + (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() + np.savez(f"{out_dir}/mnist.npz", x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test) + + +def load_data(data_path, is_train=True): + """Load data from disk. + + :param data_path: Path to data file. + :type data_path: str + :param is_train: Whether to load train or test data. + :type is_train: bool + :return: Tuple of data and labels. 
+ :rtype: tuple + """ + if data_path is None: + data_path = os.environ.get("FEDN_DATA_PATH", abs_path + "/data/clients/1/mnist.npz") + + data = np.load(data_path) + + if is_train: + X = data["x_train"] + y = data["y_train"] + else: + X = data["x_test"] + y = data["y_test"] + + # Normalize + X = X.astype("float32") + X = np.expand_dims(X, -1) + X = X / 255 + y = tf.keras.utils.to_categorical(y, NUM_CLASSES) + + return X, y + def splitset(dataset, parts): n = dataset.shape[0] @@ -33,16 +79,8 @@ def split(dataset="data/mnist.npz", outdir="data", n_splits=2): np.savez(f"{subdir}/mnist.npz", x_train=data["x_train"][i], y_train=data["y_train"][i], x_test=data["x_test"][i], y_test=data["y_test"][i]) -def get_data(out_dir="data"): - # Make dir if necessary - if not os.path.exists(out_dir): - os.mkdir(out_dir) - - # Download data - (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() - np.savez(f"{out_dir}/mnist.npz", x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test) - - if __name__ == "__main__": - get_data() - split() + # Prepare data if not already done + if not os.path.exists(abs_path + "/data/clients/1"): + get_data() + split() diff --git a/examples/mnist-keras/client/entrypoint.py b/examples/mnist-keras/client/entrypoint.py deleted file mode 100755 index 1ed8f2f77..000000000 --- a/examples/mnist-keras/client/entrypoint.py +++ /dev/null @@ -1,194 +0,0 @@ -import json -import os - -import fire -import numpy as np -import tensorflow as tf - -from fedn.utils.helpers.helpers import get_helper, save_metadata, save_metrics - -HELPER_MODULE = "numpyhelper" -helper = get_helper(HELPER_MODULE) - -NUM_CLASSES = 10 - -dir_path = os.path.dirname(os.path.realpath(__file__)) -abs_path = os.path.abspath(dir_path) - - -def _get_data_path(): - data_path = os.environ.get("FEDN_DATA_PATH", abs_path + "/data/clients/1/mnist.npz") - - return data_path - - -def compile_model(img_rows=28, img_cols=28): - """Compile the TF model. 
- - param: img_rows: The number of rows in the image - type: img_rows: int - param: img_cols: The number of rows in the image - type: img_cols: int - return: The compiled model - type: keras.model.Sequential - """ - # Set input shape - input_shape = (img_rows, img_cols, 1) - - # Define model - model = tf.keras.models.Sequential() - model.add(tf.keras.layers.Flatten(input_shape=input_shape)) - model.add(tf.keras.layers.Dense(64, activation="relu")) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(32, activation="relu")) - model.add(tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")) - model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=["accuracy"]) - - return model - - -def load_data(data_path, is_train=True): - # Load data - if data_path is None: - data = np.load(_get_data_path()) - else: - data = np.load(data_path) - - if is_train: - X = data["x_train"] - y = data["y_train"] - else: - X = data["x_test"] - y = data["y_test"] - - # Normalize - X = X.astype("float32") - X = np.expand_dims(X, -1) - X = X / 255 - y = tf.keras.utils.to_categorical(y, NUM_CLASSES) - - return X, y - - -def init_seed(out_path="../seed.npz"): - """Initialize seed model and save it to file. - - :param out_path: The path to save the seed model to. - :type out_path: str - """ - weights = compile_model().get_weights() - helper.save(weights, out_path) - - -def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1): - """Complete a model update. - - Load model paramters from in_model_path (managed by the FEDn client), - perform a model update, and write updated paramters - to out_model_path (picked up by the FEDn client). - - :param in_model_path: The path to the input model. - :type in_model_path: str - :param out_model_path: The path to save the output model to. - :type out_model_path: str - :param data_path: The path to the data file. 
- :type data_path: str - :param batch_size: The batch size to use. - :type batch_size: int - :param epochs: The number of epochs to train. - :type epochs: int - """ - # Load data - x_train, y_train = load_data(data_path) - - # Load model - model = compile_model() - weights = helper.load(in_model_path) - model.set_weights(weights) - - # Train - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs) - - # Metadata needed for aggregation server side - metadata = { - # num_examples are mandatory - "num_examples": len(x_train), - "batch_size": batch_size, - "epochs": epochs, - } - - # Save JSON metadata file (mandatory) - save_metadata(metadata, out_model_path) - - # Save model update (mandatory) - weights = model.get_weights() - helper.save(weights, out_model_path) - - -def validate(in_model_path, out_json_path, data_path=None): - """Validate model. - - :param in_model_path: The path to the input model. - :type in_model_path: str - :param out_json_path: The path to save the output JSON to. - :type out_json_path: str - :param data_path: The path to the data file. 
- :type data_path: str - """ - # Load data - x_train, y_train = load_data(data_path) - x_test, y_test = load_data(data_path, is_train=False) - - # Load model - model = compile_model() - helper = get_helper(HELPER_MODULE) - weights = helper.load(in_model_path) - model.set_weights(weights) - - # Evaluate - model_score = model.evaluate(x_train, y_train) - model_score_test = model.evaluate(x_test, y_test) - y_pred = model.predict(x_test) - y_pred = np.argmax(y_pred, axis=1) - - # JSON schema - report = { - "training_loss": model_score[0], - "training_accuracy": model_score[1], - "test_loss": model_score_test[0], - "test_accuracy": model_score_test[1], - } - - # Save JSON - save_metrics(report, out_json_path) - - -def predict(in_model_path, out_json_path, data_path=None): - # Using test data for inference but another dataset could be loaded - x_test, _ = load_data(data_path, is_train=False) - - # Load model - model = compile_model() - helper = get_helper(HELPER_MODULE) - weights = helper.load(in_model_path) - model.set_weights(weights) - - # Infer - y_pred = model.predict(x_test) - y_pred = np.argmax(y_pred, axis=1) - - # Save JSON - with open(out_json_path, "w") as fh: - fh.write(json.dumps({"predictions": y_pred.tolist()})) - - -if __name__ == "__main__": - fire.Fire( - { - "init_seed": init_seed, - "train": train, - "validate": validate, - "predict": predict, - "_get_data_path": _get_data_path, # for testing - } - ) diff --git a/examples/mnist-keras/client/fedn.yaml b/examples/mnist-keras/client/fedn.yaml index 776dc9594..30873488b 100644 --- a/examples/mnist-keras/client/fedn.yaml +++ b/examples/mnist-keras/client/fedn.yaml @@ -1,12 +1,12 @@ python_env: python_env.yaml entry_points: build: - command: python entrypoint.py init_seed + command: python model.py startup: - command: python get_data.py + command: python data.py train: - command: python entrypoint.py train $ENTRYPOINT_OPTS + command: python train.py validate: - command: python entrypoint.py validate 
$ENTRYPOINT_OPTS + command: python validate.py predict: - command: python entrypoint.py predict $ENTRYPOINT_OPTS + command: python predict.py \ No newline at end of file diff --git a/examples/mnist-keras/client/model.py b/examples/mnist-keras/client/model.py new file mode 100644 index 000000000..696ac8c13 --- /dev/null +++ b/examples/mnist-keras/client/model.py @@ -0,0 +1,71 @@ +import tensorflow as tf + +from fedn.utils.helpers.helpers import get_helper + +NUM_CLASSES = 10 +HELPER_MODULE = "numpyhelper" +helper = get_helper(HELPER_MODULE) + + +def compile_model(img_rows=28, img_cols=28): + """Compile the TF model. + + param: img_rows: The number of rows in the image + type: img_rows: int + param: img_cols: The number of columns in the image + type: img_cols: int + return: The compiled model + type: keras.model.Sequential + """ + # Set input shape + input_shape = (img_rows, img_cols, 1) + + # Define model + model = tf.keras.models.Sequential() + model.add(tf.keras.layers.Flatten(input_shape=input_shape)) + model.add(tf.keras.layers.Dense(64, activation="relu")) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(32, activation="relu")) + model.add(tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")) + model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=["accuracy"]) + return model + + +def save_parameters(model, out_path): + """Save model parameters to file. + + :param model: The model to serialize. + :type model: keras.model.Sequential + :param out_path: The path to save the model to. + :type out_path: str + """ + weights = model.get_weights() + helper.save(weights, out_path) + + +def load_parameters(model_path): + """Load model parameters from file and populate model. + + :param model_path: The path to load from. + :type model_path: str + :return: The loaded model. 
+ :rtype: keras.model.Sequential + """ + model = compile_model() + weights = helper.load(model_path) + model.set_weights(weights) + return model + + +def init_seed(out_path="../seed.npz"): + """Initialize seed model and save it to file. + + :param out_path: The path to save the seed model to. + :type out_path: str + """ + weights = compile_model().get_weights() + helper.save(weights, out_path) + + +if __name__ == "__main__": + init_seed("../seed.npz") diff --git a/examples/mnist-keras/client/predict.py b/examples/mnist-keras/client/predict.py new file mode 100644 index 000000000..9d502ed75 --- /dev/null +++ b/examples/mnist-keras/client/predict.py @@ -0,0 +1,30 @@ +import json +import os +import sys + +import numpy as np +from data import load_data +from model import load_parameters + +dir_path = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.abspath(dir_path)) + + +def predict(in_model_path, out_json_path, data_path=None): + # Using test data for inference but another dataset could be loaded + x_test, _ = load_data(data_path, is_train=False) + + # Load model + model = load_parameters(in_model_path) + + # Infer + y_pred = model.predict(x_test) + y_pred = np.argmax(y_pred, axis=1) + + # Save JSON + with open(out_json_path, "w") as fh: + fh.write(json.dumps({"predictions": y_pred.tolist()})) + + +if __name__ == "__main__": + predict(sys.argv[1], sys.argv[2]) diff --git a/examples/mnist-keras/client/train.py b/examples/mnist-keras/client/train.py new file mode 100644 index 000000000..e899f7c4d --- /dev/null +++ b/examples/mnist-keras/client/train.py @@ -0,0 +1,56 @@ +import os +import sys + +from data import load_data +from model import load_parameters, save_parameters + +from fedn.utils.helpers.helpers import save_metadata + +dir_path = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.abspath(dir_path)) + + +def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1): + """Complete a model update. 
+ + Load model parameters from in_model_path (managed by the FEDn client), + perform a model update, and write updated parameters + to out_model_path (picked up by the FEDn client). + + :param in_model_path: The path to the input model. + :type in_model_path: str + :param out_model_path: The path to save the output model to. + :type out_model_path: str + :param data_path: The path to the data file. + :type data_path: str + :param batch_size: The batch size to use. + :type batch_size: int + :param epochs: The number of epochs to train. + :type epochs: int + """ + # Load data + x_train, y_train = load_data(data_path) + + # Load model + model = load_parameters(in_model_path) + + # Train + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs) + + # Metadata needed for aggregation server side + metadata = { + # num_examples are mandatory + "num_examples": len(x_train), + "batch_size": batch_size, + "epochs": epochs, + } + + # Save JSON metadata file (mandatory) + save_metadata(metadata, out_model_path) + + # Save model update (mandatory) + save_parameters(model, out_model_path) + + +if __name__ == "__main__": + train(sys.argv[1], sys.argv[2]) diff --git a/examples/mnist-keras/client/validate.py b/examples/mnist-keras/client/validate.py new file mode 100644 index 000000000..6b462dfc2 --- /dev/null +++ b/examples/mnist-keras/client/validate.py @@ -0,0 +1,50 @@ +import os +import sys + +import numpy as np +from data import load_data +from model import load_parameters + +from fedn.utils.helpers.helpers import save_metrics + +dir_path = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.abspath(dir_path)) + + +def validate(in_model_path, out_json_path, data_path=None): + """Validate model. + + :param in_model_path: The path to the input model. + :type in_model_path: str + :param out_json_path: The path to save the output JSON to. + :type out_json_path: str + :param data_path: The path to the data file. 
+ :type data_path: str + """ + # Load data + x_train, y_train = load_data(data_path) + x_test, y_test = load_data(data_path, is_train=False) + + # Load model + model = load_parameters(in_model_path) + + # Evaluate + model_score = model.evaluate(x_train, y_train) + model_score_test = model.evaluate(x_test, y_test) + y_pred = model.predict(x_test) + y_pred = np.argmax(y_pred, axis=1) + + # JSON schema + report = { + "training_loss": model_score[0], + "training_accuracy": model_score[1], + "test_loss": model_score_test[0], + "test_accuracy": model_score_test[1], + } + + # Save JSON + save_metrics(report, out_json_path) + + +if __name__ == "__main__": + validate(sys.argv[1], sys.argv[2])