From 0410583f1ca9ea380765be5815d1f19dfb0d2c05 Mon Sep 17 00:00:00 2001 From: syurkevi Date: Mon, 24 Sep 2018 19:09:20 -0400 Subject: [PATCH 1/5] adds googletest framework for testing --- .gitmodules | 3 + CMakeLists.txt | 6 ++ extern/googletest | 1 + src/nn/Modules/Loss.cpp | 22 ++++- tests/CMakeLists.txt | 20 +++++ tests/test_autograd.cpp | 191 ++++++++++++++++++++++++++++++++++++++++ tests/test_loss.cpp | 24 +++++ 7 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 160000 extern/googletest create mode 100644 tests/CMakeLists.txt create mode 100644 tests/test_autograd.cpp create mode 100644 tests/test_loss.cpp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..bac2581 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "extern/googletest"] + path = extern/googletest + url = https://github.com/google/googletest.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 9560d4b..257efc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,3 +35,9 @@ set_target_properties(afml CXX_STANDARD 11) add_subdirectory(examples) + +option(PACKAGE_TESTS "Build tests" ON) + if(PACKAGE_TESTS) + enable_testing() + add_subdirectory(tests) + endif() diff --git a/extern/googletest b/extern/googletest new file mode 160000 index 0000000..ed6e84c --- /dev/null +++ b/extern/googletest @@ -0,0 +1 @@ +Subproject commit ed6e84ccef7f3c0f31caca60a47077a46d72a806 diff --git a/src/nn/Modules/Loss.cpp b/src/nn/Modules/Loss.cpp index 226f8e4..f15d965 100644 --- a/src/nn/Modules/Loss.cpp +++ b/src/nn/Modules/Loss.cpp @@ -46,7 +46,7 @@ namespace af binaryCrossEntropy(const autograd::Variable &inputs, const autograd::Variable &targets) { - targets * inputs + (1 - targets) * (1 - inputs); + return targets * inputs + (1 - targets) * (1 - inputs); } autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs, @@ -61,5 +61,25 @@ namespace af { return mean(flat(weights * binaryCrossEntropy(inputs, targets)), {0}); } + + static autograd::Variable + CrossEntropy(const autograd::Variable &inputs, + const autograd::Variable &targets) + { + return targets * inputs + (1 - targets) * (1 - inputs); + } + + autograd::Variable CrossEntropyLoss::forward(const autograd::Variable &inputs, + const autograd::Variable &targets) + { + return mean(flat(CrossEntropy(inputs, targets)), {0}); + } + + autograd::Variable CrossEntropyLoss::forward(const autograd::Variable &inputs, + const autograd::Variable &targets, + const autograd::Variable &weights) + { + return mean(flat(weights * CrossEntropy(inputs, targets)), {0}); + } } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..70e2a11 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,20 @@ +add_subdirectory("${PROJECT_SOURCE_DIR}/extern/googletest" "extern/googletest") +mark_as_advanced( + BUILD_GMOCK BUILD_GTEST BUILD_SHARED_LIBS + gmock_build_tests gtest_build_samples gtest_build_tests + gtest_disable_pthreads gtest_force_shared_crt gtest_hide_internal_symbols +) + +macro(package_add_test TESTNAME) + add_executable(${TESTNAME} ${ARGN}) + target_link_libraries(${TESTNAME} PRIVATE + gtest + gmock + gtest_main + afml) + add_test(${TESTNAME} ${TESTNAME}) + set_target_properties(${TESTNAME} PROPERTIES FOLDER tests) +endmacro() + +package_add_test(test_loss test_loss.cpp) +package_add_test(test_autograd test_autograd.cpp) diff --git a/tests/test_autograd.cpp b/tests/test_autograd.cpp new file mode 100644 index 0000000..b7e8fcf --- /dev/null +++ 
b/tests/test_autograd.cpp @@ -0,0 +1,191 @@ +/******************************************************* + * Copyright (c) 2018, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include +#include +#include + +using af::allTrue; +using af::autograd::Variable; + +TEST(Autograd, Multiply) +{ + auto x = Variable(af::randu(5), true); + auto y = x * x; + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + EXPECT_TRUE(allTrue((dx.array() - 2 * x.array()) < 1E-5)); +} + +TEST(Autograd, MultiplyAdd) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + auto diffx = dx.array() - 2 * x.array() - y.array(); + auto diffy = dy.array() - 2 * y.array() - x.array(); + + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, noCalcGrad) +{ + auto x = Variable(af::randu(5), false); + auto y = Variable(af::randu(5), true); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dy = y.grad(); + + auto diffy = (dy.array() - 2 * y.array() - x.array()); + EXPECT_TRUE(allTrue(diffy < 1E-5)); + try { + auto dx = x.grad(); + } catch(af::exception &ex) { + std::cout << ex.what() << std::endl; + return; + } + printf("%s:%d No Gradient check Failed\n"); +} + +TEST(Autograd, MultiplySub) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x * x - x * y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + auto diffx = (dx.array() - (2 * x.array() - y.array())); + auto diffy = (dy.array() - (-x.array())); + + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, DivideAdd) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x + x / y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + auto diffx = (dx.array() - (1.0 + 1.0 / y.array())); + auto diffy = (dy.array() - (1.0 - x.array() / (y.array() * y.array()))); + + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, MultiplyAddScalar) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = 2 * x + x * y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + auto diffx = (dx.array() - (2.0 + y.array())); + auto diffy = (dy.array() - (1.0 + x.array())); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, Exp) +{ + auto x = Variable(af::randu(5), true); + auto y = exp(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + auto diffx = (dx.array() - (af::exp(x.array()))); + EXPECT_TRUE(allTrue(diffx < 1E-5)); +} + +TEST(Autograd, Sigmoid) +{ + auto x = Variable(af::randu(5), true); + auto y = sigmoid(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + auto diffx = (dx.array() - (y.array() * (1 - y.array()))); + auto diffy = 
(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, Tanh) +{ + auto x = Variable(af::randu(5), true); + auto y = tanh(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + auto diffx = (dx.array() - (1 - y.array() * y.array())); + auto diffy = (dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, Tile) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = y * tileAs(x, y); + auto dz = Variable(af::constant(1.0, 5, 2), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + auto diffx = (dy.array() - af::tile(x.array(), 1, 2)); + auto diffy = (dx.array() - af::sum(y.array(), 1)); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, Sum) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = x * sumAs(y, x); + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + auto diffx = (dy.array() - af::tile(x.array(), 1, 2)); + auto diffy = (dx.array() - af::sum(y.array(), 1)); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} + +TEST(Autograd, Mean) +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 3, 2), true); + auto z = x * mean(y, {1,2}); + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + auto diffx = (dy.array() - 6 * af::tile(x.array(), 1, 3, 2)); + auto diffy = (dx.array() - af::mean(af::mean(y.array(), 1), 2)); + EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(diffy < 1E-5)); +} diff --git a/tests/test_loss.cpp b/tests/test_loss.cpp new file mode 100644 index 0000000..f0f7e05 --- /dev/null +++ b/tests/test_loss.cpp @@ -0,0 +1,24 @@ +/******************************************************* + * Copyright (c) 2018, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include +#include +#include + +using af::autograd::Variable; + +TEST(Autograd, Multiply) { + auto x = Variable(af::randu(5), true); + auto y = x * x; + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + EXPECT_TRUE(af::allTrue((dx.array() - 2 * x.array()) < 1E-5)); +} From 54fd74f3ff17e876c235c2b29de6ddef13e32a82 Mon Sep 17 00:00:00 2001 From: syurkevi Date: Mon, 1 Oct 2018 17:02:06 -0400 Subject: [PATCH 2/5] adds indexing and softmax autograd functions --- CMakeLists.txt | 8 +-- include/af/autograd/Functions.hpp | 3 + src/autograd/Functions.cpp | 49 +++++++++++++- tests/test_autograd.cpp | 106 ++++++++++++++++++++++++------ 4 files changed, 140 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 257efc7..f4121ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ set_target_properties(afml add_subdirectory(examples) option(PACKAGE_TESTS "Build tests" ON) - if(PACKAGE_TESTS) - enable_testing() - add_subdirectory(tests) - endif() +if(PACKAGE_TESTS) + enable_testing() + add_subdirectory(tests) +endif() diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index e4f471d..d3aa43c 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -44,6 +44,8 @@ namespace af { Variable operator <=(const Variable &lhs, const double &rhs); Variable operator !(const Variable &input); + Variable select_index(const Variable &input, const Variable &idx); + Variable set_index(const Variable &input, const Variable &idx, const Variable &vals); Variable negate(const Variable &input); Variable reciprocal(const Variable &input); @@ -54,6 +56,7 @@ namespace af { Variable cos(const Variable &input); Variable tanh(const Variable &input); Variable sigmoid(const Variable &input); + Variable softmax(const Variable &input); Variable max(const Variable &lhs, const Variable &rhs); Variable max(const Variable &lhs, const double &rhs); diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 938dba6..4855b4d 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -117,6 +117,35 @@ namespace af { return Variable(result, false); } + Variable select_index(const Variable &input, const Variable &idx) + { + af::array result = input.array()(idx.array()); + af::array mask = af::constant(0, input.dims()); + mask(idx.array()) = 1; + + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto grad = inputs[2].array(); + auto grad_mask = af::where(grad); + grad(grad_mask) *= grad_output.array(); + + inputs[0].addGrad(Variable(grad, false)); + }; + return Variable(result, {input, idx, Variable(mask, false)}, grad_func); + } + + Variable set_index(const Variable &input, const Variable &idx, const Variable &vals) + { + af::array result = input.array(); + result(idx.array()) = vals.array(); + af::array mask = af::constant(1, input.dims(), s32); + mask(idx.array()) = 0; + + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(inputs[3] * grad_output); + }; + return Variable(result, {input, idx, vals, Variable(mask, false)}, grad_func); + } + Variable max(const Variable &lhs, const Variable &rhs) { auto mask = lhs > rhs; @@ -241,6 +270,24 @@ namespace af { return Variable(result, {input}, grad_func); } + Variable softmax(const 
Variable &input) + { + //todo: add axis to apply? + auto exps = exp(input.array()); + auto result = exps / tile(sum(exps, 0), exps.dims(0)); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto exps = exp(inputs[0]); + auto tmp = exps / tileAs(sum(exps, {0}), exps); + + auto ps_j = tile(tmp, { 1, (int)tmp.dims()[0] } ); + auto ps_i = transpose(tile(tmp, {1,(int)tmp.dims()[0] } )); + Variable I(identity((int)tmp.dims()[0], (int)tmp.dims()[0]), false); + auto jac = (sum(ps_i * (I - ps_j), { 1 })); + inputs[0].addGrad(grad_output * jac); + }; + return Variable(result, {input}, grad_func); + } + Variable transpose(const Variable &input) { auto result = transpose(input.array()); @@ -281,7 +328,7 @@ namespace af { Variable tile(const Variable &input, const std::vector &repeats) { - dim4 dims; + dim4 dims(0); for (size_t i = 0; i < repeats.size(); i++) { dims[i] = repeats[i]; } diff --git a/tests/test_autograd.cpp b/tests/test_autograd.cpp index b7e8fcf..279f78f 100644 --- a/tests/test_autograd.cpp +++ b/tests/test_autograd.cpp @@ -22,7 +22,8 @@ TEST(Autograd, Multiply) auto dy = Variable(af::constant(1.0, 5), false); y.backward(dy); auto dx = x.grad(); - EXPECT_TRUE(allTrue((dx.array() - 2 * x.array()) < 1E-5)); + auto diff = dx.array() - 2 * x.array(); + EXPECT_TRUE(allTrue(abs(diff) < 1E-5)); } TEST(Autograd, MultiplyAdd) @@ -37,8 +38,8 @@ TEST(Autograd, MultiplyAdd) auto diffx = dx.array() - 2 * x.array() - y.array(); auto diffy = dy.array() - 2 * y.array() - x.array(); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, noCalcGrad) @@ -51,7 +52,7 @@ TEST(Autograd, noCalcGrad) auto dy = y.grad(); auto diffy = (dy.array() - 2 * y.array() - x.array()); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); try { auto dx = x.grad(); } catch(af::exception &ex) { @@ -73,8 +74,8 @@ TEST(Autograd, MultiplySub) auto diffx = (dx.array() - (2 * x.array() - y.array())); auto diffy = (dy.array() - (-x.array())); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, DivideAdd) @@ -89,8 +90,8 @@ TEST(Autograd, DivideAdd) auto diffx = (dx.array() - (1.0 + 1.0 / y.array())); auto diffy = (dy.array() - (1.0 - x.array() / (y.array() * y.array()))); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, MultiplyAddScalar) @@ -104,8 +105,8 @@ TEST(Autograd, MultiplyAddScalar) auto dy = y.grad(); auto diffx = (dx.array() - (2.0 + y.array())); auto diffy = (dy.array() - (1.0 + x.array())); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, Exp) @@ -116,7 +117,7 @@ TEST(Autograd, Exp) y.backward(dy); auto dx = x.grad(); auto diffx = (dx.array() - (af::exp(x.array()))); - EXPECT_TRUE(allTrue(diffx < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); } TEST(Autograd, Sigmoid) @@ -128,8 +129,71 @@ TEST(Autograd, Sigmoid) auto dx = x.grad(); auto diffx = (dx.array() - (y.array() * (1 - y.array()))); auto diffy = (dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + 
EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); +} + +/* +TEST(Autograd, Softmax) +{ + auto x = Variable(af::randu(5), true); + auto x1a = x.array(); + x1a(0) += 0.1; + auto x1 = Variable(x1a, true); + + auto y = softmax(x); + auto y1 = softmax(x1); + + //auto dy = Variable(af::constant(1.0, 5), false); + //y.backward(dy); + y.backward(); + y1.backward(); + auto dx = x.grad(); + + af_print(x.array()); + af_print(y.array()); + af_print(x1.array()); + af_print(y1.array()); + printf("distribution sums to 1? %f\n", af::sum(y.array())); + af_print(dx.array()); + + //auto diffx = (dx.array() - (y.array() * (1 - y.array()))); + //auto diffy = (dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); + //EXPECT_TRUE(allTrue(diffx < 1E-5)); + //EXPECT_TRUE(allTrue(diffy < 1E-5)); +} +*/ + +TEST(Autograd, set_index) +{ + auto x = Variable(af::range(5) + 0.5, true); + auto idx = Variable(af::range(2) + 1, false); + + auto y = set_index(x, idx, Variable(af::constant(-2.0, idx.dims()), false)); + auto z = sum(2*y, {0}); + z.backward(); + + auto expected_grad = constant(2, x.dims()); + expected_grad(idx.array()) = 0; + + auto diff = (x.grad().array() - expected_grad); + EXPECT_TRUE(allTrue(abs(diff) < 1E-5)); +} + +TEST(Autograd, select_index) +{ + auto x = Variable(af::randu(5), true); + auto idx = Variable(af::range(2) + 1, false); + + auto y = select_index(x, idx); + auto z = sum(2*y, {0}); + z.backward(); + + auto expected_grad = constant(0, x.dims()); + expected_grad(idx.array()) = 2; + + auto diff = (x.grad().array() - expected_grad); + EXPECT_TRUE(allTrue(abs(diff) < 1E-5)); } TEST(Autograd, Tanh) @@ -141,8 +205,8 @@ TEST(Autograd, Tanh) auto dx = x.grad(); auto diffx = (dx.array() - (1 - y.array() * y.array())); auto diffy = (dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, Tile) @@ -156,8 +220,8 @@ TEST(Autograd, Tile) auto dx = x.grad(); auto diffx = (dy.array() - af::tile(x.array(), 1, 2)); auto diffy = (dx.array() - af::sum(y.array(), 1)); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, Sum) @@ -171,8 +235,8 @@ TEST(Autograd, Sum) auto dx = x.grad(); auto diffx = (dy.array() - af::tile(x.array(), 1, 2)); auto diffy = (dx.array() - af::sum(y.array(), 1)); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } TEST(Autograd, Mean) @@ -186,6 +250,6 @@ TEST(Autograd, Mean) auto dx = x.grad(); auto diffx = (dy.array() - 6 * af::tile(x.array(), 1, 3, 2)); auto diffy = (dx.array() - af::mean(af::mean(y.array(), 1), 2)); - EXPECT_TRUE(allTrue(diffx < 1E-5)); - EXPECT_TRUE(allTrue(diffy < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffx) < 1E-5)); + EXPECT_TRUE(allTrue(abs(diffy) < 1E-5)); } From 88bee36c53b2c46ffab939d6fa662034356e89d1 Mon Sep 17 00:00:00 2001 From: syurkevi Date: Mon, 1 Oct 2018 17:03:06 -0400 Subject: [PATCH 3/5] adds CrossEntropyLoss and SVMLoss --- include/af/nn/Modules/Loss.hpp | 27 +++++++++ src/nn/Modules/Loss.cpp | 42 ++++++++++++- tests/test_loss.cpp | 106 ++++++++++++++++++++++++++++++--- 3 files changed, 166 insertions(+), 9 deletions(-) diff --git a/include/af/nn/Modules/Loss.hpp 
b/include/af/nn/Modules/Loss.hpp index dd5d1e8..96dc0bb 100644 --- a/include/af/nn/Modules/Loss.hpp +++ b/include/af/nn/Modules/Loss.hpp @@ -59,9 +59,36 @@ namespace af const autograd::Variable &weights); }; + class CrossEntropyLoss : public Loss + { + public: + CrossEntropyLoss() {} + + autograd::Variable forward(const autograd::Variable &inputs, + const autograd::Variable &targets); + + autograd::Variable forward(const autograd::Variable &inputs, + const autograd::Variable &targets, + const autograd::Variable &weights); + }; + + class MultiMarginLoss : public Loss + { + public: + MultiMarginLoss() {} + + autograd::Variable forward(const autograd::Variable &inputs, + const autograd::Variable &targets); + + autograd::Variable forward(const autograd::Variable &inputs, + const autograd::Variable &targets, + const autograd::Variable &weights); + }; + typedef MeanSquaredError MSE; typedef MeanAbsoluteError MAE; typedef MeanAbsoluteError L1Loss; typedef BinaryCrossEntropyLoss BCELoss; + typedef CrossEntropyLoss CELoss; } } diff --git a/src/nn/Modules/Loss.cpp b/src/nn/Modules/Loss.cpp index f15d965..34e9aa6 100644 --- a/src/nn/Modules/Loss.cpp +++ b/src/nn/Modules/Loss.cpp @@ -40,13 +40,14 @@ namespace af { auto df = inputs - targets; auto res = mean(flat(abs(df)), {0}); + return res; } static autograd::Variable binaryCrossEntropy(const autograd::Variable &inputs, const autograd::Variable &targets) { - return targets * inputs + (1 - targets) * (1 - inputs); + return -1 * (targets * log(inputs) + (1 - targets) * log(1 - inputs)); } autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs, @@ -66,7 +67,15 @@ namespace af CrossEntropy(const autograd::Variable &inputs, const autograd::Variable &targets) { - return targets * inputs + (1 - targets) * (1 - inputs); + auto correct_idxs = (range(targets.dims()[0]) + inputs.dims()[0] * targets.array()).as(s32); + + auto exps = exp(inputs); + auto softmaxScores = exps / tile(sum(exps, {1}), { 1, exps.dims()[1] }); + + Variable correct_scores = select_index(softmaxScores, Variable(correct_idxs, false)); + + auto losses = -1 * log(correct_scores); + return losses; } autograd::Variable CrossEntropyLoss::forward(const autograd::Variable &inputs, @@ -81,5 +90,34 @@ namespace af { return mean(flat(weights * CrossEntropy(inputs, targets)), {0}); } + + static autograd::Variable + MarginLoss(const autograd::Variable &inputs, + const autograd::Variable &targets) + { + auto correct_idxs = (range(targets.dims()[0]) + inputs.dims()[0] * targets.array()).as(s32); + Variable correct_scores = select_index(inputs, Variable(correct_idxs, false)); + + auto scores = inputs - tile(correct_scores, { 1, (int)inputs.dims()[1] } ); + const float margin = 1.f; + auto losses = max(scores + margin, 0); //gives different results than max(0, scores + margin), "intended" behaviour + //zero out correct classes, should not affect loss + losses = set_index(losses, correct_scores, Variable(af::constant(0, correct_scores.dims()[0]), false)); + losses = sum(losses, {1}) / inputs.dims()[1]; + return losses; + } + + autograd::Variable MultiMarginLoss::forward(const autograd::Variable &inputs, + const autograd::Variable &targets) + { + return mean(flat(MarginLoss(inputs, targets)), {0}); + } + + autograd::Variable MultiMarginLoss::forward(const autograd::Variable &inputs, + const autograd::Variable &targets, + const autograd::Variable &weights) + { + return mean(flat(weights * MarginLoss(inputs, targets)), {0}); + } } } diff --git a/tests/test_loss.cpp 
b/tests/test_loss.cpp index f0f7e05..b251fd0 100644 --- a/tests/test_loss.cpp +++ b/tests/test_loss.cpp @@ -12,13 +12,105 @@ #include #include +using af::allTrue; using af::autograd::Variable; +using std::log; -TEST(Autograd, Multiply) { - auto x = Variable(af::randu(5), true); - auto y = x * x; - auto dy = Variable(af::constant(1.0, 5), false); - y.backward(dy); - auto dx = x.grad(); - EXPECT_TRUE(af::allTrue((dx.array() - 2 * x.array()) < 1E-5)); +using namespace af::nn; + +TEST(Loss, MSE_1D) +{ + auto x = Variable(af::constant(2.0, 5), true); + auto y = Variable(af::constant(0.0, 5), false); + + auto loss = MeanSquaredError(); + auto l = loss(x, y); + + ASSERT_TRUE(allTrue((l.array() - af::constant(4.0, 1)) < 1E-5)); + //TODO: Test gradient calculation... +} + +TEST(Loss, MSE_nD) +{ + auto x = Variable(af::constant(1.0, 5, 100, 100, 100), true); + auto y = Variable(af::constant(0.0, 5, 100, 100, 100), false); + + auto loss = MeanSquaredError(); + auto l = loss(x, y); + + ASSERT_TRUE(allTrue((l.array() - af::constant(1.0, 1)) < 1E-5)); + //TODO: Test gradient calculation... +} + +TEST(Loss, MAE_1D) +{ + auto x = Variable(af::constant(2.0, 5), true); + auto y = Variable(af::constant(0.0, 5), false); + + auto loss = MeanAbsoluteError(); + auto l = loss(x, y); + + ASSERT_TRUE(allTrue((l.array() - af::constant(2.0, 1)) < 1E-5)); + //TODO: Test gradient calculation... +} + +TEST(Loss, MAE_nD) +{ + auto x = Variable(af::constant(2.0, 5, 100, 100, 100), true); + auto y = Variable(af::constant(0.0, 5, 100, 100, 100), false); + + auto loss = MeanAbsoluteError(); + auto l = loss(x, y); + + ASSERT_TRUE(allTrue((l.array() - af::constant(2.0, 1)) < 1E-5)); + //TODO: Test gradient calculation... +} + +TEST(Loss, BCELoss) +{ + auto x = Variable(af::constant(0.5, 5), true); + auto y = Variable(af::constant(1.0, 5), false); + + auto loss = BinaryCrossEntropyLoss(); + auto l = loss(x, y); + + ASSERT_TRUE(allTrue((l.array() - af::constant(-std::log(0.5) , 1)) < 1E-5)); + //TODO: Test gradient calculation... 
+} + +TEST(Loss, CELoss) +{ + auto x = Variable(af::transpose(af::range(5) + 0.5), true); //scores for each of C classes + auto y = Variable(af::constant(2, 1), false); //vector of correct class labels + + auto loss = CELoss(); + auto l = loss(x, y); + l.backward(); + + af_print(l.array()) + af_print(x.grad().array()) + + float h_x[] = { 0.0117, 0.0317, -0.9139, 0.2341, 0.6364 }; + af::array expected_grad(1, 5, h_x); + + auto diff = (x.grad().array() - expected_grad); + EXPECT_TRUE(allTrue(abs(diff) < 1E-4)); + EXPECT_TRUE(allTrue(abs(l.array() - af::constant(2.4519, 1)) < 1E-4)); +} + +TEST(Loss, MultiMarginLoss) +{ + auto x = Variable(af::transpose(af::range(5) + 0.5), true); //scores for each of C classes + auto y = Variable(af::constant(2, 1), false); //vector of correct class labels + + auto loss = MultiMarginLoss(); + auto l = loss(x, y); + l.backward(); + + float h_x[] = { 0.0, 0.0, -0.4, 0.2, 0.2 }; + af::array expected_grad(1, 5, h_x); + + auto diff = (x.grad().array() - expected_grad); + EXPECT_TRUE(allTrue(abs(diff) < 1E-5)); + EXPECT_TRUE(allTrue(abs(l.array() - af::constant(1.0, 1)) < 1E-5)); } From eaa986eeb20abe32a79f164fec8a9aacb8070c25 Mon Sep 17 00:00:00 2001 From: syurkevi Date: Mon, 1 Oct 2018 17:11:26 -0400 Subject: [PATCH 4/5] remove autograd example, now part of tests --- examples/autograd.cpp | 189 ------------------------------------------ 1 file changed, 189 deletions(-) delete mode 100644 examples/autograd.cpp diff --git a/examples/autograd.cpp b/examples/autograd.cpp deleted file mode 100644 index ea87b79..0000000 --- a/examples/autograd.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/******************************************************* - * Copyright (c) 2017, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include -#include - -#include - -#define VERIFY(VAL) do { \ - auto res = af::allTrue(af::abs(VAL) < 1E-5); \ - printf("%s:%d %s\n", __FUNCTION__, __LINE__, \ - res ? 
"PASS" : "FAIL"); \ - } while(0) - -using af::autograd::Variable; -void test_multiply() -{ - auto x = Variable(af::randu(5), true); - auto y = x * x; - auto dy = Variable(af::constant(1.0, 5), false); - y.backward(dy); - auto dx = x.grad(); - VERIFY(dx.array() - 2 * x.array()); -} - -void test_multipl_add() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5), true); - auto z = x * x + x * y + y * y; - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dx = x.grad(); - auto dy = y.grad(); - VERIFY(dx.array() - 2 * x.array() - y.array()); - VERIFY(dy.array() - 2 * y.array() - x.array()); -} - -void test_no_calc_grad() -{ - auto x = Variable(af::randu(5), false); - auto y = Variable(af::randu(5), true); - auto z = x * x + x * y + y * y; - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dy = y.grad(); - VERIFY(dy.array() - 2 * y.array() - x.array()); - try { - auto dx = x.grad(); - } catch(af::exception &ex) { - std::cout << ex.what() << std::endl; - return; - } - printf("%s:%d No Gradient check Failed\n"); -} - -void test_multiply_sub() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5), true); - auto z = x * x - x * y; - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dx = x.grad(); - auto dy = y.grad(); - VERIFY(dx.array() - (2 * x.array() - y.array())); - VERIFY(dy.array() - (-x.array())); -} - -void test_divide_add() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5), true); - auto z = x + x / y + y; - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dx = x.grad(); - auto dy = y.grad(); - VERIFY(dx.array() - (1.0 + 1.0 / y.array())); - VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); -} - -void test_multiply_add_scalar() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5), true); - auto z = 2 * x + x * y + y; - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dx = x.grad(); - auto dy = y.grad(); - VERIFY(dx.array() - (2.0 + y.array())); - VERIFY(dy.array() - (1.0 + x.array())); -} - -void test_exp() -{ - auto x = Variable(af::randu(5), true); - auto y = exp(x); - auto dy = Variable(af::constant(1.0, 5), false); - y.backward(dy); - auto dx = x.grad(); - VERIFY(dx.array() - (af::exp(x.array()))); -} - -void test_sigmoid() -{ - auto x = Variable(af::randu(5), true); - auto y = sigmoid(x); - auto dy = Variable(af::constant(1.0, 5), false); - y.backward(dy); - auto dx = x.grad(); - VERIFY(dx.array() - (y.array() * (1 - y.array()))); - VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); -} - -void test_tanh() -{ - auto x = Variable(af::randu(5), true); - auto y = tanh(x); - auto dy = Variable(af::constant(1.0, 5), false); - y.backward(dy); - auto dx = x.grad(); - VERIFY(dx.array() - (1 - y.array() * y.array())); - VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); -} - -void test_tile() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5, 2), true); - auto z = y * tileAs(x, y); - auto dz = Variable(af::constant(1.0, 5, 2), false); - z.backward(dz); - auto dy = y.grad(); - auto dx = x.grad(); - VERIFY(dy.array() - af::tile(x.array(), 1, 2)); - VERIFY(dx.array() - af::sum(y.array(), 1)); -} - -void test_sum() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5, 2), true); - auto z = x * sumAs(y, x); - auto dz = Variable(af::constant(1.0, 5), false); - 
z.backward(dz); - auto dy = y.grad(); - auto dx = x.grad(); - VERIFY(dy.array() - af::tile(x.array(), 1, 2)); - VERIFY(dx.array() - af::sum(y.array(), 1)); -} - -void test_mean() -{ - auto x = Variable(af::randu(5), true); - auto y = Variable(af::randu(5, 3, 2), true); - auto z = x * mean(y, {1,2}); - auto dz = Variable(af::constant(1.0, 5), false); - z.backward(dz); - auto dy = y.grad(); - auto dx = x.grad(); - VERIFY(dy.array() - 6 * af::tile(x.array(), 1, 3, 2)); - VERIFY(dx.array() - af::mean(af::mean(y.array(), 1), 2)); -} - -int main() -{ - af::info(); - test_multiply(); - test_multipl_add(); - test_no_calc_grad(); - test_multiply_sub(); - test_divide_add(); - test_multiply_add_scalar(); - test_exp(); - test_sigmoid(); - test_tanh(); - test_tile(); - test_sum(); - test_mean(); - return 0; -} From eea9572aeda3ba6e3452f35af914fb29df4c2bff Mon Sep 17 00:00:00 2001 From: syurkevi Date: Tue, 2 Oct 2018 13:48:03 -0400 Subject: [PATCH 5/5] change names to assign/lookup, remove unnecessary copies --- examples/CMakeLists.txt | 1 - include/af/autograd/Functions.hpp | 4 ++-- src/autograd/Functions.cpp | 22 ++++++++++------------ src/nn/Modules/Loss.cpp | 8 ++++---- tests/test_autograd.cpp | 8 ++++---- tests/test_loss.cpp | 3 --- 6 files changed, 20 insertions(+), 26 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 11e8d14..e40b645 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -15,4 +15,3 @@ endfunction(build_example) # build_example(Node.cpp) build_example(xor.cpp) # build_example(Weights.cpp) -build_example(autograd.cpp) diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index d3aa43c..e4d079f 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -44,8 +44,8 @@ namespace af { Variable operator <=(const Variable &lhs, const double &rhs); Variable operator !(const Variable &input); - Variable select_index(const Variable &input, const Variable &idx); - Variable set_index(const Variable &input, const Variable &idx, const Variable &vals); + Variable lookup(const Variable &input, const Variable &idx); + Variable assign(const Variable &input, const Variable &idx, const Variable &vals); Variable negate(const Variable &input); Variable reciprocal(const Variable &input); diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 4855b4d..63468db 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -117,33 +117,31 @@ namespace af { return Variable(result, false); } - Variable select_index(const Variable &input, const Variable &idx) + Variable lookup(const Variable &input, const Variable &idx) { af::array result = input.array()(idx.array()); - af::array mask = af::constant(0, input.dims()); - mask(idx.array()) = 1; auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - auto grad = inputs[2].array(); - auto grad_mask = af::where(grad); - grad(grad_mask) *= grad_output.array(); + af::array grad = af::constant(0, inputs[0].dims()); + grad(inputs[1].array()) = grad_output.array(); inputs[0].addGrad(Variable(grad, false)); }; - return Variable(result, {input, idx, Variable(mask, false)}, grad_func); + return Variable(result, {input, idx}, grad_func); } - Variable set_index(const Variable &input, const Variable &idx, const Variable &vals) + Variable assign(const Variable &input, const Variable &idx, const Variable &vals) { af::array result = input.array(); result(idx.array()) = vals.array(); - af::array mask = af::constant(1, 
input.dims(), s32); - mask(idx.array()) = 0; auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - inputs[0].addGrad(inputs[3] * grad_output); + af::array mask = af::constant(1, inputs[0].dims(), s32); + mask(inputs[1].array()) = 0; + + inputs[0].addGrad(Variable(mask, false) * grad_output); }; - return Variable(result, {input, idx, vals, Variable(mask, false)}, grad_func); + return Variable(result, {input, idx, vals}, grad_func); } Variable max(const Variable &lhs, const Variable &rhs) diff --git a/src/nn/Modules/Loss.cpp b/src/nn/Modules/Loss.cpp index 34e9aa6..753c09e 100644 --- a/src/nn/Modules/Loss.cpp +++ b/src/nn/Modules/Loss.cpp @@ -70,9 +70,9 @@ namespace af auto correct_idxs = (range(targets.dims()[0]) + inputs.dims()[0] * targets.array()).as(s32); auto exps = exp(inputs); - auto softmaxScores = exps / tile(sum(exps, {1}), { 1, exps.dims()[1] }); + auto softmaxScores = exps / tile(sum(exps, {1}), { 1, (int)exps.dims()[1] }); - Variable correct_scores = select_index(softmaxScores, Variable(correct_idxs, false)); + Variable correct_scores = lookup(softmaxScores, Variable(correct_idxs, false)); auto losses = -1 * log(correct_scores); return losses; @@ -96,13 +96,13 @@ namespace af const autograd::Variable &targets) { auto correct_idxs = (range(targets.dims()[0]) + inputs.dims()[0] * targets.array()).as(s32); - Variable correct_scores = select_index(inputs, Variable(correct_idxs, false)); + Variable correct_scores = lookup(inputs, Variable(correct_idxs, false)); auto scores = inputs - tile(correct_scores, { 1, (int)inputs.dims()[1] } ); const float margin = 1.f; auto losses = max(scores + margin, 0); //gives different results than max(0, scores + margin), "intended" behaviour //zero out correct classes, should not affect loss - losses = set_index(losses, correct_scores, Variable(af::constant(0, correct_scores.dims()[0]), false)); + losses = assign(losses, correct_scores, Variable(af::constant(0, correct_scores.dims()[0]), false)); losses = sum(losses, {1}) / inputs.dims()[1]; return losses; } diff --git a/tests/test_autograd.cpp b/tests/test_autograd.cpp index 279f78f..3fa1fda 100644 --- a/tests/test_autograd.cpp +++ b/tests/test_autograd.cpp @@ -164,12 +164,12 @@ TEST(Autograd, Softmax) } */ -TEST(Autograd, set_index) +TEST(Autograd, assign) { auto x = Variable(af::range(5) + 0.5, true); auto idx = Variable(af::range(2) + 1, false); - auto y = set_index(x, idx, Variable(af::constant(-2.0, idx.dims()), false)); + auto y = assign(x, idx, Variable(af::constant(-2.0, idx.dims()), false)); auto z = sum(2*y, {0}); z.backward(); @@ -180,12 +180,12 @@ TEST(Autograd, set_index) EXPECT_TRUE(allTrue(abs(diff) < 1E-5)); } -TEST(Autograd, select_index) +TEST(Autograd, lookup) { auto x = Variable(af::randu(5), true); auto idx = Variable(af::range(2) + 1, false); - auto y = select_index(x, idx); + auto y = lookup(x, idx); auto z = sum(2*y, {0}); z.backward(); diff --git a/tests/test_loss.cpp b/tests/test_loss.cpp index b251fd0..3076604 100644 --- a/tests/test_loss.cpp +++ b/tests/test_loss.cpp @@ -87,9 +87,6 @@ TEST(Loss, CELoss) auto l = loss(x, y); l.backward(); - af_print(l.array()) - af_print(x.grad().array()) - float h_x[] = { 0.0117, 0.0317, -0.9139, 0.2341, 0.6364 }; af::array expected_grad(1, 5, h_x);
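
A minimal standalone sketch (not taken from the patches) of how the CrossEntropyLoss module introduced in this series can be driven; the header names and the main() scaffold are assumptions, while the calls mirror TEST(Loss, CELoss) above.

```cpp
// Sketch only: assumes the public afml headers; the calls follow TEST(Loss, CELoss).
#include <arrayfire.h>
#include <af/autograd.h>
#include <af/nn.h>

using af::autograd::Variable;

int main()
{
    af::info();

    // One sample with scores for 5 classes laid out as a 1x5 row vector,
    // plus the index of the correct class, exactly as in TEST(Loss, CELoss).
    auto scores  = Variable(af::transpose(af::range(5) + 0.5), true);
    auto targets = Variable(af::constant(2, 1), false);

    auto loss = af::nn::CrossEntropyLoss();
    auto l = loss(scores, targets);   // softmax over the scores, then -log of the correct class
    l.backward();                     // gradient flows back through lookup() into scores

    af_print(l.array());              // scalar loss
    af_print(scores.grad().array());  // d(loss)/d(scores), 1x5
    return 0;
}
```

With the PACKAGE_TESTS option left ON, the googletest targets registered by package_add_test() (test_loss, test_autograd) can also be run through ctest from the build directory after the extern/googletest submodule has been initialized.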