From 8cc25600cf2b26bd9324baaf2da55c38e449e1d8 Mon Sep 17 00:00:00 2001 From: Yan Li Date: Tue, 16 Aug 2016 12:33:13 +0800 Subject: [PATCH] DType regression (#3018) * DTypeRegressionOutput * Update DType test for pooling and regression * nullptr fix * fix infershape with {} and nullptr * nullptr fix --- src/operator/regression_output-inl.h | 46 +++++++++++++++++++++------ src/operator/regression_output.cc | 42 ++++++++++++++++-------- src/operator/regression_output.cu | 33 ++++++++++++------- tests/python/gpu/test_operator_gpu.py | 30 +++++++++++++++++ 4 files changed, 115 insertions(+), 36 deletions(-) diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index d70066d26fcb..273f42987cd0 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -33,7 +33,7 @@ struct RegressionOutputParam : public dmlc::Parameter { // Special Operator to output regression value in forward // And get gradient in calculation. -template +template class RegressionOutputOp : public Operator { public: explicit RegressionOutputOp(RegressionOutputParam param) : param_(param) {} @@ -48,8 +48,8 @@ class RegressionOutputOp : public Operator { CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]"; CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]"; Stream *s = ctx.get_stream(); - Tensor data = in_data[reg_enum::kData].FlatTo2D(s); - Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); + Tensor data = in_data[reg_enum::kData].FlatTo2D(s); + Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); Assign(out, req[reg_enum::kOut], F(data)); } @@ -69,11 +69,11 @@ class RegressionOutputOp : public Operator { Stream *s = ctx.get_stream(); real_t num_output = in_data[reg_enum::kLabel].Size()/in_data[reg_enum::kLabel].shape_[0]; - Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); - Tensor grad = in_grad[reg_enum::kData].FlatTo2D(s); - Tensor label = in_data[reg_enum::kLabel] - 
.get_with_shape(out.shape_, s); - Assign(grad, req[reg_enum::kData], param_.grad_scale/num_output* + Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); + Tensor grad = in_grad[reg_enum::kData].FlatTo2D(s); + Tensor label = in_data[reg_enum::kLabel] + .get_with_shape(out.shape_, s); + Assign(grad, req[reg_enum::kData], scalar(param_.grad_scale/num_output)* F(out, reshape(label, grad.shape_))); } @@ -84,7 +84,7 @@ class RegressionOutputOp : public Operator { // Decalre Factory function, used for dispatch specialization template Operator* CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - RegressionOutputParam param); + RegressionOutputParam param, int dtype); #if DMLC_USE_CXX11 template @@ -129,6 +129,26 @@ class RegressionOutputProp : public OperatorProperty { return true; } + bool InferType(std::vector *in_type, + std::vector *out_type, + std::vector *aux_type) const override { + CHECK_EQ(in_type->size(), 2) << "Input:[data, label]"; + int dtype = (*in_type)[0]; + auto nin = in_type->size(); + in_type->clear(); + in_type->push_back(dtype); + for (index_t i = 1; i < nin; ++i) { + in_type->push_back(dtype); + } + if (dtype == -1) { + LOG(FATAL) << "Input type to regression_output is not specified."; + return false; + } + out_type->clear(); + out_type->push_back(dtype); + return true; + } + OperatorProperty* Copy() const override { auto ptr = new RegressionOutputProp(); ptr->param_ = param_; @@ -165,7 +185,13 @@ class RegressionOutputProp : public OperatorProperty { return {{in_data[reg_enum::kData], out_data[reg_enum::kOut]}}; } - Operator* CreateOperator(Context ctx) const override; + Operator* CreateOperator(Context ctx) const override { + LOG(FATAL) << "Not Implemented."; + return nullptr; + } + + Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + std::vector *in_type) const override; protected: RegressionOutputParam param_; diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc index 
ff63c0e00dcf..06369f8abd88 100644 --- a/src/operator/regression_output.cc +++ b/src/operator/regression_output.cc @@ -11,24 +11,38 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - RegressionOutputParam param) { - switch (type) { - case reg_enum::kLinear: - return new RegressionOutputOp(param); - case reg_enum::kLogistic: - return new RegressionOutputOp(param); - case reg_enum::kMAE: - return new RegressionOutputOp(param); - default: - LOG(FATAL) << "unknown activation type " << type; - } - return nullptr; + RegressionOutputParam param, int dtype) { + Operator *op = nullptr; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + switch (type) { + case reg_enum::kLinear: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kLogistic: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kMAE: + op = new RegressionOutputOp + (param); + break; + default: + LOG(FATAL) << "unknown RegressionOutput type " << type; + } + }); + return op; } // DO_BIND_DISPATCH comes from operator_common.h template -Operator *RegressionOutputProp::CreateOperator(Context ctx) const { - DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_); +Operator *RegressionOutputProp::CreateOperatorEx(Context ctx, std::vector *in_shape, + std::vector *in_type) const { + std::vector out_shape, aux_shape; + std::vector out_type, aux_type; + CHECK(InferType(in_type, &out_type, &aux_type)); + CHECK(InferShape(in_shape, &out_shape, &aux_shape)); + DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_, (*in_type)[0]); } DMLC_REGISTER_PARAMETER(RegressionOutputParam); diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu index 18e7a1f4184c..4917192abc51 100644 --- a/src/operator/regression_output.cu +++ b/src/operator/regression_output.cu @@ -11,18 +11,27 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - RegressionOutputParam param) { - switch (type) { - 
case reg_enum::kLinear: - return new RegressionOutputOp(param); - case reg_enum::kLogistic: - return new RegressionOutputOp(param); - case reg_enum::kMAE: - return new RegressionOutputOp(param); - default: - LOG(FATAL) << "unknown activation type " << type; - } - return NULL; + RegressionOutputParam param, int dtype) { + Operator *op = nullptr; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + switch (type) { + case reg_enum::kLinear: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kLogistic: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kMAE: + op = new RegressionOutputOp + (param); + break; + default: + LOG(FATAL) << "unknown RegressionOutput type " << type; + } + }); + return op; } } // namespace op } // namespace mxnet diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index daa60e1779a0..2847fb3e777f 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -207,6 +207,34 @@ def test_embedding_with_type(): {'ctx': mx.cpu(0), 'embedding_data': (2, 10), 'type_dict': {'embedding_data': np.float16}}] check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'}) +def test_pooling_with_type(): + sym= mx.sym.Pooling(name='pooling', kernel=(3, 3), pool_type='avg') + ctx_list = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float16}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}] + check_consistency(sym, ctx_list) + + sym_3d= mx.sym.Pooling(name='pooling', kernel=(3, 3, 3), pool_type='avg') + ctx_list_3d = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 
10, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float16}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}] + check_consistency(sym_3d, ctx_list_3d) + +def test_regression_with_type(): + sym_logistic = mx.sym.LogisticRegressionOutput(name = 'regression') + sym_linear = mx.sym.LinearRegressionOutput(name = 'regression') + ctx_list = [{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}}, + {'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}, + {'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float16}}, + {'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}}, + {'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}] + check_consistency(sym_logistic, ctx_list) + check_consistency(sym_linear, ctx_list) + if __name__ == '__main__': test_batchnorm_with_type() test_convolution_with_type() @@ -220,6 +248,8 @@ def test_embedding_with_type(): test_fullyconnected_with_type() test_activation_with_type() test_embedding_with_type() + test_pooling_with_type() + test_regression_with_type() #test_softmax_with_shape((3,4), mx.gpu()) #test_multi_softmax_with_shape((3,4,5), mx.gpu())