DType regression (apache#3018)
* DTypeRegressionOutput

* Update DType test for pooling and regression

* nullptr fix

* fix infershape with {} and nullptr

* nullptr fix
Godricly authored and winstywang committed Aug 16, 2016
1 parent d08d87f commit 8cc2560
Showing 4 changed files with 115 additions and 36 deletions.
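
This commit threads a DType template parameter through RegressionOutputOp, adds InferType to the operator property, and dispatches on the runtime dtype via MSHADOW_REAL_TYPE_SWITCH in the CPU and GPU factories, so LinearRegressionOutput, LogisticRegressionOutput, and MAERegressionOutput can be bound with float16, float32, or float64 data. A minimal usage sketch follows; it is illustrative only — the variable names and shapes are assumptions, and it presumes the simple_bind/type_dict API available in this version of MXNet.

import mxnet as mx
import numpy as np

# Hedged sketch: bind a regression output with float64 inputs and check that
# the output dtype follows the input dtype. Names and shapes are illustrative.
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
net = mx.sym.LinearRegressionOutput(data=data, label=label, name='regression')
exe = net.simple_bind(ctx=mx.cpu(0), data=(2, 10),
                      type_dict={'data': np.float64})
exe.arg_dict['data'][:] = np.random.uniform(-1, 1, (2, 10))
exe.arg_dict['label'][:] = np.random.uniform(0, 1, (2, 10))
exe.forward(is_train=True)
print(exe.outputs[0].dtype)  # expected to be numpy.float64 after this change
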
46 changes: 36 additions & 10 deletions src/operator/regression_output-inl.h
@@ -33,7 +33,7 @@ struct RegressionOutputParam : public dmlc::Parameter<RegressionOutputParam> {

// Special Operator to output regression value in forward
// And get gradient in calculation.
template<typename xpu, typename ForwardOp, typename BackwardOp>
template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
class RegressionOutputOp : public Operator {
public:
explicit RegressionOutputOp(RegressionOutputParam param) : param_(param) {}
@@ -48,8 +48,8 @@ class RegressionOutputOp : public Operator {
CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]";
CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]";
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 2> data = in_data[reg_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data[reg_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2, DType> data = in_data[reg_enum::kData].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> out = out_data[reg_enum::kOut].FlatTo2D<xpu, DType>(s);
Assign(out, req[reg_enum::kOut], F<ForwardOp>(data));
}

@@ -69,11 +69,11 @@ class RegressionOutputOp : public Operator {
Stream<xpu> *s = ctx.get_stream<xpu>();
real_t num_output =
in_data[reg_enum::kLabel].Size()/in_data[reg_enum::kLabel].shape_[0];
Tensor<xpu, 2> out = out_data[reg_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> grad = in_grad[reg_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> label = in_data[reg_enum::kLabel]
.get_with_shape<xpu, 2, real_t>(out.shape_, s);
Assign(grad, req[reg_enum::kData], param_.grad_scale/num_output*
Tensor<xpu, 2, DType> out = out_data[reg_enum::kOut].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> grad = in_grad[reg_enum::kData].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> label = in_data[reg_enum::kLabel]
.get_with_shape<xpu, 2, DType>(out.shape_, s);
Assign(grad, req[reg_enum::kData], scalar<DType>(param_.grad_scale/num_output)*
F<BackwardOp>(out, reshape(label, grad.shape_)));
}

@@ -84,7 +84,7 @@ class RegressionOutputOp : public Operator {
// Declare Factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateRegressionOutputOp(reg_enum::RegressionOutputType type,
RegressionOutputParam param);
RegressionOutputParam param, int dtype);

#if DMLC_USE_CXX11
template<reg_enum::RegressionOutputType type>
@@ -129,6 +129,26 @@ class RegressionOutputProp : public OperatorProperty {
return true;
}

bool InferType(std::vector<int> *in_type,
std::vector<int> *out_type,
std::vector<int> *aux_type) const override {
CHECK_EQ(in_type->size(), 2) << "Input:[data, label]";
int dtype = (*in_type)[0];
auto nin = in_type->size();
in_type->clear();
in_type->push_back(dtype);
for (index_t i = 1; i < nin; ++i) {
in_type->push_back(dtype);
}
if (dtype == -1) {
LOG(FATAL) << "Input type to regression_output is not specified.";
return false;
}
out_type->clear();
out_type->push_back(dtype);
return true;
}

OperatorProperty* Copy() const override {
auto ptr = new RegressionOutputProp<type>();
ptr->param_ = param_;
Expand Down Expand Up @@ -165,7 +185,13 @@ class RegressionOutputProp : public OperatorProperty {
return {{in_data[reg_enum::kData], out_data[reg_enum::kOut]}};
}

Operator* CreateOperator(Context ctx) const override;
Operator* CreateOperator(Context ctx) const override {
LOG(FATAL) << "Not Implemented.";
return nullptr;
}

Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const override;

protected:
RegressionOutputParam param_;
42 changes: 28 additions & 14 deletions src/operator/regression_output.cc
@@ -11,24 +11,38 @@ namespace op {

template<>
Operator *CreateRegressionOutputOp<cpu>(reg_enum::RegressionOutputType type,
RegressionOutputParam param) {
switch (type) {
case reg_enum::kLinear:
return new RegressionOutputOp<cpu, mshadow::op::identity, mshadow::op::minus>(param);
case reg_enum::kLogistic:
return new RegressionOutputOp<cpu, mshadow_op::sigmoid, mshadow::op::minus>(param);
case reg_enum::kMAE:
return new RegressionOutputOp<cpu, mshadow::op::identity, mshadow_op::minus_sign>(param);
default:
LOG(FATAL) << "unknown activation type " << type;
}
return nullptr;
RegressionOutputParam param, int dtype) {
Operator *op = nullptr;
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (type) {
case reg_enum::kLinear:
op = new RegressionOutputOp
<cpu, mshadow::op::identity, mshadow::op::minus, DType>(param);
break;
case reg_enum::kLogistic:
op = new RegressionOutputOp
<cpu, mshadow_op::sigmoid, mshadow::op::minus, DType>(param);
break;
case reg_enum::kMAE:
op = new RegressionOutputOp
<cpu, mshadow::op::identity, mshadow_op::minus_sign, DType>(param);
break;
default:
LOG(FATAL) << "unknown RegressionOutput type " << type;
}
});
return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
template<reg_enum::RegressionOutputType type>
Operator *RegressionOutputProp<type>::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_);
Operator *RegressionOutputProp<type>::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
std::vector<TShape> out_shape, aux_shape;
std::vector<int> out_type, aux_type;
CHECK(InferType(in_type, &out_type, &aux_type));
CHECK(InferShape(in_shape, &out_shape, &aux_shape));
DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_, (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(RegressionOutputParam);
33 changes: 21 additions & 12 deletions src/operator/regression_output.cu
@@ -11,18 +11,27 @@ namespace op {

template<>
Operator *CreateRegressionOutputOp<gpu>(reg_enum::RegressionOutputType type,
RegressionOutputParam param) {
switch (type) {
case reg_enum::kLinear:
return new RegressionOutputOp<gpu, mshadow::op::identity, mshadow::op::minus>(param);
case reg_enum::kLogistic:
return new RegressionOutputOp<gpu, mshadow_op::sigmoid, mshadow::op::minus>(param);
case reg_enum::kMAE:
return new RegressionOutputOp<gpu, mshadow::op::identity, mshadow_op::minus_sign>(param);
default:
LOG(FATAL) << "unknown activation type " << type;
}
return NULL;
RegressionOutputParam param, int dtype) {
Operator *op = nullptr;
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (type) {
case reg_enum::kLinear:
op = new RegressionOutputOp
<gpu, mshadow::op::identity, mshadow::op::minus, DType>(param);
break;
case reg_enum::kLogistic:
op = new RegressionOutputOp
<gpu, mshadow_op::sigmoid, mshadow::op::minus, DType>(param);
break;
case reg_enum::kMAE:
op = new RegressionOutputOp
<gpu, mshadow::op::identity, mshadow_op::minus_sign, DType>(param);
break;
default:
LOG(FATAL) << "unknown RegressionOutput type " << type;
}
});
return op;
}
} // namespace op
} // namespace mxnet
30 changes: 30 additions & 0 deletions tests/python/gpu/test_operator_gpu.py
@@ -207,6 +207,34 @@ def test_embedding_with_type():
{'ctx': mx.cpu(0), 'embedding_data': (2, 10), 'type_dict': {'embedding_data': np.float16}}]
check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'})

def test_pooling_with_type():
sym = mx.sym.Pooling(name='pooling', kernel=(3, 3), pool_type='avg')
ctx_list = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float16}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}]
check_consistency(sym, ctx_list)

sym_3d = mx.sym.Pooling(name='pooling', kernel=(3, 3, 3), pool_type='avg')
ctx_list_3d = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float16}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}]
check_consistency(sym_3d, ctx_list_3d)

def test_regression_with_type():
sym_logistic = mx.sym.LogisticRegressionOutput(name='regression')
sym_linear = mx.sym.LinearRegressionOutput(name='regression')
ctx_list = [{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}},
{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}},
{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float16}},
{'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}},
{'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}]
check_consistency(sym_logistic, ctx_list)
check_consistency(sym_linear, ctx_list)

if __name__ == '__main__':
test_batchnorm_with_type()
test_convolution_with_type()
@@ -220,6 +248,8 @@ def test_embedding_with_type():
test_fullyconnected_with_type()
test_activation_with_type()
test_embedding_with_type()
test_pooling_with_type()
test_regression_with_type()
#test_softmax_with_shape((3,4), mx.gpu())
#test_multi_softmax_with_shape((3,4,5), mx.gpu())
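
As background on the helper the new tests use: check_consistency binds the same symbol under each context/dtype configuration in ctx_list and compares forward (and backward) results across the runs within dtype-dependent tolerances. Below is a rough, forward-only sketch of what such a check does — not the helper's actual implementation; shapes, tolerances, and the RNG setup are illustrative assumptions.

import mxnet as mx
import numpy as np

# Hedged, forward-only sketch of a manual dtype-consistency check; the real
# check_consistency helper also covers the backward pass and per-dtype
# tolerances. Shapes and tolerances here are illustrative.
sym = mx.sym.LinearRegressionOutput(name='regression')
shape = (2, 2, 10, 10)
data = np.random.uniform(-1, 1, shape)
label = np.random.uniform(0, 1, shape)
outputs = []
for ctx, dtype in [(mx.cpu(0), np.float64), (mx.gpu(0), np.float16)]:
    exe = sym.simple_bind(ctx=ctx, regression_data=shape,
                          type_dict={'regression_data': dtype})
    exe.arg_dict['regression_data'][:] = data.astype(dtype)
    exe.arg_dict['regression_label'][:] = label.astype(dtype)
    exe.forward(is_train=False)
    outputs.append(exe.outputs[0].asnumpy().astype(np.float64))
np.testing.assert_allclose(outputs[0], outputs[1], rtol=1e-2, atol=1e-2)
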
