forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLoss.cu
156 lines (129 loc) · 5.46 KB
/
Loss.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Dispatch.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cuda/Loops.cuh>

#include <type_traits>
// Lower bound for the denominator (1 - input) * input in the binary cross
// entropy backward kernel below.
// NOTE: 1e-12 is smaller than the smallest positive fp16 value (2^-24, about
// 6e-8), so it rounds to zero if narrowed to Half; it only works as a guard in
// types that can represent it.
constexpr float EPSILON = 1e-12;
namespace {

using namespace at;

// Element-wise part of the binary cross entropy backward pass (no weight and
// no reduction scaling; callers apply those afterwards).
//
// For every element this writes
//   grad_input = grad * (input - target) / max((1 - input) * input, EPSILON)
//
// Arguments:
//   grad_input  output tensor, written through a TensorIterator
//   grad        incoming gradient (callers pass it already expanded to input)
//   input       predicted probabilities
//   target      target values
//
// The arithmetic is carried out in float (double for double tensors) and
// rounded to scalar_t once at the end. Doing it directly in scalar_t would
// narrow EPSILON (1e-12) to zero for Half, turning the clamp into a no-op and
// producing 0/0 = NaN whenever input is exactly 0 or 1 and equals target.
void binary_cross_entropy_backward_out_kernel(Tensor& grad_input, const Tensor& grad, const Tensor& input, const Tensor& target) {
  at::TensorIterator iter = TensorIteratorConfig()
      .add_output(grad_input)
      .add_input(grad)
      .add_input(input)
      .add_input(target)
      .build();
  AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.common_dtype(), "binary_cross_entropy_backward_out_cuda", [&]() {
    // double stays double; float, Half and BFloat16 all compute in float.
    using compute_t = typename std::conditional<
        std::is_same<scalar_t, double>::value, double, float>::type;
    at::native::gpu_kernel(iter, [] GPU_LAMBDA (
        scalar_t grad_val,
        scalar_t input_val,
        scalar_t target_val
      ) -> scalar_t {
        const compute_t one = 1;
        const compute_t epsilon = EPSILON;
        const compute_t g = static_cast<compute_t>(grad_val);
        const compute_t x = static_cast<compute_t>(input_val);
        const compute_t t = static_cast<compute_t>(target_val);

        // Clamp the denominator from below so input == 0 or input == 1 does
        // not divide by zero. A NaN product also selects epsilon.
        const compute_t product = (one - x) * x;
        const compute_t denominator = (product > epsilon) ? product : epsilon;

        return static_cast<scalar_t>(g * (x - t) / denominator);
      }
    );
  });
}

} // namespace
namespace at { namespace native {
// Gradient of the KL divergence loss with respect to `input`.
//
// log_target == false: an element-wise kernel writes -target * grad where
//   target > 0 and 0 elsewhere; for Mean reduction the value is additionally
//   multiplied by 1 / input.numel().
// log_target == true: the result is -exp(target) * grad, built from tensor
//   ops, and divided by input.numel() for Mean reduction.
//
// Returns a new tensor; none of the arguments is modified.
Tensor kl_div_backward_cuda(const Tensor& grad, const Tensor& input, const Tensor& target, int64_t reduction, bool log_target) {
  Tensor grad_input = at::empty_like(input);
  const bool is_mean = (reduction == at::Reduction::Mean);

  if (log_target) {
    // Target is given in log space: use regular tensor arithmetic.
    grad_input = -at::exp(target) * grad;
    if (is_mean) {
      grad_input /= input.numel();
    }
    return grad_input;
  }

  // Target is given as probabilities: single fused element-wise kernel.
  TensorIterator iter = TensorIteratorConfig()
      .add_output(grad_input)
      .add_input(target)
      .add_input(grad)
      .build();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "kl_div_backward_cuda", [&]() {
    // Mean reduction folds the 1 / N factor into the kernel.
    scalar_t scale = is_mean ? scalar_t(1.0 / input.numel()) : scalar_t(1.0);
    gpu_kernel(iter,
      [scale] GPU_LAMBDA (scalar_t target_val, scalar_t grad_val) -> scalar_t {
        if (target_val > 0) {
          return scalar_t(-target_val * grad_val * scale);
        }
        return scalar_t(0.0);
      });
  });
  return grad_input;
}
// Functional binary cross entropy: allocates an output shaped like `input`
// and forwards to binary_cross_entropy_out_cuda, returning its result.
Tensor binary_cross_entropy_cuda(const Tensor& input, const Tensor& target, const c10::optional<Tensor>& weight_opt, int64_t reduction) {
  // See [Note: hacky wrapper removal for optional tensor]
  c10::MaybeOwned<Tensor> weight_holder = at::borrow_from_optional_tensor(weight_opt);
  const Tensor& weight = *weight_holder;

  Tensor result = at::empty_like(input);
  return at::native::binary_cross_entropy_out_cuda(input, target, weight, reduction, result);
}
// Binary cross entropy forward pass, writing into `loss`.
//
// Per element the kernel computes
//   (target - 1) * max(log(1 - input), -100) - target * max(log(input), -100)
// and asserts on the device that 0 <= input <= 1. Afterwards `loss` is
// multiplied in place by `weight` when one is given, and for Mean / Sum
// reduction `loss` is resized to the reduced result and overwritten with it.
//
// Returns `loss`.
Tensor& binary_cross_entropy_out_cuda(const Tensor& input, const Tensor& target, const c10::optional<Tensor>& weight_opt, int64_t reduction, Tensor& loss) {
  // See [Note: hacky wrapper removal for optional tensor]
  c10::MaybeOwned<Tensor> weight_holder = at::borrow_from_optional_tensor(weight_opt);
  const Tensor& weight = *weight_holder;

  // The kernel runs over squeezed versions of the output and both inputs.
  // NOTE(review): presumably the squeezed output shares storage with `loss`,
  // since `loss` is what is used afterwards -- confirm.
  Tensor squeezed_out = at::squeeze(loss);
  TensorIterator iter = TensorIteratorConfig()
      .add_output(squeezed_out)
      .add_input(at::squeeze(input))
      .add_input(at::squeeze(target))
      .build();

  AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.common_dtype(), "binary_cross_entropy_out_cuda", [&]() {
    gpu_kernel(iter,
      [] GPU_LAMBDA (scalar_t input_val, scalar_t target_val) -> scalar_t {
        const scalar_t zero = 0;
        const scalar_t one = 1;
        const scalar_t log_floor = -100;

        CUDA_KERNEL_ASSERT(input_val >= zero && input_val <= one);

        // Both logs are clamped from below at -100 so log(0) stays finite.
        const scalar_t raw_log_p = std::log(input_val);
        const scalar_t raw_log_q = std::log(one - input_val);
        const scalar_t log_p = std::max(raw_log_p, log_floor);
        const scalar_t log_q = std::max(raw_log_q, log_floor);

        return ((target_val - one) * log_q) - (target_val * log_p);
      }
    );
  });

  if (weight.defined()) {
    loss.mul_(weight);
  }

  if (reduction == at::Reduction::None) {
    return loss;
  }

  // Reduce, then shrink `loss` to the reduced shape and store the value.
  Tensor reduced;
  if (reduction == at::Reduction::Mean) {
    reduced = loss.mean();
  } else if (reduction == at::Reduction::Sum) {
    reduced = loss.sum();
  }
  loss.resize_as_(reduced).copy_(reduced);
  return loss;
}
// Functional binary cross entropy backward: allocates a gradient buffer shaped
// like `input` and forwards to binary_cross_entropy_backward_out_cuda,
// returning its result.
Tensor binary_cross_entropy_backward_cuda(const Tensor& grad, const Tensor& input, const Tensor& target, const c10::optional<Tensor>& weight_opt, int64_t reduction) {
  // See [Note: hacky wrapper removal for optional tensor]
  c10::MaybeOwned<Tensor> weight_holder = at::borrow_from_optional_tensor(weight_opt);
  const Tensor& weight = *weight_holder;

  Tensor result = at::empty_like(input);
  return at::native::binary_cross_entropy_backward_out_cuda(grad, input, target, weight, reduction, result);
}
// Binary cross entropy backward pass, writing into `grad_input`.
//
// Steps:
//   1. expand `grad` to the shape of `input`,
//   2. run the element-wise backward kernel into `grad_input`,
//   3. multiply in place by `weight` when one is given,
//   4. for Mean reduction divide in place by input.numel().
//
// Returns `grad_input`.
Tensor& binary_cross_entropy_backward_out_cuda(const Tensor& grad, const Tensor& input, const Tensor& target, const c10::optional<Tensor>& weight_opt, int64_t reduction, Tensor& grad_input) {
  // See [Note: hacky wrapper removal for optional tensor]
  c10::MaybeOwned<Tensor> weight_holder = at::borrow_from_optional_tensor(weight_opt);
  const Tensor& weight = *weight_holder;

  const Tensor expanded_grad = grad.expand_as(input);
  binary_cross_entropy_backward_out_kernel(grad_input, expanded_grad, input, target);

  if (weight.defined()) {
    grad_input.mul_(weight);
  }

  const bool is_mean = (reduction == at::Reduction::Mean);
  if (is_mean) {
    grad_input.div_(input.numel());
  }

  return grad_input;
}
}} // namespace at::native