forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fill.cpp
142 lines (115 loc) · 3.97 KB
/
Fill.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Functions that fill Tensors with constants.
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>
#include <ATen/native/Fill.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/Utils.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fill ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Fills `self` with `value` in place and returns `self`.
//
// A one-element tensor on the CPU is forwarded to at::detail::scalar_fill.
// Every other case builds a TensorIterator with `self` as its only output and
// hands it to fill_stub, dispatched on the iterator's device type.
Tensor& fill_out(Tensor& self, const Scalar& value) {
  const bool single_cpu_element =
      self.device() == at::kCPU && self.numel() == 1;
  if (single_cpu_element) {
    return at::detail::scalar_fill(self, value);
  }

  TensorIteratorConfig config;
  config.set_check_mem_overlap(false);  // Fill is idempotent, so overlap is okay
  config.check_all_same_dtype(false);
  config.add_output(self);
  config.resize_outputs(false);
  auto iter = config.build();

  fill_stub(iter.device_type(), iter, value);
  return self;
}
// Fills the quantized tensor `self` with `value` in place and returns `self`.
//
// A float tensor with the same sizes as `self` is built (ones scaled by
// `value`), moved to `self`'s device, and then copied into `self`.
Tensor& fill_out_quantized(Tensor& self, const Scalar& value) {
  const at::Tensor float_ones = at::ones(self.sizes()).to(kFloat);
  const at::Tensor filled = (float_ones * value).to(self.device());
  // Trust `copy_` to handle the quantization and the boundary checks.
  self.copy_(filled);
  return self;
}
// In-place fill with a Scalar value; forwards to fill_out and returns its result.
Tensor& fill_(Tensor& self, const Scalar& value) {
  Tensor& filled = fill_out(self, value);
  return filled;
}
// In-place fill of a quantized tensor with a Scalar value; forwards to
// fill_out_quantized and returns its result.
Tensor& fill_quantized_(Tensor& self, const Scalar& value) {
  Tensor& filled = fill_out_quantized(self, value);
  return filled;
}
// In-place fill where the fill value is supplied as a tensor.
//
// `value` must be 0-dimensional (checked with TORCH_CHECK); its single element
// is extracted with item() and passed on to fill_out.
Tensor& fill_(Tensor& self, const Tensor& value) {
  const auto value_dims = value.dim();
  TORCH_CHECK(value_dims == 0, "fill_ only supports 0-dimension value tensor but got tensor with ", value_dims, " dimensions.");
  const Scalar fill_scalar = value.item();
  return fill_out(self, fill_scalar);
}
// In-place fill of a quantized tensor where the fill value is supplied as a
// tensor.
//
// `value` must be 0-dimensional (checked with TORCH_CHECK); its single element
// is extracted with item() and passed on to fill_out_quantized.
Tensor& fill_quantized_(Tensor& self, const Tensor& value) {
  const auto value_dims = value.dim();
  TORCH_CHECK(value_dims == 0, "fill_ only supports 0-dimension value tensor but got tensor with ", value_dims, " dimensions.");
  const Scalar fill_scalar = value.item();
  return fill_out_quantized(self, fill_scalar);
}
// Meta variant of fill_ for a Scalar value: touches nothing and returns `self`
// as is. The fill value is not used, so the parameter is left unnamed.
Tensor& fill_meta_(Tensor& self, const Scalar& /*value*/) {
  return self;
}
// Meta variant of fill_ for a tensor value: only validates that `value` is
// 0-dimensional (via TORCH_CHECK), then returns `self` without modifying it.
Tensor& fill_meta_(Tensor& self, const Tensor& value) {
  const auto value_dims = value.dim();
  TORCH_CHECK(value_dims == 0, "fill_ only supports 0-dimension value tensor but got tensor with ", value_dims, " dimensions.");
  return self;
}
// Defines the `fill_stub` dispatch stub that fill_out calls with the iterator's device type.
// NOTE(review): the matching declaration and the per-device kernel registrations are not in
// this file — presumably ATen/native/Fill.h and the backend kernel sources; confirm.
DEFINE_DISPATCH(fill_stub);
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fill_diagonal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Fills the main diagonal of `self` with `fill_value` in place and returns
// `self`.
//
// Preconditions (enforced with TORCH_CHECK):
//   * `self` has at least 2 dimensions;
//   * if `self` has more than 2 dimensions, every dimension has the same
//     length as dimension 0.
//
// The diagonal is addressed as a 1-D as_strided view over `self`: it has
// min(size(0), size(1)) elements and a step equal to the sum of all of
// `self`'s strides, so advancing one element moves one index along every
// dimension at once.
//
// `wrap` only has an effect for 2-D inputs with height > width + 1. In that
// case a second strided view is filled, starting at row `width + 1`, column 0
// and reusing the same step.
// NOTE(review): the wrap view uses the single step stride(0) + stride(1). That
// matches a flat step of `width + 1` over a row-major layout only when
// stride(0) == width * stride(1); for other layouts a wrap segment longer than
// `width` elements looks like it would land on unintended elements — confirm
// whether non-contiguous inputs are expected here.
Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) {
  const int64_t nDims = self.dim();
  TORCH_CHECK(nDims >= 2, "dimensions must larger than 1");

  const int64_t height = self.size(0);
  const int64_t width = self.size(1);

  if (nDims > 2) {
    // Higher-rank inputs must be "hyper-cubes": compare each dim to dim 0.
    for (const auto i : c10::irange(1, nDims)) {
      TORCH_CHECK(self.size(i) == height, "all dimensions of input must be of equal length");
    }
  }

  const int64_t storage_offset = self.storage_offset();
  const int64_t size = std::min(height, width);

  // Stepping by the sum of all strides advances every index by one.
  int64_t stride = 0;
  for (const auto i : c10::irange(nDims)) {
    stride += self.stride(i);
  }
  const std::vector<int64_t> sizes{size};
  const std::vector<int64_t> strides{stride};

  auto main_diag = self.as_strided(sizes, strides, storage_offset);
  main_diag.fill_(fill_value);

  if (wrap && nDims == 2 && height > width + 1) {
    // Number of flat positions 0, step, 2*step, ... below numel, minus the
    // `size` positions already covered by the main diagonal.
    const int64_t step = width + 1;
    const int64_t wrap_size = ((self.numel() + step - 1) / step) - size;
    const std::vector<int64_t> wrap_sizes{wrap_size};

    // The wrapped part starts at row `width + 1`, column 0.
    const int64_t offset = self.stride(0) * (width + 1);

    auto wrap_diag = self.as_strided(wrap_sizes, strides, storage_offset + offset);
    wrap_diag.fill_(fill_value);
  }

  return self;
}
// Zeroes `self` by writing zero bytes directly over its data buffer.
//
// `nelements` is the element count to clear; the byte count is that number
// times the dtype's item size, written starting at self.data_ptr(). When the
// data pointer is null the call falls back to self.fill_(0) instead.
Tensor& zero_cpu_(Tensor &self, int64_t nelements) {
  void* const data = self.data_ptr();
  if (data != nullptr) {
    const int64_t byte_count = nelements * self.dtype().itemsize();
    // Skip the memset entirely when there is nothing to clear.
    if (byte_count > 0) {
      std::memset(data, 0, byte_count);
    }
    return self;
  }
  return self.fill_(0);
}
// Sets every element of `self` to zero in place and returns `self`.
//
// The direct zero_cpu_ path is taken only when all three conditions hold:
// `self` is on the CPU, it is non-overlapping and dense, and its element
// count is below internal::GRAIN_SIZE. Otherwise the generic fill_(0) is used.
Tensor& zero_(Tensor &self) {
  const int64_t total_elements = c10::multiply_integers(self.sizes());
  const bool use_direct_path = self.device() == at::kCPU &&
      self.is_non_overlapping_and_dense() &&
      total_elements < internal::GRAIN_SIZE;
  if (!use_direct_path) {
    return self.fill_(0);
  }
  return zero_cpu_(self, total_elements);
}
// Meta variant of zero_: performs no work and returns `self` unchanged.
Tensor& zero_meta_(Tensor& self) {
return self;
}
} // namespace native
} // namespace at