Skip to content

Commit

Permalink
Avoid redundant computation due to auto+Eigen
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom94 committed Mar 14, 2022
1 parent 44080b8 commit ad31b9d
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 34 deletions.
20 changes: 9 additions & 11 deletions include/neural-graphics-primitives/common_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ inline __host__ __device__ Eigen::Array3f linear_to_srgb_derivative(const Eigen:

template <uint32_t N_DIMS, typename T>
__host__ __device__ Eigen::Matrix<float, N_DIMS, 1> read_image(const T* __restrict__ data, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) {
auto pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)};
Eigen::Vector2i texel = pos_float.cast<int>();
const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)};
const Eigen::Vector2i texel = pos_float.cast<int>();

auto weight = pos_float - texel.cast<float>();
const Eigen::Vector2f weight = pos_float - texel.cast<float>();

auto read_val = [&](Eigen::Vector2i pos) {
pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
Expand All @@ -100,22 +100,20 @@ __host__ __device__ Eigen::Matrix<float, N_DIMS, 1> read_image(const T* __restri
return result;
};

auto result = (
return (
(1 - weight.x()) * (1 - weight.y()) * read_val({texel.x(), texel.y()}) +
(weight.x()) * (1 - weight.y()) * read_val({texel.x()+1, texel.y()}) +
(1 - weight.x()) * (weight.y()) * read_val({texel.x(), texel.y()+1}) +
(weight.x()) * (weight.y()) * read_val({texel.x()+1, texel.y()+1})
);

return result;
}

template <uint32_t N_DIMS, typename T>
__device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& value, T* __restrict__ gradient, T* __restrict__ gradient_weight, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) {
auto pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)};
Eigen::Vector2i texel = pos_float.cast<int>();
const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)};
const Eigen::Vector2i texel = pos_float.cast<int>();

auto weight = pos_float - texel.cast<float>();
const Eigen::Vector2f weight = pos_float - texel.cast<float>();

auto deposit_val = [&](const Eigen::Matrix<float, N_DIMS, 1>& value, T weight, Eigen::Vector2i pos) {
pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
Expand Down Expand Up @@ -210,7 +208,7 @@ inline __host__ __device__ Ray pixel_to_ray_pinhole(
float focus_z = 1.0f,
float dof = 0.0f
) {
auto uv = pixel.cast<float>().cwiseQuotient(resolution.cast<float>());
const Eigen::Vector2f uv = pixel.cast<float>().cwiseQuotient(resolution.cast<float>());

Eigen::Vector3f dir = {
(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
Expand Down Expand Up @@ -258,7 +256,7 @@ inline __host__ __device__ Ray pixel_to_ray(
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
) {
Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp, pixel.x(), pixel.y());
auto uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());

Eigen::Vector3f dir;
if (camera_distortion.mode == ECameraDistortionMode::FTheta) {
Expand Down
4 changes: 2 additions & 2 deletions src/testbed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ void Testbed::imgui() {
if (ImGui::SliderFloat("Crop size", &render_diam, 0.1f, max_diam, "%.3f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat)) {
accum_reset = true;
if (old_render_diam > 0.f && render_diam > 0.f) {
auto center = (m_render_aabb.max + m_render_aabb.min) * 0.5f;
const Vector3f center = (m_render_aabb.max + m_render_aabb.min) * 0.5f;
float scale = render_diam / old_render_diam;
m_render_aabb.max = ((m_render_aabb.max-center) * scale + center).cwiseMin(m_aabb.max);
m_render_aabb.min = ((m_render_aabb.min-center) * scale + center).cwiseMax(m_aabb.min);
Expand Down Expand Up @@ -859,7 +859,7 @@ void Testbed::draw_visualizations(const Matrix<float, 3, 4>& camera_matrix) {

float zscale = 1.0f / focal[m_fov_axis];
float xyscale = (float)m_window_res[m_fov_axis];
auto screen_center = render_screen_center();
Vector2f screen_center = render_screen_center();
view2proj <<
xyscale, 0, (float)m_window_res.x()*screen_center.x()*zscale, 0,
0, xyscale, (float)m_window_res.y()*screen_center.y()*zscale, 0,
Expand Down
10 changes: 5 additions & 5 deletions src/testbed_image.cu
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ __global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restri

uint32_t idx = x + resolution.x() * y;

auto uv = positions[idx];
const Vector2f uv = positions[idx];
if (uv.x() < 0.0f || uv.x() > 1.0f || uv.y() < 0.0f || uv.y() > 1.0f) {
frame_buffer[idx] = Array4f::Zero();
return;
Expand Down Expand Up @@ -186,7 +186,7 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr

auto read_val = [&](int x, int y) {
auto val = ((tcnn::vector_t<T, 4>*)texture)[y * resolution.x() + x];
auto result = Array4f(val[0], val[1], val[2], val[3]);
Array4f result{val[0], val[1], val[2], val[3]};
if (!linear_colors) {
result.head<3>() = linear_to_srgb(result.head<3>());
}
Expand All @@ -202,10 +202,10 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr
} else {
pos = (pos.cwiseProduct(resolution.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));

Vector2i pos_int = pos.cast<int>();
auto weight = pos - pos_int.cast<float>();
const Vector2i pos_int = pos.cast<int>();
const Vector2f weight = pos - pos_int.cast<float>();

Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);

val =
(1 - weight.x()) * (1 - weight.y()) * read_val(idx.x(), idx.y()) +
Expand Down
24 changes: 9 additions & 15 deletions src/testbed_nerf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1142,49 +1142,43 @@ __device__ LossAndGradient loss_and_gradient(const Vector3f& target, const Vecto
inline __device__ Array3f composit_and_lerp(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) {
pos = (pos.cwiseProduct(resolution.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));

Vector2i pos_int = pos.cast<int>();
auto weight = pos - pos_int.cast<float>();
const Vector2i pos_int = pos.cast<int>();
const Vector2f weight = pos - pos_int.cast<float>();

Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);

auto read_val = [&](const Vector2i& p) {
__half val[4];
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]);
};

Array3f result = (
return (
(1 - weight.x()) * (1 - weight.y()) * read_val({idx.x(), idx.y()}) +
(weight.x()) * (1 - weight.y()) * read_val({idx.x()+1, idx.y()}) +
(1 - weight.x()) * (weight.y()) * read_val({idx.x(), idx.y()+1}) +
(weight.x()) * (weight.y()) * read_val({idx.x()+1, idx.y()+1})
);

return result;
}

inline __device__ Array3f composit(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) {
Vector2i idx = image_pos(pos, resolution);

auto read_val = [&](const Vector2i& p) {
__half val[4];
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]);
};

return read_val(idx);
return read_val(image_pos(pos, resolution));
}

inline __device__ Array4f read_rgba(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images) {
Vector2i idx = image_pos(pos, resolution);

auto read_val = [&](const Vector2i& p) {
__half val[4];
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
return Array4f{val[0], val[1], val[2], val[3]};
};

return read_val(idx);
return read_val(image_pos(pos, resolution));
}

__global__ void compute_loss_kernel_train_nerf(
Expand Down Expand Up @@ -1363,9 +1357,9 @@ __global__ void compute_loss_kernel_train_nerf(
}

if (error_map) {
Vector2f pos = (xy.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
Vector2i pos_int = pos.cast<int>();
auto weight = pos - pos_int.cast<float>();
const Vector2f pos = (xy.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
const Vector2i pos_int = pos.cast<int>();
const Vector2f weight = pos - pos_int.cast<float>();

Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);

Expand Down
2 changes: 1 addition & 1 deletion src/testbed_volume.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer,
m_volume.hit_counter.enlarge(2);
m_volume.hit_counter.memset(0);

auto sky_col = m_background_color.head<3>();
Array3f sky_col = m_background_color.head<3>();

const dim3 threads = { 16, 8, 1 };
const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };
Expand Down

0 comments on commit ad31b9d

Please sign in to comment.