From ad31b9d2289582bafc0e07f3465f070de8dd1134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BCller?= Date: Mon, 14 Mar 2022 16:29:56 +0100 Subject: [PATCH] Avoid redundant computation due to auto+Eigen --- .../common_device.cuh | 20 +++++++--------- src/testbed.cu | 4 ++-- src/testbed_image.cu | 10 ++++---- src/testbed_nerf.cu | 24 +++++++------------ src/testbed_volume.cu | 2 +- 5 files changed, 26 insertions(+), 34 deletions(-) diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh index 0577880c5..458294453 100644 --- a/include/neural-graphics-primitives/common_device.cuh +++ b/include/neural-graphics-primitives/common_device.cuh @@ -77,10 +77,10 @@ inline __host__ __device__ Eigen::Array3f linear_to_srgb_derivative(const Eigen: template __host__ __device__ Eigen::Matrix read_image(const T* __restrict__ data, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) { - auto pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)}; - Eigen::Vector2i texel = pos_float.cast(); + const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)}; + const Eigen::Vector2i texel = pos_float.cast(); - auto weight = pos_float - texel.cast(); + const Eigen::Vector2f weight = pos_float - texel.cast(); auto read_val = [&](Eigen::Vector2i pos) { pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0); @@ -100,22 +100,20 @@ __host__ __device__ Eigen::Matrix read_image(const T* __restri return result; }; - auto result = ( + return ( (1 - weight.x()) * (1 - weight.y()) * read_val({texel.x(), texel.y()}) + (weight.x()) * (1 - weight.y()) * read_val({texel.x()+1, texel.y()}) + (1 - weight.x()) * (weight.y()) * read_val({texel.x(), texel.y()+1}) + (weight.x()) * (weight.y()) * read_val({texel.x()+1, texel.y()+1}) ); - - return result; } template __device__ void deposit_image_gradient(const Eigen::Matrix& value, T* __restrict__ gradient, T* __restrict__ gradient_weight, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) { - auto pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)}; - Eigen::Vector2i texel = pos_float.cast(); + const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)}; + const Eigen::Vector2i texel = pos_float.cast(); - auto weight = pos_float - texel.cast(); + const Eigen::Vector2f weight = pos_float - texel.cast(); auto deposit_val = [&](const Eigen::Matrix& value, T weight, Eigen::Vector2i pos) { pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0); @@ -210,7 +208,7 @@ inline __host__ __device__ Ray pixel_to_ray_pinhole( float focus_z = 1.0f, float dof = 0.0f ) { - auto uv = pixel.cast().cwiseQuotient(resolution.cast()); + const Eigen::Vector2f uv = pixel.cast().cwiseQuotient(resolution.cast()); Eigen::Vector3f dir = { (uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(), @@ -258,7 +256,7 @@ inline __host__ __device__ Ray pixel_to_ray( const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero() ) { Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp, pixel.x(), pixel.y()); - auto uv = (pixel.cast() + offset).cwiseQuotient(resolution.cast()); + Eigen::Vector2f uv = (pixel.cast() + offset).cwiseQuotient(resolution.cast()); Eigen::Vector3f dir; if (camera_distortion.mode == ECameraDistortionMode::FTheta) { diff --git a/src/testbed.cu b/src/testbed.cu index f12d8df53..5383ad7e8 100644 --- a/src/testbed.cu +++ b/src/testbed.cu @@ -478,7 +478,7 @@ void Testbed::imgui() { if (ImGui::SliderFloat("Crop size", &render_diam, 0.1f, max_diam, "%.3f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat)) { accum_reset = true; if (old_render_diam > 0.f && render_diam > 0.f) { - auto center = (m_render_aabb.max + m_render_aabb.min) * 0.5f; + const Vector3f center = (m_render_aabb.max + m_render_aabb.min) * 0.5f; float scale = render_diam / old_render_diam; m_render_aabb.max = ((m_render_aabb.max-center) * scale + center).cwiseMin(m_aabb.max); m_render_aabb.min = ((m_render_aabb.min-center) * scale + center).cwiseMax(m_aabb.min); @@ -859,7 +859,7 @@ void Testbed::draw_visualizations(const Matrix& camera_matrix) { float zscale = 1.0f / focal[m_fov_axis]; float xyscale = (float)m_window_res[m_fov_axis]; - auto screen_center = render_screen_center(); + Vector2f screen_center = render_screen_center(); view2proj << xyscale, 0, (float)m_window_res.x()*screen_center.x()*zscale, 0, 0, xyscale, (float)m_window_res.y()*screen_center.y()*zscale, 0, diff --git a/src/testbed_image.cu b/src/testbed_image.cu index ec71f0cb2..09e729075 100644 --- a/src/testbed_image.cu +++ b/src/testbed_image.cu @@ -152,7 +152,7 @@ __global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restri uint32_t idx = x + resolution.x() * y; - auto uv = positions[idx]; + const Vector2f uv = positions[idx]; if (uv.x() < 0.0f || uv.x() > 1.0f || uv.y() < 0.0f || uv.y() > 1.0f) { frame_buffer[idx] = Array4f::Zero(); return; @@ -186,7 +186,7 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr auto read_val = [&](int x, int y) { auto val = ((tcnn::vector_t*)texture)[y * resolution.x() + x]; - auto result = Array4f(val[0], val[1], val[2], val[3]); + Array4f result{val[0], val[1], val[2], val[3]}; if (!linear_colors) { result.head<3>() = linear_to_srgb(result.head<3>()); } @@ -202,10 +202,10 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr } else { pos = (pos.cwiseProduct(resolution.cast()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast() - Vector2f::Constant(1.0f + 1e-4f)); - Vector2i pos_int = pos.cast(); - auto weight = pos - pos_int.cast(); + const Vector2i pos_int = pos.cast(); + const Vector2f weight = pos - pos_int.cast(); - Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0); + const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0); val = (1 - weight.x()) * (1 - weight.y()) * read_val(idx.x(), idx.y()) + diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu index 79ec5ac24..7d51aee7f 100644 --- a/src/testbed_nerf.cu +++ b/src/testbed_nerf.cu @@ -1142,10 +1142,10 @@ __device__ LossAndGradient loss_and_gradient(const Vector3f& target, const Vecto inline __device__ Array3f composit_and_lerp(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) { pos = (pos.cwiseProduct(resolution.cast()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast() - Vector2f::Constant(1.0f + 1e-4f)); - Vector2i pos_int = pos.cast(); - auto weight = pos - pos_int.cast(); + const Vector2i pos_int = pos.cast(); + const Vector2f weight = pos - pos_int.cast(); - Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0); + const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0); auto read_val = [&](const Vector2i& p) { __half val[4]; @@ -1153,38 +1153,32 @@ inline __device__ Array3f composit_and_lerp(Vector2f pos, const Vector2i& resolu return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]); }; - Array3f result = ( + return ( (1 - weight.x()) * (1 - weight.y()) * read_val({idx.x(), idx.y()}) + (weight.x()) * (1 - weight.y()) * read_val({idx.x()+1, idx.y()}) + (1 - weight.x()) * (weight.y()) * read_val({idx.x(), idx.y()+1}) + (weight.x()) * (weight.y()) * read_val({idx.x()+1, idx.y()+1}) ); - - return result; } inline __device__ Array3f composit(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) { - Vector2i idx = image_pos(pos, resolution); - auto read_val = [&](const Vector2i& p) { __half val[4]; *(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)]; return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]); }; - return read_val(idx); + return read_val(image_pos(pos, resolution)); } inline __device__ Array4f read_rgba(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images) { - Vector2i idx = image_pos(pos, resolution); - auto read_val = [&](const Vector2i& p) { __half val[4]; *(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)]; return Array4f{val[0], val[1], val[2], val[3]}; }; - return read_val(idx); + return read_val(image_pos(pos, resolution)); } __global__ void compute_loss_kernel_train_nerf( @@ -1363,9 +1357,9 @@ __global__ void compute_loss_kernel_train_nerf( } if (error_map) { - Vector2f pos = (xy.cwiseProduct(error_map_res.cast()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast() - Vector2f::Constant(1.0f + 1e-4f)); - Vector2i pos_int = pos.cast(); - auto weight = pos - pos_int.cast(); + const Vector2f pos = (xy.cwiseProduct(error_map_res.cast()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast() - Vector2f::Constant(1.0f + 1e-4f)); + const Vector2i pos_int = pos.cast(); + const Vector2f weight = pos - pos_int.cast(); Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0); diff --git a/src/testbed_volume.cu b/src/testbed_volume.cu index d74b26883..23faa984f 100644 --- a/src/testbed_volume.cu +++ b/src/testbed_volume.cu @@ -419,7 +419,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer, m_volume.hit_counter.enlarge(2); m_volume.hit_counter.memset(0); - auto sky_col = m_background_color.head<3>(); + Array3f sky_col = m_background_color.head<3>(); const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };