From b9cf301b10c464f4e31813fccc91c5755efa1276 Mon Sep 17 00:00:00 2001
From: Thomas Rouch
Date: Thu, 23 Jun 2022 14:59:24 +0200
Subject: [PATCH] :bug: take 'dataset scale' into account when using orthographic camera

---
 .../common_device.cuh                        | 22 ++++++++++++++-----
 include/neural-graphics-primitives/testbed.h |  3 ++-
 src/testbed.cu                               |  9 +++++---
 src/testbed_nerf.cu                          | 15 ++++++++-----
 4 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh
index 0ad58fc8e..9ff5be99f 100644
--- a/include/neural-graphics-primitives/common_device.cuh
+++ b/include/neural-graphics-primitives/common_device.cuh
@@ -273,7 +273,8 @@ inline __host__ __device__ Ray pixel_to_ray(
 	const ECameraMode camera_mode = ECameraMode::Perspective,
 	const CameraDistortion& camera_distortion = {},
 	const float* __restrict__ distortion_data = nullptr,
-	const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
+	const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero(),
+	const float dataset_scale = 1.f
 ) {
 	Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp);
 	Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
@@ -283,12 +284,16 @@ inline __host__ __device__ Ray pixel_to_ray(
 	Eigen::Vector3f dir;
 	Eigen::Vector3f head_pos;
 	if(camera_mode == ECameraMode::Orthographic){
+		// 'dataset_scale' argument is only required by the orthographic camera.
+		// The focal length of Environment and Perspective cameras isn't affected by the change of dataset_scale,
+		// because all rays originate from the same point
 		dir = {0.f, 0.f, 1.f}; // Camera forward
 		head_pos = {
 			(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
 			(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
 			0.0f
 		};
+		head_pos *= dataset_scale;
 		head_pos += shift;
 		dir -= shift / parallax_shift.z();	// we could use focus_z here in the denominator. for now, we pack m_scale in here.
 	}
@@ -354,7 +359,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
 	const Eigen::Vector2f& screen_center,
 	const Eigen::Vector3f& parallax_shift,
 	const ECameraMode camera_mode,
-	const CameraDistortion& camera_distortion = {}
+	const CameraDistortion& camera_distortion = {},
+	const float dataset_scale = 1.f
 ) {
 	// We get 'pos' as an input. We have pos = origin + alpha*dir, with unknown alpha
 	// tmp_dir = R^-1*(pos-t)
@@ -368,7 +374,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
 	// origin = R*(head_pos+shift) + t
 	tmp_dir -= shift;
 	const Eigen::Vector3f head_dir_minus_shift = Eigen::Vector3f(0.f, 0.f, 1.f) - shift/parallax_shift.z();
-	const Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
+	Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
+	head_pos /= dataset_scale;
 	return {
 		head_pos.x() * focal_length.x() + screen_center.x() * resolution.x(),
 		head_pos.y() * focal_length.y() + screen_center.y() * resolution.y(),
@@ -426,7 +433,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 	const bool snap_to_pixel_centers,
 	const float depth,
 	const ECameraMode camera_mode,
-	const CameraDistortion& camera_distortion = {}
+	const CameraDistortion& camera_distortion = {},
+	const float dataset_scale = 1.f
 ) {
 	Ray ray = pixel_to_ray(
 		sample_index,
@@ -442,7 +450,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 		camera_mode,
 		camera_distortion,
 		nullptr,
-		Eigen::Vector2i::Zero()
+		Eigen::Vector2i::Zero(),
+		dataset_scale
 	);

 	Eigen::Vector2f prev_pixel = pos_to_pixel(
@@ -453,7 +462,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 		screen_center,
 		parallax_shift,
 		camera_mode,
-		camera_distortion
+		camera_distortion,
+		dataset_scale
 	);

 	return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index));
diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h
index 9aa7a7e95..dcc5df818 100644
--- a/include/neural-graphics-primitives/testbed.h
+++ b/include/neural-graphics-primitives/testbed.h
@@ -157,7 +157,8 @@ class Testbed {
 			float cone_angle_constant,
 			ERenderMode render_mode,
 			ECameraMode camera_mode,
-			cudaStream_t stream
+			cudaStream_t stream,
+			float dataset_scale
 		);

 		uint32_t trace(
diff --git a/src/testbed.cu b/src/testbed.cu
index eabf6fced..bd0bf3508 100644
--- a/src/testbed.cu
+++ b/src/testbed.cu
@@ -2450,7 +2450,8 @@ __global__ void dlss_prep_kernel(
 	const Vector2f image_pos,
 	const Vector2f prev_image_pos,
 	const Vector2i image_resolution,
-	const ECameraMode camera_mode
+	const ECameraMode camera_mode,
+	const float dataset_scale = 1.f
 ) {
 	uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
 	uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -2489,7 +2490,8 @@ __global__ void dlss_prep_kernel(
 		snap_to_pixel_centers,
 		depth,
 		camera_mode,
-		camera_distortion
+		camera_distortion,
+		dataset_scale
 	);

 	surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig);
@@ -2652,7 +2654,8 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
 			m_image.pos,
 			m_image.prev_pos,
 			m_image.resolution,
-			m_camera_mode
+			m_camera_mode,
+			m_nerf.training.dataset.scale
 		);

 		render_buffer.set_dlss_sharpening(m_dlss_sharpening);
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
index 5b1bc206f..b0d34964a 100644
--- a/src/testbed_nerf.cu
+++ b/src/testbed_nerf.cu
@@ -1791,7 +1791,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
 	const float* __restrict__ distortion_data,
 	const Vector2i distortion_resolution,
 	ERenderMode render_mode,
-	ECameraMode camera_mode
+	ECameraMode camera_mode,
+	float dataset_scale
 ) {
 	uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
 	uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -1825,7 +1826,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
 		camera_mode,
 		camera_distortion,
 		distortion_data,
-		distortion_resolution
+		distortion_resolution,
+		dataset_scale
 	);

 	NerfPayload& payload = payloads[idx];
@@ -1973,7 +1975,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
 	float cone_angle_constant,
 	ERenderMode render_mode,
 	ECameraMode camera_mode,
-	cudaStream_t stream
+	cudaStream_t stream,
+	float dataset_scale
 ) {
 	// Make sure we have enough memory reserved to render at the requested resolution
 	size_t n_pixels = (size_t)resolution.x() * resolution.y();
@@ -2004,7 +2007,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
 		distortion_data,
 		distortion_resolution,
 		render_mode,
-		camera_mode
+		camera_mode,
+		dataset_scale
 	);

 	m_n_rays_initialized = resolution.x() * resolution.y();
@@ -2268,7 +2272,8 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r
 		m_nerf.cone_angle_constant,
 		render_mode,
 		m_camera_mode,
-		stream
+		stream,
+		m_nerf.training.dataset.scale
 	);

 	uint32_t n_hit;
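
Note (not part of the patch): the new comment in pixel_to_ray argues that only the orthographic camera needs dataset_scale, because perspective and environment rays all start at the camera origin, while orthographic ray origins are spread across the image plane in scene units and therefore have to follow the dataset's rescaling. The standalone sketch below illustrates that reasoning under simplified assumptions; Vec3, orthographic_origin and perspective_origin are hypothetical helpers written for this note, not functions from the instant-ngp codebase, and 0.33 is only used as a typical NeRF dataset scale.

// Standalone illustration: why only the orthographic camera needs 'dataset_scale'.
#include <cstdio>

struct Vec3 { float x, y, z; };

// Perspective/environment cameras: every ray starts at the camera origin.
// Rescaling the dataset never moves this point, so no dataset_scale is needed.
Vec3 perspective_origin() {
	return {0.f, 0.f, 0.f};
}

// Orthographic camera: the ray origin lies on the image plane, offset from the
// optical axis in scene units. If the dataset was rescaled by dataset_scale,
// the per-pixel offset has to follow, which is what the added
// `head_pos *= dataset_scale;` line in the patch does.
Vec3 orthographic_origin(float u, float v,             // pixel offset from screen center, in [-0.5, 0.5]
                         float focal_x, float focal_y, // focal length in pixels
                         int res_x, int res_y,         // image resolution
                         float dataset_scale) {
	Vec3 head_pos = {
		u * (float)res_x / focal_x,
		v * (float)res_y / focal_y,
		0.f
	};
	head_pos.x *= dataset_scale;
	head_pos.y *= dataset_scale;
	head_pos.z *= dataset_scale;
	return head_pos;
}

int main() {
	// A pixel at the right edge of a 1920x1080 image with a 1000px focal length,
	// rendered against a dataset rescaled by 0.33 (a common NeRF scale).
	Vec3 o = orthographic_origin(0.5f, 0.f, 1000.f, 1000.f, 1920, 1080, 0.33f);
	Vec3 p = perspective_origin();
	std::printf("orthographic ray origin: (%.3f, %.3f, %.3f)\n", o.x, o.y, o.z);
	std::printf("perspective  ray origin: (%.3f, %.3f, %.3f)\n", p.x, p.y, p.z);
	return 0;
}

Without the scaling of head_pos, an orthographic render of a rescaled dataset would sample ray origins that are too far apart (or too close together) by the inverse of the scale, so the image would appear zoomed relative to the training views; pos_to_pixel applies the inverse division so motion vectors stay consistent with pixel_to_ray.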