Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ add 360 and ortho cameras #705

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/neural-graphics-primitives/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ enum class ERenderMode : int {
};
static constexpr const char* RenderModeStr = "AO\0Shade\0Normals\0Positions\0Depth\0Distortion\0Cost\0Slice\0\0";

enum class ECameraMode : int {
Perspective,
Orthographic,
Environment
};

static constexpr const char* CameraModeStr = "Perspective\0Orthographic\0Environment\0\0";

enum class ERandomMode : int {
Random,
Halton,
Expand Down
136 changes: 106 additions & 30 deletions include/neural-graphics-primitives/common_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -270,40 +270,75 @@ inline __host__ __device__ Ray pixel_to_ray(
bool snap_to_pixel_centers = false,
float focus_z = 1.0f,
float dof = 0.0f,
const ECameraMode camera_mode = ECameraMode::Perspective,
const CameraDistortion& camera_distortion = {},
const float* __restrict__ distortion_data = nullptr,
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero(),
const float dataset_scale = 1.f
) {
Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp);
Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());

const Eigen::Vector3f shift = {parallax_shift.x(), parallax_shift.y(), 0.f};
Eigen::Vector3f dir;
if (camera_distortion.mode == ECameraDistortionMode::FTheta) {
dir = f_theta_undistortion(uv - screen_center, camera_distortion.params, {1000.f, 0.f, 0.f});
if (dir.x() == 1000.f) {
return {{1000.f, 0.f, 0.f}, {0.f, 0.f, 1.f}}; // return a point outside the aabb so the pixel is not rendered
}
} else if (camera_distortion.mode == ECameraDistortionMode::LatLong) {
dir = latlong_to_dir(uv);
} else {
dir = {

Eigen::Vector3f head_pos;
if(camera_mode == ECameraMode::Orthographic){
// 'dataset_scale' argument is only required by the orthographic camera.
// The focal length of Environment and Perspective cameras isn't affected by the change of dataset_scale,
// because all rays originate from the same point
dir = {0.f, 0.f, 1.f}; // Camera forward
head_pos = {
(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
1.0f
0.0f
};
if (camera_distortion.mode == ECameraDistortionMode::Iterative) {
iterative_camera_undistortion(camera_distortion.params, &dir.x(), &dir.y());
}
head_pos *= dataset_scale;
head_pos += shift;
dir -= shift / parallax_shift.z(); // we could use focus_z here in the denominator. for now, we pack m_scale in here.
}
if (distortion_data) {
dir.head<2>() += read_image<2>(distortion_data, distortion_resolution, uv);
else if(camera_mode == ECameraMode::Environment){
// Camera convention: XYZ <-> Right Down Front
head_pos = {0.f, 0.f, 0.f};
const float phi = (uv.y()-0.5) * M_PI;
const float theta = (uv.x()-0.5) * 2.0 * M_PI;
const float cos_phi = std::cos(phi);
dir = {
cos_phi*std::sin(theta),
std::sin(phi),
cos_phi*std::cos(theta)
};
// Parallax isn't handled
}
else { // Perspective
head_pos = {0.f, 0.f, 0.f};
if (camera_distortion.mode == ECameraDistortionMode::FTheta) {
dir = f_theta_undistortion(uv - screen_center, camera_distortion.params, {1000.f, 0.f, 0.f});
if (dir.x() == 1000.f) {
return {{1000.f, 0.f, 0.f}, {0.f, 0.f, 1.f}}; // return a point outside the aabb so the pixel is not rendered
}
} else if (camera_distortion.mode == ECameraDistortionMode::LatLong) {
dir = latlong_to_dir(uv);
} else {
dir = {
(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
1.0f
};
if (camera_distortion.mode == ECameraDistortionMode::Iterative) {
iterative_camera_undistortion(camera_distortion.params, &dir.x(), &dir.y());
}
}
if (distortion_data) {
dir.head<2>() += read_image<2>(distortion_data, distortion_resolution, uv);
}
head_pos += shift;
dir -= shift / parallax_shift.z(); // we could use focus_z here in the denominator. for now, we pack m_scale in here.
}

Eigen::Vector3f head_pos = {parallax_shift.x(), parallax_shift.y(), 0.f};
dir -= head_pos / parallax_shift.z(); // we could use focus_z here in the denominator. for now, we pack m_scale in here.
dir = camera_matrix.block<3, 3>(0, 0) * dir;

Eigen::Vector3f origin = camera_matrix.block<3, 3>(0, 0) * head_pos + camera_matrix.col(3);

if (dof == 0.0f) {
return {origin, dir};
}
Expand All @@ -323,16 +358,51 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
const Eigen::Matrix<float, 3, 4>& camera_matrix,
const Eigen::Vector2f& screen_center,
const Eigen::Vector3f& parallax_shift,
const CameraDistortion& camera_distortion = {}
const ECameraMode camera_mode,
const CameraDistortion& camera_distortion = {},
const float dataset_scale = 1.f
) {
// Express ray in terms of camera frame
Eigen::Vector3f head_pos = {parallax_shift.x(), parallax_shift.y(), 0.f};
Eigen::Vector3f origin = camera_matrix.block<3, 3>(0, 0) * head_pos + camera_matrix.col(3);
// We get 'pos' as an input. We have pos = origin + alpha*dir, with unknown alpha
// tmp_dir = R^-1*(pos-t)
Eigen::Vector3f tmp_dir = camera_matrix.block<3, 3>(0, 0).inverse() * (pos - camera_matrix.col(3));
const Eigen::Vector3f shift = {parallax_shift.x(), parallax_shift.y(), 0.f};

if(camera_mode == ECameraMode::Orthographic){
// head_pos = {..., ..., 0}
// head_dir = {0,0,1}
// dir = R*(head_dir-shift/z)
// origin = R*(head_pos+shift) + t
tmp_dir -= shift;
const Eigen::Vector3f head_dir_minus_shift = Eigen::Vector3f(0.f, 0.f, 1.f) - shift/parallax_shift.z();
Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
head_pos /= dataset_scale;
return {
head_pos.x() * focal_length.x() + screen_center.x() * resolution.x(),
head_pos.y() * focal_length.y() + screen_center.y() * resolution.y(),
};
}

if(camera_mode == ECameraMode::Environment){
// Parallax isn't handled
// head_dir = {..., ..., ...} with ||head_dir|| = 1
// dir = R*head_dir
// origin = t
tmp_dir = tmp_dir.normalized();
const float phi = std::asin(tmp_dir.y());
const float theta = std::atan2(tmp_dir.x(), tmp_dir.z());
return {
(0.5f + theta / (2.0*M_PI)) * resolution.x(),
(0.5f + phi / M_PI) * resolution.x(),
};
}

Eigen::Vector3f dir = pos - origin;
dir = camera_matrix.block<3, 3>(0, 0).inverse() * dir;
dir /= dir.z();
dir += head_pos / parallax_shift.z();
// Perspective
// head_dir = {..., ..., 1}
// dir = R*(head_dir-shift/z)
// origin = R*shift + t
tmp_dir -= shift;
tmp_dir /= tmp_dir.z();
Eigen::Vector3f dir = tmp_dir + shift / parallax_shift.z(); // Maintains dir.z=1 because shift.z=0

if (camera_distortion.mode == ECameraDistortionMode::Iterative) {
float du, dv;
Expand Down Expand Up @@ -362,7 +432,9 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
const Eigen::Vector3f& parallax_shift,
const bool snap_to_pixel_centers,
const float depth,
const CameraDistortion& camera_distortion = {}
const ECameraMode camera_mode,
const CameraDistortion& camera_distortion = {},
const float dataset_scale = 1.f
) {
Ray ray = pixel_to_ray(
sample_index,
Expand All @@ -375,9 +447,11 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
snap_to_pixel_centers,
1.0f,
0.0f,
camera_mode,
camera_distortion,
nullptr,
Eigen::Vector2i::Zero()
Eigen::Vector2i::Zero(),
dataset_scale
);

Eigen::Vector2f prev_pixel = pos_to_pixel(
Expand All @@ -387,7 +461,9 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
prev_camera,
screen_center,
parallax_shift,
camera_distortion
camera_mode,
camera_distortion,
dataset_scale
);

return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index));
Expand Down
5 changes: 4 additions & 1 deletion include/neural-graphics-primitives/testbed.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ class Testbed {
int show_accel,
float cone_angle_constant,
ERenderMode render_mode,
cudaStream_t stream
ECameraMode camera_mode,
cudaStream_t stream,
float dataset_scale
);

uint32_t trace(
Expand Down Expand Up @@ -469,6 +471,7 @@ class Testbed {
float m_bounding_radius = 1;
float m_exposure = 0.f;

ECameraMode m_camera_mode = ECameraMode::Perspective;
ERenderMode m_render_mode = ERenderMode::Shade;
EMeshRenderMode m_mesh_render_mode = EMeshRenderMode::VertexNormals;

Expand Down
8 changes: 8 additions & 0 deletions src/python_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ PYBIND11_MODULE(pyngp, m) {
.value("Slice", ERenderMode::Slice)
.export_values();

py::enum_<ECameraMode>(m, "CameraMode")
.value("Perspective", ECameraMode::Perspective)
.value("Orthographic", ECameraMode::Orthographic)
.value("Environment", ECameraMode::Environment)
.export_values();


py::enum_<ERandomMode>(m, "RandomMode")
.value("Random", ERandomMode::Random)
.value("Halton", ERandomMode::Halton)
Expand Down Expand Up @@ -423,6 +430,7 @@ PYBIND11_MODULE(pyngp, m) {
.def_readwrite("shall_train_network", &Testbed::m_train_network)
.def_readwrite("render_groundtruth", &Testbed::m_render_ground_truth)
.def_readwrite("render_mode", &Testbed::m_render_mode)
.def_readwrite("camera_mode", &Testbed::m_camera_mode)
.def_readwrite("slice_plane_z", &Testbed::m_slice_plane_z)
.def_readwrite("dof", &Testbed::m_dof)
.def_readwrite("autofocus", &Testbed::m_autofocus)
Expand Down
13 changes: 10 additions & 3 deletions src/testbed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ void Testbed::imgui() {
ImGui::Checkbox("Autofocus", &m_autofocus);

if (ImGui::TreeNode("Advanced camera settings")) {
accum_reset |= ImGui::Combo("Camera mode", (int*)&m_camera_mode, CameraModeStr);
accum_reset |= ImGui::SliderFloat2("Screen center", &m_screen_center.x(), 0.f, 1.f);
accum_reset |= ImGui::SliderFloat2("Parallax shift", &m_parallax_shift.x(), -1.f, 1.f);
accum_reset |= ImGui::SliderFloat("Slice / focus depth", &m_slice_plane_z, -m_bounding_radius, m_bounding_radius);
Expand Down Expand Up @@ -2505,7 +2506,9 @@ __global__ void dlss_prep_kernel(
const float prev_view_dist,
const Vector2f image_pos,
const Vector2f prev_image_pos,
const Vector2i image_resolution
const Vector2i image_resolution,
const ECameraMode camera_mode,
const float dataset_scale = 1.f
) {
uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
Expand Down Expand Up @@ -2543,7 +2546,9 @@ __global__ void dlss_prep_kernel(
parallax_shift,
snap_to_pixel_centers,
depth,
camera_distortion
camera_mode,
camera_distortion,
dataset_scale
);

surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig);
Expand Down Expand Up @@ -2705,7 +2710,9 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
m_prev_scale,
m_image.pos,
m_image.prev_pos,
m_image.resolution
m_image.resolution,
m_camera_mode,
m_nerf.training.dataset.scale
);

render_buffer.set_dlss_sharpening(m_dlss_sharpening);
Expand Down
22 changes: 16 additions & 6 deletions src/testbed_nerf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1790,7 +1790,9 @@ __global__ void init_rays_with_payload_kernel_nerf(
float* __restrict__ depthbuffer,
const float* __restrict__ distortion_data,
const Vector2i distortion_resolution,
ERenderMode render_mode
ERenderMode render_mode,
ECameraMode camera_mode,
float dataset_scale
) {
uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
Expand Down Expand Up @@ -1821,9 +1823,11 @@ __global__ void init_rays_with_payload_kernel_nerf(
snap_to_pixel_centers,
plane_z,
dof,
camera_mode,
camera_distortion,
distortion_data,
distortion_resolution
distortion_resolution,
dataset_scale
);

NerfPayload& payload = payloads[idx];
Expand Down Expand Up @@ -1970,7 +1974,9 @@ void Testbed::NerfTracer::init_rays_from_camera(
int show_accel,
float cone_angle_constant,
ERenderMode render_mode,
cudaStream_t stream
ECameraMode camera_mode,
cudaStream_t stream,
float dataset_scale
) {
// Make sure we have enough memory reserved to render at the requested resolution
size_t n_pixels = (size_t)resolution.x() * resolution.y();
Expand Down Expand Up @@ -2000,7 +2006,9 @@ void Testbed::NerfTracer::init_rays_from_camera(
depth_buffer,
distortion_data,
distortion_resolution,
render_mode
render_mode,
camera_mode,
dataset_scale
);

m_n_rays_initialized = resolution.x() * resolution.y();
Expand Down Expand Up @@ -2263,7 +2271,9 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r
m_nerf.show_accel,
m_nerf.cone_angle_constant,
render_mode,
stream
m_camera_mode,
stream,
m_nerf.training.dataset.scale
);

uint32_t n_hit;
Expand Down Expand Up @@ -2440,7 +2450,7 @@ void Testbed::Nerf::Training::export_camera_extrinsics(const std::string& filena
trajectory.emplace_back(frame);
}
std::ofstream file(filename);
file << std::setw(2) << trajectory << std::endl;
file << std::setw(2) << trajectory << std::endl;
}

Eigen::Matrix<float, 3, 4> Testbed::Nerf::Training::get_camera_extrinsics(int frame_idx) {
Expand Down