@ZachL1 · Created June 13, 2024
Parse Omnidata's camera intrinsics and poses, convert them to OpenCV format, and back-project an RGB-D image to a 3D point cloud.
import json
import math
import numpy as np
import cv2
import torch
from plyfile import PlyData, PlyElement
from pytorch3d.transforms import euler_angles_to_matrix
from pytorch3d.renderer import FoVPerspectiveCameras
# Adapted from Omnidata. The name is misleading: despite "cam_to_world", the
# returned R and T are actually the world-to-camera transform.
def _get_cam_to_world_R_T_K(point_info):
    EULER_X_OFFSET_RADS = math.radians(90.0)
    location = point_info['camera_location']
    rotation = point_info['camera_rotation_final']
    fov = point_info['field_of_view_rads']

    # Recover cam -> world
    ex, ey, ez = rotation
    R = euler_angles_to_matrix(torch.tensor(
        [(ex - EULER_X_OFFSET_RADS, -ey, -ez)],
        dtype=torch.double), 'XZY')
    Tx, Ty, Tz = location
    T = torch.tensor([[-Tx, Tz, Ty]], dtype=torch.double)

    # PyTorch3D expects world -> cam (row-vector convention), so invert the pose
    R_inv = R.transpose(1, 2)
    T_inv = -R.bmm(T.unsqueeze(-1)).squeeze(-1)

    K = FoVPerspectiveCameras(R=R_inv, T=T_inv, fov=fov, degrees=False).compute_projection_matrix(
        znear=0.001, zfar=512.0, fov=fov, aspect_ratio=1.0, degrees=False)
    return dict(
        cam_to_world_R=R_inv.squeeze(0).float(),
        cam_to_world_T=T_inv.squeeze(0).float(),
        proj_K=K.squeeze(0).float(),
        proj_K_inv=K[:, :3, :3].inverse().squeeze(0).float())
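
# A minimal sketch (not part of the original gist) of how the returned dict is
# meant to be consumed. PyTorch3D stores rotations for row-vector multiplication,
# i.e. x_cam = x_world @ R + T, which is why unproject_omni() below transposes
# cam_to_world_R before using it in column-vector form. The helper name and the
# Nx3 input layout are illustrative assumptions.
def _example_world_to_cam(rtk, xyz_world):
    # xyz_world: Nx3 float32 array of world-space points
    R = rtk['cam_to_world_R'].numpy()  # despite the key name: world -> camera
    T = rtk['cam_to_world_T'].numpy()
    return xyz_world @ R + T  # Nx3 points in PyTorch3D camera space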

def project_rgb_to_point_cloud(rgb_image, depth_map, K, R, t):
    """
    Unproject an RGB-D image to a colored point cloud.
    Camera coordinate system: x-right, y-down, z-forward (OpenCV convention).
    A world-space 3D point X projects to homogeneous pixel coordinates via
    x' = K * [R|t] * X (a sanity-check sketch follows this function).

    Parameters:
    - rgb_image: RGB image
    - depth_map: metric depth map
    - K: camera intrinsic matrix, i.e. [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
    - R: camera rotation matrix (world to camera)
    - t: camera translation vector (world to camera)

    Returns:
    - point_cloud: point cloud with color, Nx6 array; each row is [x, y, z, r, g, b]
    """
    h, w = depth_map.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')

    # Construct homogeneous pixel coordinates, sampled at pixel centers
    uv1 = np.stack([i.ravel() + 0.5, j.ravel() + 0.5, np.ones_like(i).ravel()], axis=1)

    # Pixel coordinates -> camera coordinates
    K_inv = np.linalg.inv(K)
    xyz_camera = K_inv @ (uv1.T * depth_map.ravel())

    # Camera coordinates -> world coordinates
    R_inv = R.T          # camera to world
    t_inv = -R_inv @ t   # camera to world
    xyz_world = R_inv @ xyz_camera + t_inv[:, None]
    xyz_world = xyz_world.T

    # Point cloud with color
    colors = rgb_image.reshape(-1, 3)
    point_cloud = np.hstack([xyz_world, colors])

    # Filter out invalid points
    valid_mask = (depth_map.ravel() > 0) & (depth_map.ravel() < 100)
    point_cloud = point_cloud[valid_mask]
    return point_cloud
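
# Hedged sanity check (a sketch, not part of the original gist): re-project the
# recovered world points with x' = K [R|t] X and confirm they land back on the
# pixel centers used above. The helper name and tolerance are assumptions.
def _check_reprojection(point_cloud, depth_map, K, R, t, atol=1e-4):
    xyz_world = point_cloud[:, :3].T       # 3xN world-space points
    xyz_cam = R @ xyz_world + t[:, None]   # world -> camera
    uvw = K @ xyz_cam                      # homogeneous pixel coordinates
    uv = uvw[:2] / uvw[2]                  # perspective divide
    # Expected pixel centers for the valid pixels, in the same raster order
    h, w = depth_map.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
    valid = (depth_map.ravel() > 0) & (depth_map.ravel() < 100)
    expected = np.stack([i.ravel() + 0.5, j.ravel() + 0.5])[:, valid]
    assert np.allclose(uv, expected, atol=atol)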

def save_point_cloud(points_3d, save_path, binary=True):
    """
    Save a colored point cloud to disk as a PLY file.

    Parameters:
    - points_3d: point cloud with color, Nx6 array; each row is [x, y, z, r, g, b]
    - save_path: path to save the point cloud
    - binary: write binary PLY if True, ASCII PLY otherwise
    """
    python_types = (float, float, float, int, int, int)
    npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'),
                 ('green', 'u1'), ('blue', 'u1')]
    if binary is True:
        # Format into a NumPy structured array
        vertices = []
        for row_idx in range(points_3d.shape[0]):
            cur_point = points_3d[row_idx]
            vertices.append(
                tuple(
                    dtype(point)
                    for dtype, point in zip(python_types, cur_point)))
        vertices_array = np.array(vertices, dtype=npy_types)
        el = PlyElement.describe(vertices_array, 'vertex')
        # Write
        PlyData([el]).write(save_path)
    else:
        x = np.squeeze(points_3d[:, 0])
        y = np.squeeze(points_3d[:, 1])
        z = np.squeeze(points_3d[:, 2])
        r = np.squeeze(points_3d[:, 3])
        g = np.squeeze(points_3d[:, 4])
        b = np.squeeze(points_3d[:, 5])
        ply_head = 'ply\n' \
                   'format ascii 1.0\n' \
                   'element vertex %d\n' \
                   'property float x\n' \
                   'property float y\n' \
                   'property float z\n' \
                   'property uchar red\n' \
                   'property uchar green\n' \
                   'property uchar blue\n' \
                   'end_header' % r.shape[0]
        # ---- Save ply data to disk
        np.savetxt(save_path, np.column_stack([x, y, z, r, g, b]),
                   fmt='%f %f %f %d %d %d', header=ply_head, comments='')
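
# Optional round-trip check (a sketch, not part of the original gist): read the
# saved PLY back with plyfile and confirm the vertex count survived the trip.
def _check_ply_vertex_count(save_path, expected_n):
    ply = PlyData.read(save_path)
    assert ply['vertex'].count == expected_n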

def unproject_omni(rgb_path, pc_path):
    depth_path = rgb_path.replace('/images/', '/depth/')[:-7] + "depth_zbuffer.png"
    point_info_path = rgb_path.replace('/images/', '/point_info/')[:-7] + "point_info.json"
    rgb_image = cv2.imread(rgb_path)[:, :, ::-1]  # BGR -> RGB
    # Convert the stored z-buffer to metric depth (1/512 scale)
    depth_map = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 512.0

    # Camera intrinsics and pose
    with open(point_info_path, 'r') as f:
        omnidata = json.load(f)
    # Load R, T, K; despite its name, the function returns the world-to-camera transform
    rtk = _get_cam_to_world_R_T_K(omnidata)

    # Camera intrinsics: convert the OpenGL projection matrix to OpenCV format
    # (see the derivation sketch after this function)
    P = rtk['proj_K'].numpy()  # OpenGL-style projection matrix
    w = h = 512  # Omnidata renders are all 512x512
    fx, fy, cx, cy = P[0, 0] * w / 2, P[1, 1] * h / 2, (w - P[0, 2] * w) / 2, (P[1, 2] * h + h) / 2
    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])

    # Camera extrinsics/pose: convert to OpenCV format
    world_to_cam_R_r = rtk['cam_to_world_R'].numpy()  # row-vector form (right multiply)
    world_to_cam_R = world_to_cam_R_r.T               # column-vector form (left multiply)
    world_to_cam_T = rtk['cam_to_world_T'].numpy()

    # Coordinate system change, i.e. pose in PyTorch3D -> pose in OpenCV
    # PyTorch3D: right-handed, x-left,  y-up,   z-forward
    # OpenCV:    right-handed, x-right, y-down, z-forward
    pytorch3d_to_opencv = np.array([[-1, 0, 0], [0, -1, 0], [0, 0, 1]])
    world_to_cam_R = pytorch3d_to_opencv @ world_to_cam_R
    world_to_cam_T = pytorch3d_to_opencv @ world_to_cam_T

    # Now R, T, K are all in OpenCV convention
    pc = project_rgb_to_point_cloud(rgb_image, depth_map, K, world_to_cam_R, world_to_cam_T)
    save_point_cloud(pc, pc_path)
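
# Why the intrinsics conversion in unproject_omni() works: a sketch of the
# derivation, assuming PyTorch3D's NDC convention (+x left, +y up, both in
# [-1, 1]) and its NDC-to-pixel mapping u = (1 - ndc_x) * w / 2.
# The projection gives ndc_x = (P[0,0] * x + P[0,2] * z) / z; substituting and
# flipping to OpenCV's x-right axis (x_cv = -x) yields
#     u = fx * x_cv / z + cx,  with  fx = P[0,0] * w / 2,  cx = (w - P[0,2] * w) / 2,
# which is exactly the OpenCV pinhole model. The y axis follows analogously
# with the y-up -> y-down flip.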

if __name__ == '__main__':
    # Read Omnidata/Taskonomy samples and write point clouds
    rgb_path_1 = '/home/dzc/workspace/stereo/gaussian-splatting/data/taskonomy/onaga/images/point_0_view_0_domain_rgb.png'
    rgb_path_2 = '/home/dzc/workspace/stereo/gaussian-splatting/data/taskonomy/onaga/images/point_0_view_1_domain_rgb.png'
    unproject_omni(rgb_path_1, 'point_cloud_1.ply')
    unproject_omni(rgb_path_2, 'point_cloud_2.ply')