"""
A multi-camera and multimodal dataset for posture and gait analysis 1.0.0
"""
import cv2
import numpy as np
import matplotlib.pyplot as plt
##### metadata #####
# original frames are 640x480 pixels for both the img and depth streams
frames_original_shape = (640, 480)  # (width, height)
# intrinsic calibration is fixed for each camera setup
default_cam_intrinsics = dict(
    gait=np.array(
        [[519.429, 0.00000, 313.237, 0.0],
         [0.00000, 519.429, 239.934, 0.0],
         [0.00000, 0.00000, 1.00000, 0.0],
         [0.00000, 0.00000, 0.00000, 1.0]]),
    posture=np.array(
        [[514.283, 0.00000, 315.242, 0.0],
         [0.00000, 514.283, 245.109, 0.0],
         [0.00000, 0.00000, 1.00000, 0.0],
         [0.00000, 0.00000, 0.00000, 1.0]])
)
# names for each of the processed xsens skeleton joints
xsens_joint_names = (
    "pelvis", "l5", "l3", "t12", "t8",                                      # 0-4
    "neck", "head", "right_shoulder", "right_upper_arm", "right_forearm",   # 5-9
    "right_hand", "left_shoulder", "left_upper_arm", "left_forearm",
    "left_hand",                                                            # 10-14
    "right_upper_leg", "right_lower_leg", "right_foot", "right_toe",
    "left_upper_leg",                                                       # 15-19
    "left_lower_leg", "left_foot", "left_toe"                               # 20-22
)
##### common rotation matrices #####
rotx90 = np.array([[1.0, 0.0, 0.0],
                   [0.0, 0.0, -1.0],
                   [0.0, 1.0, 0.0]])
rotx180 = rotx90 @ rotx90
roty90 = np.array([[0.0, 0.0, 1.0],
                   [0.0, 1.0, 0.0],
                   [-1.0, 0.0, 0.0]])
roty180 = roty90 @ roty90
rotz90 = np.array([[0.0, -1.0, 0.0],
                   [1.0, 0.0, 0.0],
                   [0.0, 0.0, 1.0]])
rotz180 = rotz90 @ rotz90
rot_world_to_kinect_ref = np.array([[-1.0, 0.0, 0.0],
                                    [0.0, 0.0, 1.0],
                                    [0.0, 1.0, 0.0]])
rot_kinect_to_world_ref = rot_world_to_kinect_ref.T
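# Illustrative sanity check (a sketch, not part of the original dataset code):
# each 90-degree rotation should be orthonormal and its fourth power the
# identity, and the kinect<->world reference changes should invert each other.
def _check_rotation_constants():
    for rot in (rotx90, roty90, rotz90):
        assert np.allclose(rot @ rot.T, np.eye(3))                # orthonormal
        assert np.allclose(rot @ rot @ rot @ rot, np.eye(3))      # 4 x 90 deg
    assert np.allclose(rot_world_to_kinect_ref @ rot_kinect_to_world_ref,
                       np.eye(3))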
def align_data_by_timestamp(list_data_ids, list_timestamps,
                            undersample_rate=1, frames_clip_idx=(5, 5),
                            visualize=False, plot_data_names=None):
    """
    Aligns multiple data streams by timestamp, using the first stream as
    the temporal reference.
    Args:
        list_data_ids(Iterable): Iterable with ids of data to align.
        list_timestamps(Iterable): Iterable with timestamps to be used
            to align data.
        undersample_rate(int): Undersample rate to apply to data.
        frames_clip_idx(tuple[int]): Number of frames to ignore at the
            beginning/end.
        visualize(bool): If alignment results should be plotted.
        plot_data_names(None, Iterable[str]): Data type name to put
            in the plot.
    Returns:
        One list with frame ids for each of the streams provided. Matching
        indexes of each list are the closest temporally.
    """
    # make timestamps relative to the reference start
    init_t = list_timestamps[0][0]
    time_ref = (list_timestamps[0] - init_t)[frames_clip_idx[0]:-frames_clip_idx[1]:undersample_rate]
    frame_ref = list_data_ids[0][frames_clip_idx[0]:-frames_clip_idx[1]:undersample_rate]
    min_len_size = len(time_ref)
    aligned_data_ids = [frame_ref, ]
    aligned_timestamps = [time_ref, ]
    for ts, d_id in zip(list_timestamps[1:], list_data_ids[1:]):
        ts, d_id = np.array(ts), np.array(d_id)
        rel_ts = (ts - init_t)
        # match each reference timestamp to the closest sample
        # (allowing half a frame period at 30 fps, 0.5/30 s, ahead of the reference)
        align_ids = np.searchsorted(rel_ts, time_ref - (0.5 / 30))
        # drop indexes past the end of the stream (leftover unaligned samples)
        align_ids = align_ids[align_ids < len(rel_ts)]
        aligned_timestamps.append(rel_ts[align_ids])
        min_len_size = min(len(align_ids), min_len_size)
        # select frames which align
        aligned_data_ids.append(d_id[align_ids])
    # guarantee all data has the same length
    aligned_data_ids = [i[:min_len_size] for i in aligned_data_ids]
    aligned_timestamps = [i[:min_len_size] for i in aligned_timestamps]
    if visualize:
        if plot_data_names is None:
            plot_data_names = ["" for _ in range(len(list_data_ids))]
        # visualize aligned results
        fig, ax = plt.subplots(1, 2, figsize=(20 * 2, 20))
        # original timestamps
        for (tstamps, label) in zip(list_timestamps, plot_data_names):
            ts = (tstamps - init_t)[:min(25, min_len_size) * undersample_rate:undersample_rate]
            ax[0].plot(ts, label=label)
        ax[0].set_title("Original relative timestamps", fontsize=40)
        ax[0].set_xlabel("nframe", fontsize=40)
        ax[0].set_ylabel("timestamp", fontsize=40)
        # aligned timestamps
        for (tstamps, label) in zip(aligned_timestamps, plot_data_names):
            ts = tstamps[:min(25, min_len_size)]
            ax[1].plot(ts, label=label)
        ax[1].set_title("Aligned timestamps", fontsize=40)
        ax[1].set_xlabel("nframe", fontsize=40)
        ax[1].legend(fontsize=40)
        plt.show()
    return aligned_data_ids, aligned_timestamps
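# Usage sketch (synthetic data, illustrative only): align a 30 Hz reference
# stream with a second stream that started slightly later and runs at 15 Hz.
# All timestamps and ids below are fabricated for the example.
def _example_align_by_timestamp():
    t0 = 1600000000.0  # hypothetical epoch start
    ref_ts = t0 + np.arange(100) / 30.0                  # 30 Hz reference
    other_ts = t0 + 0.01 + np.arange(0, 100, 2) / 30.0   # offset 15 Hz stream
    ids, tss = align_data_by_timestamp(
        list_data_ids=[np.arange(100), np.arange(50)],
        list_timestamps=[ref_ts, other_ts],
        undersample_rate=2, visualize=False)
    # matching indexes of ids[0] / ids[1] are now the temporally closest frames
    return ids, tss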
def extract_timestamp_from_string(time_string, split_char="_"):
    '''
    Extracts a formatted timestamp (seconds and a 9-digit fractional part)
    from a string and returns its float value.
    ex. 0000000000_000000000 -> 0.0
    '''
    sec, ms = time_string.replace(", ", ".").split(split_char)
    sec = '{0:010d}'.format(int(sec))
    ms = '{0:09d}'.format(int(ms))
    return float('.'.join([sec, ms]))
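# Quick illustration (values are made up): the fractional part is zero-padded
# to nine digits before being joined, so short fragments still parse correctly.
def _example_extract_timestamp():
    assert extract_timestamp_from_string("0000000000_000000000") == 0.0
    assert extract_timestamp_from_string("1600000000_500000000") == 1600000000.5
    return extract_timestamp_from_string("42_1")  # ~42.000000001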
def process_depth_frame(frame_depth, save_shape=None):
    # convert from mm (uint16) to meters (float32)
    frame_depth = frame_depth.astype(np.float32) / 1000.0
    if save_shape is not None:
        # cv2.resize expects dsize as (width, height)
        frame_depth = cv2.resize(frame_depth, dsize=save_shape,
                                 interpolation=cv2.INTER_NEAREST)
    return frame_depth[..., np.newaxis]
def draw_img_keypoints(img, keypoints, radius=1, color=(255, 255, 255, 255)):
    """
    Draws a list of keypoints into an image.
    """
    # promote grayscale (H, W, 1) images to RGB so colored circles can be drawn
    new_img = (cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
               if img.shape[-1] == 1 else img.copy())
    for point in keypoints:
        # cv2.circle expects integer pixel coordinates
        new_img = cv2.circle(new_img, tuple(int(p) for p in point),
                             radius=radius, color=color,
                             thickness=-1)
    return new_img
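# Usage sketch (fabricated image): draw a couple of keypoints on a blank frame.
def _example_draw_keypoints():
    img = np.zeros((480, 640, 3), dtype=np.uint8)  # blank RGB frame
    keypoints = [(320, 240), (100, 50)]            # made-up pixel positions
    return draw_img_keypoints(img, keypoints, radius=3, color=(0, 255, 0))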
def apply_homogeneous_transform(points3d, homog_transform):
    """
    Applies a homogeneous transform matrix to an array of 3D points.
    Args:
        points3d(np.array[Nx3]): 3d coordinate points.
        homog_transform(np.array[4x4]): homogeneous transformation matrix.
    Returns:
        transformed(np.array[Nx3]): transformed 3d coordinate points.
    """
    # append a homogeneous coordinate (w=1) to each point
    homog_points3d = np.append(points3d, np.ones((points3d.shape[0], 1)), axis=1)
    transformed = (homog_transform @ homog_points3d.T).T
    return transformed[:, :3]
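# Small example (made-up transform): translate a set of points by (1, 2, 3)
# using a homogeneous matrix.
def _example_homogeneous_transform():
    transform = np.eye(4)
    transform[:3, 3] = (1.0, 2.0, 3.0)  # pure translation, no rotation
    points = np.zeros((4, 3))
    return apply_homogeneous_transform(points, transform)  # all rows (1, 2, 3)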
def project_3d_to_2d(points3d, intrinsic_matrix, extrinsic_matrix=None):
    """
    Projects points from 3d to 2d given camera intrinsic and extrinsic
    matrices:
    # https://ksimek.github.io/2013/08/13/intrinsic/
    # https://www.cs.cmu.edu/~16385/s17/Slides/11.1_Camera_matrix.pdf
    intrinsic_matrix =
        [[fx, s,  x0, 0],
         [0,  fy, y0, 0],
         [0,  0,  1,  0],
         [0,  0,  0,  1]]
    extrinsic_matrix =
        [[Rxx, Rxy, Rxz, tx],
         [Ryx, Ryy, Ryz, ty],
         [Rzx, Rzy, Rzz, tz],
         [0,   0,   0,   1]]
    Args:
        points3d (Nx3 array): points in world coordinates.
        intrinsic_matrix (4x4 array): Camera intrinsic matrix.
        extrinsic_matrix (None, 4x4 array): Camera extrinsic matrix.
            If None then it is assumed that the 3D referential is in
            relation to the camera.
    Returns:
        points2d(Nx2 array): Projected points in pixel space.
    """
    # projection matrix (intrinsic @ extrinsic), or just (intrinsic) if no
    # extrinsic transformation is needed
    proj_matrix = (intrinsic_matrix @ extrinsic_matrix
                   if extrinsic_matrix is not None
                   else intrinsic_matrix)
    homog_points2d = apply_homogeneous_transform(points3d, proj_matrix)
    # perspective division: (x, y, z) -> (x/z, y/z)
    # no extra focal rescaling is needed since the intrinsic focal lengths
    # are already expressed in pixels (not mm)
    points2d = homog_points2d[:, :2] / homog_points2d[:, 2:3]
    return points2d
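# Usage sketch (made-up geometry): project a few camera-space points with the
# dataset's default gait-camera intrinsics; a point on the optical axis should
# land near the principal point (313.2, 239.9).
def _example_project_3d_to_2d():
    points3d = np.array([[0.0, 0.0, 1.0],    # on the optical axis, 1 m away
                         [0.1, 0.0, 1.0]])   # 10 cm to the side
    points2d = project_3d_to_2d(points3d, default_cam_intrinsics["gait"])
    return points2d  # ~[[313.2, 239.9], [365.2, 239.9]]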
def depth_to_pointcloud(depth_frame, camera_intrinsics, img_frame=None,
                        depth_range=(0.5, 5.0), num_points_pc=2048,
                        downsample_method=None, depth_mult=1.0):
    """
    Converts a depth map to a 3D pointcloud.
    Args:
        depth_frame(np.ndarray): depth image frame to project to 3D space.
        camera_intrinsics(np.ndarray): camera intrinsic parameters. Should
            have [4x4] shape. Check project_3d_to_2d function for details.
        img_frame(np.ndarray, None): Optional color for pointcloud. Should
            have the same shape as "depth_frame", range [0.0, 1.0] and
            (H,W,C) order.
        depth_range(tuple[float]): valid depth range (in meters). Values
            outside this range are ignored.
        num_points_pc(int): number of points to keep in the pointcloud.
            If the number of valid depth pixels exceeds num_points_pc, a
            downsample method is applied.
        downsample_method(None, str): Downsample method to apply to the
            pointcloud data. Can be one of "linspace", "random" or None.
            If None, then no downsampling is applied.
        depth_mult(float): multiplier to apply to depth data.
    Returns:
        pointcloud(np.ndarray): pointcloud datapoints.
        pc_color(np.ndarray): color of each of the pointcloud points.
    """
    # apply corrections to depth data via depth_mult, e.g. convert normalized
    # [0-1] values back to the kinect's original range, or mm values to meters
    depth_frame = np.squeeze(depth_frame).astype(np.float32)
    depth_frame = depth_frame * depth_mult
    # drop depth data outside the valid range
    valid_depth = (depth_range[0] < depth_frame) & (depth_frame < depth_range[1])
    img_y, img_x = np.argwhere(valid_depth).T
    z = depth_frame[valid_depth]
    # downsample pointcloud
    if downsample_method is None or num_points_pc >= len(z):
        pass
    elif downsample_method == "linspace":
        selec_points = np.linspace(0, len(z), num=num_points_pc,
                                   endpoint=False, dtype=np.int32)
        img_x, img_y, z = img_x[selec_points], img_y[selec_points], z[selec_points]
    elif downsample_method == "random":
        # sampled with replacement, so duplicate points are possible
        selec_points = np.random.randint(0, len(z), size=num_points_pc)
        img_x, img_y, z = img_x[selec_points], img_y[selec_points], z[selec_points]
    else:
        raise NotImplementedError(downsample_method)
    # back-project: (xy_pos - cam_center_xy) * z / cam_focal_length
    # invert (-x / -y) to transform from pixel ref to world ref
    x = -((img_x - camera_intrinsics[0, 2]) * z) / camera_intrinsics[0, 0]
    y = -((img_y - camera_intrinsics[1, 2]) * z) / camera_intrinsics[1, 1]
    pointcloud = np.stack([x, y, z]).T
    # get pointcloud color from img_frame (constant grey if not provided,
    # one RGBA row per point so merged clouds concatenate cleanly)
    if img_frame is not None:
        pc_color = img_frame[img_y, img_x]
    else:
        pc_color = np.full((len(pointcloud), 4), 0.5, dtype=np.float32)
    return pointcloud, pc_color
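# Usage sketch (synthetic frame): back-project a fabricated flat 2 m depth
# plane into a pointcloud using the dataset's default posture-camera intrinsics.
def _example_depth_to_pointcloud():
    depth = np.full((480, 640), 2.0, dtype=np.float32)  # made-up 2 m plane
    pc, pc_color = depth_to_pointcloud(
        depth, default_cam_intrinsics["posture"],
        depth_range=(0.5, 5.0), num_points_pc=2048,
        downsample_method="random")
    assert pc.shape == (2048, 3) and np.allclose(pc[:, 2], 2.0)
    return pc, pc_color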
def multiple_cams_merge_pointcloud(depth_data, intrinsic_matrix,
                                   extrinsic_matrix=None, n_points=4096,
                                   depth_range=(0.1, 2.5)):
    """
    Creates a pointcloud from each of multiple depth cameras and joins them
    into a single pointcloud.
    Args:
        depth_data(Iterable[np.ndarray]): Depth data from the cameras to merge.
        intrinsic_matrix(Iterable[np.ndarray]): Intrinsic parameters of the
            cameras to merge.
        extrinsic_matrix(Iterable[np.ndarray], None): Extrinsic transformation
            to apply to each of the cameras to merge. If None, the identity
            is used for every camera.
        n_points(int): Number of points to sample from each camera's depth map.
        depth_range(tuple[float]): range of depth values to keep.
    Returns:
        Merged pointcloud points and their colors.
    """
    extrinsic_matrix = (extrinsic_matrix if extrinsic_matrix is not None
                        else [np.eye(4) for _ in range(len(intrinsic_matrix))])
    cam_pc, cam_pccolor = [], []
    for c_dpt, c_intrinsics, c_extrinsics in \
            zip(depth_data, intrinsic_matrix, extrinsic_matrix):
        # project each camera's depth map to 3D ...
        c_pc, c_clr = depth_to_pointcloud(
            depth_frame=c_dpt,
            camera_intrinsics=c_intrinsics,
            img_frame=None,
            depth_range=depth_range, num_points_pc=n_points,
            downsample_method="random")
        # ... and move it to the shared referential before merging
        cam_pc.append(apply_homogeneous_transform(c_pc, c_extrinsics))
        cam_pccolor.append(c_clr)
    return np.concatenate(cam_pc), np.concatenate(cam_pccolor)
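# Usage sketch (fabricated extrinsics): merge two depth cameras into one
# cloud, with the second camera offset 1 m along x. The baseline is invented
# purely for illustration.
def _example_merge_pointclouds():
    depths = [np.full((480, 640), 1.5, dtype=np.float32) for _ in range(2)]
    offset = np.eye(4)
    offset[0, 3] = 1.0  # hypothetical 1 m baseline between the cameras
    pc, pc_color = multiple_cams_merge_pointcloud(
        depth_data=depths,
        intrinsic_matrix=[default_cam_intrinsics["gait"],
                          default_cam_intrinsics["posture"]],
        extrinsic_matrix=[np.eye(4), offset],
        n_points=1024)
    assert pc.shape == (2048, 3)
    return pc, pc_color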
def rotation_matrix_from_vectors(vec1, vec2):
    """
    Finds the rotation matrix that aligns vec1 to vec2.
    Args:
        vec1(np.ndarray): A 3d "source" vector.
        vec2(np.ndarray): A 3d "destination" vector.
    Returns:
        rotation matrix(np.array[3x3]) which, when applied to vec1, aligns
        it with vec2.
    """
    a = (vec1 / np.linalg.norm(vec1)).reshape(3)
    b = (vec2 / np.linalg.norm(vec2)).reshape(3)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    if np.isclose(s, 0.0):
        if c > 0:
            return np.eye(3)  # vectors already aligned
        # anti-parallel: rotate 180 degrees around any axis perpendicular to a
        axis = np.cross(a, [1.0, 0.0, 0.0])
        if np.isclose(np.linalg.norm(axis), 0.0):
            axis = np.cross(a, [0.0, 1.0, 0.0])
        axis /= np.linalg.norm(axis)
        return 2.0 * np.outer(axis, axis) - np.eye(3)
    # Rodrigues' formula via the cross-product (skew-symmetric) matrix
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2))
    return rotation_matrix
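# Quick check (illustrative, made-up vectors): the returned matrix should map
# the direction of vec1 onto the direction of vec2, e.g. a camera "up" vector
# onto a measured floor normal.
def _example_rotation_from_vectors():
    vec1 = np.array([0.0, 0.0, 1.0])
    vec2 = np.array([0.0, 1.0, 1.0])  # fabricated tilted normal
    rot = rotation_matrix_from_vectors(vec1, vec2)
    aligned = rot @ vec1
    assert np.allclose(aligned / np.linalg.norm(aligned),
                       vec2 / np.linalg.norm(vec2))
    return rot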