[ADD] dependency requirement file issue #21, dataloaders for tum rgbd…

… and neuralrgbd dataset
PRBonn · Jun 6, 2024 · 128d188 · 128d188
1 parent dfa5b17
commit 128d188
Show file tree

Hide file tree

Showing 7 changed files with 250 additions and 64 deletions.
diff --git a/README.md b/README.md
@@ -134,12 +134,7 @@ The commands depend on your CUDA version. You may check the instructions [here](
 ### 3. Install other dependency
 
 ```
-pip3 install open3d==0.17 scikit-image gtsam wandb tqdm rich roma natsort pyquaternion pypose evo
-```
-
-If you would like to use some specific data loader or use PIN-SLAM with ROS, you can additionally install:
-```
-pip3 install laspy ouster-sdk rospkg 
+pip3 install -r requirements.txt
 ```
 
 ----
@@ -240,10 +235,11 @@ python3 pin_slam.py ./config/lidar_slam/run.yaml rosbag point_cloud_topic_name -
 python3 pin_slam.py ./config/lidar_slam/run.yaml rosbag -i /path/to/the/rosbag -vsmd
 ```
 
-The data loaders for some specific datasets are also available. For example, you can run on Replica RGB-D dataset without preprocessing the data by:
+The data loaders for [some specific datasets](https://github.com/PRBonn/PIN_SLAM/tree/main/dataset/dataloaders) are also available. For example, you can run on Replica RGB-D dataset without preprocessing the data by:
 ```
 # Download data
 sh scripts/download_replica.sh
+
 # Run PIN-SLAM
 python3 pin_slam.py ./config/rgbd_slam/run_replica.yaml replica room0 -i data/Replica -vsmd 
 ```

diff --git a/dataset/dataloaders/__init__.py b/dataset/dataloaders/__init__.py
@@ -39,7 +39,7 @@ def supported_file_extensions():
 
 def sequence_dataloaders():
     # TODO: automatically infer this
-    return ["kitti", "kitti_raw", "nuscenes", "helipr", "replica"]
+    return ["kitti", "kitti_raw", "nuscenes", "helipr", "replica", "tum", "neuralrgbd"]
 
 
 def available_dataloaders() -> List:

diff --git a/dataset/dataloaders/neuralrgbd.py b/dataset/dataloaders/neuralrgbd.py
@@ -0,0 +1,111 @@
+# MIT License
+#
+# Copyright (c) 2024 Yue Pan
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import importlib
+import os
+from pathlib import Path
+from natsort import natsorted
+import numpy as np
+
+
+class NeuralRGBDDataset:
+    def __init__(self, data_dir: Path, sequence: str, *_, **__):
+        try:
+            self.o3d = importlib.import_module("open3d")
+        except ModuleNotFoundError as err:
+            print(f'open3d is not installed on your system, run "pip install open3d"')
+            exit(1)
+
+        sequence_dir = os.path.join(data_dir, sequence)
+        rgb_folder_path = os.path.join(sequence_dir, 'images')
+        depth_folder_path = os.path.join(sequence_dir, 'depth')
+
+        rgb_frames_name = natsorted(os.listdir(rgb_folder_path))    
+        depth_frames_name = natsorted(os.listdir(depth_folder_path)) 
+
+        self.rgb_frames = [os.path.join(rgb_folder_path, f) for f in rgb_frames_name if f.endswith('png')]
+        self.depth_frames = [os.path.join(depth_folder_path, f) for f in depth_frames_name if f.endswith('png')]
+
+        self.pose_path = os.path.join(sequence_dir, 'poses.txt')
+        self.gt_poses, _ = self.load_poses(self.pose_path)
+
+        H, W = 480, 640
+        focal_file_path =  os.path.join(sequence_dir, "focal.txt")
+        focal_file = open(focal_file_path, "r")
+        focal_length = float(focal_file.readline())
+
+        self.intrinsic = self.o3d.camera.PinholeCameraIntrinsic()
+        self.intrinsic.set_intrinsics(height=H,
+                                      width=W,
+                                      fx=focal_length,
+                                      fy=focal_length,
+                                      cx=(W-1)/2.0,
+                                      cy=(H-1)/2.0)
+        self.extrinsic = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+
+        self.depth_scale = 1000.0
+        self.max_depth_m = 10.0
+        self.down_sample_on = False
+        self.rand_down_rate = 0.1
+
+    def __len__(self):
+        return len(self.depth_frames)
+
+    def load_poses(self, path):
+        file = open(path, "r")
+        lines = file.readlines()
+        file.close()
+        poses = []
+        valid = []
+        lines_per_matrix = 4
+        for i in range(0, len(lines), lines_per_matrix):
+            if 'nan' in lines[i]:
+                valid.append(False)
+                poses.append(np.eye(4, 4, dtype=np.float32).tolist())
+            else:
+                valid.append(True)
+                pose_floats = np.array([[float(x) for x in line.split()] for line in lines[i:i+lines_per_matrix]])
+                poses.append(pose_floats)   
+        poses = np.array(poses)
+        valid = np.array(valid)
+        return poses, valid
+
+    def __getitem__(self, idx):
+
+        rgb_image = self.o3d.io.read_image(self.rgb_frames[idx])
+        depth_image = self.o3d.io.read_image(self.depth_frames[idx])
+        rgbd_image = self.o3d.geometry.RGBDImage.create_from_color_and_depth(rgb_image, 
+                                                                            depth_image, 
+                                                                            depth_scale=self.depth_scale, 
+                                                                            depth_trunc=self.max_depth_m, 
+                                                                            convert_rgb_to_intensity=False)
+
+        pcd = self.o3d.geometry.PointCloud.create_from_rgbd_image(
+            rgbd_image, self.intrinsic, self.extrinsic)
+        if self.down_sample_on:
+            pcd = pcd.random_down_sample(sampling_ratio=self.rand_down_rate)
+
+        points_xyz = np.array(pcd.points, dtype=np.float64)
+        points_rgb = np.array(pcd.colors, dtype=np.float64)
+        points_xyzrgb = np.hstack((points_xyz, points_rgb))
+
+        return points_xyzrgb 
diff --git a/dataset/dataloaders/tum.py b/dataset/dataloaders/tum.py
@@ -2,6 +2,7 @@
 #
 # Copyright (c) 2022 Ignacio Vizzo, Tiziano Guadagnino, Benedikt Mersch, Cyrill
 # Stachniss.
+# Copyright (c) 2024 Yue Pan
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -25,72 +26,129 @@
 from pathlib import Path
 
 import numpy as np
-from pyquaternion import Quaternion
-
 
 class TUMDataset:
-    def __init__(self, data_dir: Path, *_, **__):
+    def __init__(self, data_dir: Path, sequence: str, *_, **__):
         try:
             self.o3d = importlib.import_module("open3d")
         except ModuleNotFoundError as err:
             print(f'open3d is not installed on your system, run "pip install open3d"')
             exit(1)
 
-        self.data_dir = Path(data_dir)
-        self.sequence_id = os.path.basename(data_dir)
+        sequence_dir = os.path.join(data_dir, sequence)
+
+        self.rgb_frames, self.depth_frames, self.gt_poses = self.loadtum(sequence_dir)
 
-        # Load depth frames
-        self.depth_frames = np.loadtxt(fname=self.data_dir / "depth.txt", dtype=str)
+        self.intrinsic = self.o3d.camera.PinholeCameraIntrinsic()
+        H, W = 480, 640
 
-        # rgb single frame
-        rgb_path = os.path.join(self.data_dir, "rgb", os.listdir(self.data_dir / "rgb")[0])
-        self.rgb_default_frame = self.o3d.io.read_image(rgb_path)
+        if "freiburg1" in sequence:
+            fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3
+        elif "freiburg2" in sequence:
+            fx, fy, cx, cy = 520.9, 521.0, 325.1, 249.7
+        elif "freiburg3" in sequence:
+            fx, fy, cx, cy = 535.4, 539.2, 320.1, 247.6
+        else: # default
+            fx, fy, cx, cy = 525.0, 525.0, 319.5, 239.5
 
-        # Load GT poses
-        gt_list = np.loadtxt(fname=self.data_dir / "groundtruth.txt", dtype=str)
-        self.gt_poses = self.load_poses(gt_list)
+        self.intrinsic.set_intrinsics(height=H,
+                                     width=W,
+                                     fx=fx,
+                                     fy=fy,
+                                     cx=cx,
+                                     cy=cy)
+
+        self.down_sample_on = False
+        self.rand_down_rate = 0.1
 
     def __len__(self):
         return len(self.depth_frames)
 
-    def load_poses(self, gt_list):
-        indices = np.unique(
-            np.abs(
-                (
-                    np.subtract.outer(
-                        gt_list[:, 0].astype(np.float64),
-                        self.depth_frames[:, 0].astype(np.float64),
-                    )
-                )
-            ).argmin(0)
-        )
-        xyz = gt_list[indices][:, 1:4]
+    def loadtum(self, datapath, frame_rate=-1):
+        """ read video data in tum-rgbd format """
+        if os.path.isfile(os.path.join(datapath, 'groundtruth.txt')):
+            pose_list = os.path.join(datapath, 'groundtruth.txt')
 
-        rotations = np.array(
-            [
-                Quaternion(x=x, y=y, z=z, w=w).rotation_matrix
-                for x, y, z, w in gt_list[indices][:, 4:]
-            ]
-        )
-        num_poses = rotations.shape[0]
-        poses = np.eye(4, dtype=np.float64).reshape(1, 4, 4).repeat(num_poses, axis=0)
-        poses[:, :3, :3] = rotations
-        poses[:, :3, 3] = xyz
-        return poses
+        image_list = os.path.join(datapath, 'rgb.txt')
+        depth_list = os.path.join(datapath, 'depth.txt')
+
+        def parse_list(filepath, skiprows=0):
+            """ read list data """
+            data = np.loadtxt(filepath, delimiter=' ',
+                                dtype=np.unicode_, skiprows=skiprows)
+            return data
+
+        def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=0.08):
+            """ pair images, depths, and poses """
+            associations = []
+            for i, t in enumerate(tstamp_image):
+                if tstamp_pose is None:
+                    j = np.argmin(np.abs(tstamp_depth - t))
+                    if (np.abs(tstamp_depth[j] - t) < max_dt):
+                        associations.append((i, j))
+                else:
+                    j = np.argmin(np.abs(tstamp_depth - t))
+                    k = np.argmin(np.abs(tstamp_pose - t))
+
+                    if (np.abs(tstamp_depth[j] - t) < max_dt) and \
+                            (np.abs(tstamp_pose[k] - t) < max_dt):
+                        associations.append((i, j, k))
+            return associations
+
+        def pose_matrix_from_quaternion(pvec):
+            """ convert 4x4 pose matrix to (t, q) """
+            from scipy.spatial.transform import Rotation
+            pose = np.eye(4)
+            pose[:3, :3] = Rotation.from_quat(pvec[3:]).as_matrix() # rotation
+            pose[:3, 3] = pvec[:3] # translation
+            return pose
+
+        image_data = parse_list(image_list)
+        depth_data = parse_list(depth_list)
+        pose_data = parse_list(pose_list, skiprows=1)
+        pose_vecs = pose_data[:, 1:].astype(np.float64)
 
-    def get_frames_timestamps(self):
-        return self.depth_frames[:, 0]
+        tstamp_image = image_data[:, 0].astype(np.float64)
+        tstamp_depth = depth_data[:, 0].astype(np.float64)
+        tstamp_pose = pose_data[:, 0].astype(np.float64)
+        associations = associate_frames(
+            tstamp_image, tstamp_depth, tstamp_pose)
+
+        indicies = [0]
+        for i in range(1, len(associations)):
+            t0 = tstamp_image[associations[indicies[-1]][0]]
+            t1 = tstamp_image[associations[i][0]]
+            if t1 - t0 > 1.0 / frame_rate:
+                indicies += [i]
+
+        images, poses, depths = [], [], []
+        for ix in indicies:
+            (i, j, k) = associations[ix]
+            images += [os.path.join(datapath, image_data[i, 1])]
+            depths += [os.path.join(datapath, depth_data[j, 1])]
+            c2w = pose_matrix_from_quaternion(pose_vecs[k])
+            poses += [c2w]
+
+        poses = np.array(poses)
+
+        return images, depths, poses
 
     def __getitem__(self, idx):
-        depth_id = self.depth_frames[idx][-1]
-        depth_raw = self.o3d.io.read_image(str(self.data_dir / depth_id))
-        rgbd_image = self.o3d.geometry.RGBDImage.create_from_tum_format(
-            self.rgb_default_frame, depth_raw
-        )
+
+        im_color = self.o3d.io.read_image(self.rgb_frames[idx])
+        im_depth = self.o3d.io.read_image(self.depth_frames[idx]) 
+        rgbd_image = self.o3d.geometry.RGBDImage.create_from_tum_format(im_color,
+            im_depth, convert_rgb_to_intensity=False)
+
         pcd = self.o3d.geometry.PointCloud.create_from_rgbd_image(
             rgbd_image,
-            self.o3d.camera.PinholeCameraIntrinsic(
-                self.o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
-            ),
+            self.intrinsic
         )
-        return np.array(pcd.points, dtype=np.float64)
+        if self.down_sample_on:
+            pcd = pcd.random_down_sample(sampling_ratio=self.rand_down_rate)
+
+        points_xyz = np.array(pcd.points, dtype=np.float64)
+        points_rgb = np.array(pcd.colors, dtype=np.float64)
+        points_xyzrgb = np.hstack((points_xyz, points_rgb))
+
+        return points_xyzrgb 
diff --git a/dataset/slam_dataset.py b/dataset/slam_dataset.py
@@ -655,12 +655,6 @@ def write_results(self):
             os.path.join(self.run_path, "time_table.npy"), time_table
         )  # save detailed time table
 
-        plot_timing_detail(
-            time_table,
-            os.path.join(self.run_path, "time_details.png"),
-            self.config.pgo_on,
-        )
-
         pose_eval = None
 
         # pose estimation evaluation report
@@ -760,6 +754,12 @@ def write_results(self):
             except IOError:
                 print("I/O error")
 
+            plot_timing_detail(
+                time_table,
+                os.path.join(self.run_path, "time_details.png"),
+                self.config.pgo_on,
+            )
+
             # if self.config.o3d_vis_on:  # x service issue for remote server
             output_traj_plot_path_2d = os.path.join(self.run_path, "traj_plot_2d.png")
             output_traj_plot_path_3d = os.path.join(self.run_path, "traj_plot_3d.png")
@@ -801,7 +801,11 @@ def write_results(self):
 
         return pose_eval
 
-    def write_merged_point_cloud(self, down_vox_m=None, use_gt_pose=False, out_file_name="merged_point_cloud"):
+    def write_merged_point_cloud(self, down_vox_m=None, 
+                                use_gt_pose=False, 
+                                out_file_name="merged_point_cloud",
+                                frame_step = 1,
+                                merged_downsample = False):
 
         print("Begin to replay the dataset ...")
 
@@ -813,7 +817,7 @@ def write_merged_point_cloud(self, down_vox_m=None, use_gt_pose=False, out_file_
         map_color_np = np.empty((0, 3))
 
         for frame_id in tqdm(
-            range(self.total_pc_count)
+            range(0, self.total_pc_count, frame_step)
         ):  # frame id as the idx of the frame in the data folder without skipping
             if self.config.use_dataloader:
                 self.read_frame_with_loader(frame_id)
@@ -917,6 +921,10 @@ def write_merged_point_cloud(self, down_vox_m=None, use_gt_pose=False, out_file_
 
         # print("Estimate normal")
         # map_out_o3d.estimate_normals(max_nn=20)
+
+        # downsample again
+        if merged_downsample:
+            map_out_o3d = map_out_o3d.voxel_down_sample(voxel_size=down_vox_m)
 
         if self.run_path is not None:
             print("Output merged point cloud map")