Fix bug in pixel sampler (nerfstudio-project#3103)

* Fix bug in pixel sampler
* Refactor code for distributing rays across images
jb-ye · Apr 30, 2024 · 759fda8 · 759fda8
1 parent b190874
commit 759fda8
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 17 deletions.
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
@@ -26,7 +26,7 @@
 from torch import Tensor
 
 from nerfstudio.configs.base_config import InstantiateConfig
-from nerfstudio.data.utils.pixel_sampling_utils import erode_mask
+from nerfstudio.data.utils.pixel_sampling_utils import divide_rays_per_image, erode_mask
 
 
 @dataclass
@@ -302,19 +302,15 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,
         all_images = []
         all_depth_images = []
 
-        num_rays_in_batch = num_rays_per_batch // num_images
-        if num_rays_in_batch % 2 != 0:
-            num_rays_in_batch += 1
+        assert num_rays_per_batch % 2 == 0, "num_rays_per_batch must be divisible by 2"
+        num_rays_per_image = divide_rays_per_image(num_rays_per_batch, num_images)
 
         if "mask" in batch:
-            for i in range(num_images):
+            for i, num_rays in enumerate(num_rays_per_image):
                 image_height, image_width, _ = batch["image"][i].shape
 
-                if i == num_images - 1:
-                    num_rays_in_batch = num_rays_per_batch - (num_images - 1) * num_rays_in_batch
-
                 indices = self.sample_method(
-                    num_rays_in_batch, 1, image_height, image_width, mask=batch["mask"][i].unsqueeze(0), device=device
+                    num_rays, 1, image_height, image_width, mask=batch["mask"][i].unsqueeze(0), device=device
                 )
                 indices[:, 0] = i
                 all_indices.append(indices)
@@ -323,16 +319,12 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,
                     all_depth_images.append(batch["depth_image"][i][indices[:, 1], indices[:, 2]])
 
         else:
-            for i in range(num_images):
+            for i, num_rays in enumerate(num_rays_per_image):
                 image_height, image_width, _ = batch["image"][i].shape
-                if i == num_images - 1:
-                    num_rays_in_batch = num_rays_per_batch - (num_images - 1) * num_rays_in_batch
                 if self.config.is_equirectangular:
-                    indices = self.sample_method_equirectangular(
-                        num_rays_in_batch, 1, image_height, image_width, device=device
-                    )
+                    indices = self.sample_method_equirectangular(num_rays, 1, image_height, image_width, device=device)
                 else:
-                    indices = self.sample_method(num_rays_in_batch, 1, image_height, image_width, device=device)
+                    indices = self.sample_method(num_rays, 1, image_height, image_width, device=device)
                 indices[:, 0] = i
                 all_indices.append(indices)
                 all_images.append(batch["image"][i][indices[:, 1], indices[:, 2]])

diff --git a/nerfstudio/data/utils/pixel_sampling_utils.py b/nerfstudio/data/utils/pixel_sampling_utils.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-""" Pixel sampling utils such as eroding of valid masks that we sample from. """
+"""Pixel sampling utils such as eroding of valid masks that we sample from."""
+
+import math
+from typing import List
 
 import torch
 from jaxtyping import Float
@@ -63,3 +66,25 @@ def erode_mask(tensor: Float[Tensor, "bs 1 H W"], pixel_radius: int = 1):
     """
     kernel_size = 1 + 2 * pixel_radius
     return erode(tensor, kernel_size=kernel_size)
+
+
+def divide_rays_per_image(num_rays_per_batch: int, num_images: int) -> List[int]:
+    """Divide the batch of rays per image. Finds the optimal number of rays per image such that
+    it's still divisible by 2 and sums to the total number of rays. Uses integer arithmetic only,
+    so the split cannot be skewed by floating point rounding (e.g. 4096 rays over 3 images).
+
+    Args:
+        num_rays_per_batch: Number of rays in the batch.
+        num_images: Number of images in the batch. Must be at least 1 (0 raises ZeroDivisionError).
+
+    Returns:
+        num_rays_per_image: A list of the number of rays per image.
+    """
+    # Hand out whole ray pairs so every per-image count stays even; the leftover pairs go to the last images.
+    pairs_per_image, num_images_over = divmod(num_rays_per_batch // 2, num_images)
+    num_rays_per_image_under = 2 * pairs_per_image
+    num_images_under = num_images - num_images_over
+    num_rays_per_image = num_images_under * [num_rays_per_image_under] + num_images_over * [num_rays_per_image_under + 2]
+    # An odd batch size leaves one ray unassigned; give it to the last image so the total still matches.
+    num_rays_per_image[-1] += num_rays_per_batch - sum(num_rays_per_image)
+    return num_rays_per_image