diff --git a/benchmarks/results/ycb_10objs.csv b/benchmarks/results/ycb_10objs.csv index 87b10eb9..74d7e81e 100644 --- a/benchmarks/results/ycb_10objs.csv +++ b/benchmarks/results/ycb_10objs.csv @@ -1,13 +1,13 @@ Experiment,% Correct,% Used MLH,Num Matching Steps,Rotation Error (radians),Run Time,Episode Run Time (s) -base_config_10distinctobj_dist_agent,98.57%,5.71%,36,0.31,12m,31s -base_config_10distinctobj_surf_agent,100.00%,0.00%,28,0.17,6m,27s -randrot_noise_10distinctobj_dist_agent,99.00%,7.00%,51,0.50,10m,56s -randrot_noise_10distinctobj_dist_on_distm,99.00%,1.00%,35,0.26,7m,48s -randrot_noise_10distinctobj_surf_agent,100.00%,1.00%,29,0.36,7m,49s -randrot_10distinctobj_surf_agent,100.00%,0.00%,29,0.37,4m,29s -randrot_noise_10distinctobj_5lms_dist_agent,100.00%,3.00%,52,0.88,21m,139s -base_10simobj_surf_agent,95.00%,10.00%,84,0.21,14m,76s -randrot_noise_10simobj_dist_agent,81.00%,38.00%,193,0.52,26m,206s -randrot_noise_10simobj_surf_agent,90.00%,35.00%,178,0.45,34m,294s -randomrot_rawnoise_10distinctobj_surf_agent,65.00%,77.00%,16,1.60,22m,24s -base_10multi_distinctobj_dist_agent,74.29%,37.14%,27,0.64,1h9m,2s \ No newline at end of file +base_config_10distinctobj_dist_agent,99.29%,5.71%,36,0.31,10m,31s +base_config_10distinctobj_surf_agent,100.00%,0.00%,28,0.21,6m,28s +randrot_noise_10distinctobj_dist_agent,98.00%,7.00%,46,0.50,9m,55s +randrot_noise_10distinctobj_dist_on_distm,99.00%,3.00%,35,0.26,7m,50s +randrot_noise_10distinctobj_surf_agent,100.00%,0.00%,31,0.34,8m,62s +randrot_10distinctobj_surf_agent,100.00%,0.00%,28,0.40,7m,50s +randrot_noise_10distinctobj_5lms_dist_agent,100.00%,7.00%,50,0.94,44m,189s +base_10simobj_surf_agent,95.71%,10.71%,82,0.21,18m,104s +randrot_noise_10simobj_dist_agent,82.00%,37.00%,185,0.52,26m,202s +randrot_noise_10simobj_surf_agent,89.00%,34.00%,183,0.47,35m,307s +randomrot_rawnoise_10distinctobj_surf_agent,68.00%,81.00%,15,1.72,22m,23s +base_10multi_distinctobj_dist_agent,72.86%,43.57%,23,0.75,1h12m,1s \ No newline at end of 
file diff --git a/benchmarks/results/ycb_77objs.csv b/benchmarks/results/ycb_77objs.csv index aac4a97c..7ea11655 100644 --- a/benchmarks/results/ycb_77objs.csv +++ b/benchmarks/results/ycb_77objs.csv @@ -1,6 +1,6 @@ Experiment,% Correct,% Used MLH,Num Matching Steps,Rotation Error (radians),Run Time,Episode Run Time (s) -base_77obj_dist_agent,93.51%,14.29%,90,0.31,1h37m,295s -base_77obj_surf_agent,98.27%,5.63%,57,0.21,46m,141s -randrot_noise_77obj_dist_agent,87.01%,28.57%,155,0.64,2h14m,479s -randrot_noise_77obj_surf_agent,94.93%,19.48%,102,0.62,1h23m,304s -randrot_noise_77obj_5lms_dist_agent,93.51%,3.90%,71,0.92,54m,1398s +base_77obj_dist_agent,92.21%,16.02%,88,0.30,1h38m,301s +base_77obj_surf_agent,98.27%,4.33%,52,0.18,42m,123s +randrot_noise_77obj_dist_agent,87.01%,29.00%,151,0.63,2h10m,468s +randrot_noise_77obj_surf_agent,94.37%,21.65%,113,0.61,1h31m,339s +randrot_noise_77obj_5lms_dist_agent,90.91%,5.19%,70,1.01,1h7m,1439s \ No newline at end of file diff --git a/docs/overview/benchmark-experiments.md b/docs/overview/benchmark-experiments.md index 441740ad..1ab7c685 100644 --- a/docs/overview/benchmark-experiments.md +++ b/docs/overview/benchmark-experiments.md @@ -51,18 +51,18 @@ The following results are obtained from experiments using the 10-object subsets | Experiment | % Correct | % Used MLH | Num Matching Steps | Rotation Error (radians) | Run Time | Episode Run Time (s) | |---------------------------------------------|-----------|------------|--------------------|--------------------------|----------|----------------------| -| base_config_10distinctobj_dist_agent | 98.57% | 5.71% | 36 | 0.31 | 12m | 31s | -| base_config_10distinctobj_surf_agent | 100.00% | 0.00% | 28 | 0.17 | 6m | 27s | -| randrot_noise_10distinctobj_dist_agent | 99.00% | 7.00% | 51 | 0.50 | 10m | 56s | -| randrot_noise_10distinctobj_dist_on_distm | 99.00% | 1.00% | 35 | 0.26 | 7m | 48s | -| randrot_noise_10distinctobj_surf_agent | 100.00% | 1.00% | 29 | 0.36 | 7m | 49s | -| 
randrot_10distinctobj_surf_agent | 100.00% | 0.00% | 29 | 0.37 | 4m | 29s | -| randrot_noise_10distinctobj_5lms_dist_agent | 100.00% | 3.00% | 52 | 0.88 | 21m | 139s | -| base_10simobj_surf_agent | 95.00% | 10.00% | 84 | 0.21 | 14m | 76s | -| randrot_noise_10simobj_dist_agent | 81.00% | 38.00% | 193 | 0.52 | 26m | 206s | -| randrot_noise_10simobj_surf_agent | 90.00% | 35.00% | 178 | 0.45 | 34m | 294s | -| randomrot_rawnoise_10distinctobj_surf_agent | 65.00% | 77.00% | 16 | 1.60 | 22m | 24s | -| base_10multi_distinctobj_dist_agent | 74.29% | 37.14% | 27 | 0.64 | 1h9m | 2s | +| base_config_10distinctobj_dist_agent | 99.29% | 5.71% | 36 | 0.31 | 10m | 31s | +| base_config_10distinctobj_surf_agent | 100.00% | 0.00% | 28 | 0.21 | 6m | 28s | +| randrot_noise_10distinctobj_dist_agent | 98.00% | 7.00% | 46 | 0.50 | 9m | 55s | +| randrot_noise_10distinctobj_dist_on_distm | 99.00% | 3.00% | 35 | 0.26 | 7m | 50s | +| randrot_noise_10distinctobj_surf_agent | 100.00% | 0.00% | 31 | 0.34 | 8m | 62s | +| randrot_10distinctobj_surf_agent | 100.00% | 0.00% | 28 | 0.40 | 7m | 50s | +| randrot_noise_10distinctobj_5lms_dist_agent | 100.00% | 7.00% | 50 | 0.94 | 44m | 189s | +| base_10simobj_surf_agent | 95.71% | 10.71% | 82 | 0.21 | 18m | 104s | +| randrot_noise_10simobj_dist_agent | 82.00% | 37.00% | 185 | 0.52 | 26m | 202s | +| randrot_noise_10simobj_surf_agent | 89.00% | 34.00% | 183 | 0.47 | 35m | 307s | +| randomrot_rawnoise_10distinctobj_surf_agent | 68.00% | 81.00% | 15 | 1.72 | 22m | 23s | +| base_10multi_distinctobj_dist_agent | 72.86% | 43.57% | 23 | 0.75 | 1h12m | 1s | ## Longer Experiments with all 77 YCB Objects @@ -75,11 +75,11 @@ The following results are obtained from experiments on the entire YCB dataset (7 | Experiment | % Correct | % Used MLH | Num Matching Steps | Rotation Error (radians) | Run Time | Episode Run Time (s) | 
|-------------------------------------|-----------|------------|--------------------|--------------------------|----------|----------------------| -| base_77obj_dist_agent | 93.51% | 14.29% | 90 | 0.31 | 1h37m | 295s | -| base_77obj_surf_agent | 98.27% | 5.63% | 57 | 0.21 | 46m | 141s | -| randrot_noise_77obj_dist_agent | 87.01% | 28.57% | 155 | 0.64 | 2h14m | 479s | -| randrot_noise_77obj_surf_agent | 94.93% | 19.48% | 102 | 0.62 | 1h23m | 304s | -| randrot_noise_77obj_5lms_dist_agent | 93.51% | 3.90% | 71 | 0.92 | 54m | 1398s | +| base_77obj_dist_agent | 92.21% | 16.02% | 88 | 0.30 | 1h38m | 301s | +| base_77obj_surf_agent | 98.27% | 4.33% | 52 | 0.18 | 42m | 123s | +| randrot_noise_77obj_dist_agent | 87.01% | 29.00% | 151 | 0.63 | 2h10m | 468s | +| randrot_noise_77obj_surf_agent | 94.37% | 21.65% | 113 | 0.61 | 1h31m | 339s | +| randrot_noise_77obj_5lms_dist_agent | 90.91% | 5.19% | 70 | 1.01 | 1h7m | 1439s | ### Explanation of Some of the Results diff --git a/src/tbp/monty/frameworks/environments/embodied_data.py b/src/tbp/monty/frameworks/environments/embodied_data.py index 1eb0d469..3d660f97 100644 --- a/src/tbp/monty/frameworks/environments/embodied_data.py +++ b/src/tbp/monty/frameworks/environments/embodied_data.py @@ -483,7 +483,7 @@ def __next__(self): def pre_episode(self): super().pre_episode() if not self.dataset.env._agents[0].action_space_type == "surface_agent": - self.get_good_view("view_finder") + self.get_good_view_with_patch_refinement() def first_step(self): """Carry out particular motor-system state updates required on the first step. @@ -509,26 +509,36 @@ def first_step(self): return self._observation - def get_good_view(self, view_sensor_id): + def get_good_view( + self, view_sensor_id: str, allow_translation: bool = True + ) -> None: """Policy to get a good view of the object before an episode starts. - Used by the distant agent - the surface agent makes use of the - touch_object method instead. 
Also currently used by the distant - after a "jump" has been initialized by a model-based policy. - - Move towards object until it fills n percent of the view sensor - or the closest point of the object is <0.03 distance from the - sensor (-> won't be rendered properly anymore). This makes sure - that big and small objects all fill similar amount of space in the - sensor field of view. Otherwise small objects may be too small to - perform saccades or the sensor ends up inside of big objects. + Used by the distant agent to find the initial view of an object at the + beginning of an episode with respect to a given sensor (the surface agent + makes use of the `touch_object` method instead). Also currently used + by the distant agent after a "jump" has been initialized by a model-based + policy. + + First, the agent moves towards the object until it fills a minimum percentage + (given by `motor_system.good_view_percentage`) of the sensor's field of view + or the closest point of the object is less than a given distance + (`motor_system.desired_object_distance`) from the sensor. This makes sure + that big and small objects all fill a similar amount of space in the sensor's + field of view. Otherwise small objects may be too small to perform saccades or + the sensor ends up inside of big objects. This step is performed by default + but can be skipped by setting `allow_translation=False`. + + Second, the agent will then be oriented towards the object so that the + sensor's central pixel is on-object. In the case of multi-object experiments + (i.e., when `num_distractors > 0`), there is an additional orientation step + performed prior to the translational movement step. Args: - view_sensor_id: The name of the sensor used as view finder. - This sensor should ideally be a zoomed out version of the - sensor patch such that it can contain the whole object - while the sensor patch always only sees a small patch of - the object. 
+ view_sensor_id: The name of the sensor used to inform movements. + allow_translation: Whether to allow movement toward the object via + the motor system's `move_close_enough` method. If `False`, only + orienting movements are performed. Default is `True`. TODO M : move most of this to the motor systems, shouldn't be in embodied_data class @@ -548,25 +558,24 @@ def get_good_view(self, view_sensor_id): for action in actions: self._observation, self.motor_system.state = self.dataset[action] - # Move closer to the object, if not already close enough - action, close_enough = self.motor_system.move_close_enough( - self._observation, - view_sensor_id, - target_semantic_id=self.primary_target["semantic_id"], - multi_objects_present=self.num_distactors > 0, - ) - - # Continue moving to a close distance to the object - while not close_enough: - logging.debug("moving closer!") - self._observation, self.motor_system.state = self.dataset[action] - + if allow_translation: + # Move closer to the object, if not already close enough action, close_enough = self.motor_system.move_close_enough( self._observation, view_sensor_id, target_semantic_id=self.primary_target["semantic_id"], multi_objects_present=self.num_distactors > 0, ) + # Continue moving to a close distance to the object + while not close_enough: + logging.debug("moving closer!") + self._observation, self.motor_system.state = self.dataset[action] + action, close_enough = self.motor_system.move_close_enough( + self._observation, + view_sensor_id, + target_semantic_id=self.primary_target["semantic_id"], + multi_objects_present=self.num_distactors > 0, + ) # Re-center ourselves (if necessary) after having moved closer actions, on_object = self.motor_system.orient_to_object( @@ -592,6 +601,26 @@ def get_good_view(self, view_sensor_id): # ) # assert on_object, "Primary target must be visible at the start of the episode" + def get_good_view_with_patch_refinement(self) -> None: + """Policy to get a good view of the object 
for the central patch. + + Used by the distant agent to move and orient toward an object such that the + central patch is on-object. This is done by first moving and orienting the + agent toward the object using the view finder. A second orienting movement is + then performed using the central patch (i.e., the sensor module with id + "patch" or "patch_0") to ensure that the patch's central pixel is on-object. + + Also currently used by the distant agent after a "jump" has been initialized + by a model-based policy. + + + """ + self.get_good_view("view_finder") + for patch_id in ("patch", "patch_0"): + if patch_id in self._observation["agent_id_0"].keys(): + self.get_good_view(patch_id, allow_translation=False) + break + def execute_jump_attempt(self): """Attempt a hypothesis-testing "jump" onto a location of the object. @@ -716,7 +745,7 @@ def handle_successful_jump(self): self.motor_system.action_details["z_defined_pc"].append(None) else: - self.get_good_view("view_finder") + self.get_good_view_with_patch_refinement() # TODO implement better way to get better view after the jump that isn't # "cheating" by using get_good_view (which uses the semantic sensor) diff --git a/src/tbp/monty/frameworks/models/evidence_matching.py b/src/tbp/monty/frameworks/models/evidence_matching.py index 2eeb8ca0..29b60091 100644 --- a/src/tbp/monty/frameworks/models/evidence_matching.py +++ b/src/tbp/monty/frameworks/models/evidence_matching.py @@ -366,6 +366,12 @@ def reset(self): self.symmetry_evidence = 0 self.last_possible_hypotheses = None + self.current_mlh["graph_id"] = "no_observations_yet" + self.current_mlh["location"] = [0, 0, 0] + self.current_mlh["rotation"] = Rotation.from_euler("xyz", [0, 0, 0]) + self.current_mlh["scale"] = 1 + self.current_mlh["evidence"] = 0 + def receive_votes(self, vote_data): """Get evidence count votes and use to update own evidence counts. 
diff --git a/src/tbp/monty/frameworks/models/motor_policies.py b/src/tbp/monty/frameworks/models/motor_policies.py index acc289f6..b6b9ef3c 100644 --- a/src/tbp/monty/frameworks/models/motor_policies.py +++ b/src/tbp/monty/frameworks/models/motor_policies.py @@ -669,8 +669,9 @@ def orient_to_object( logging.debug("Searching for object") - # Check if the center of the view finder is on the object - if sem_obs[obs_dim[0] // 2][obs_dim[1] // 2] == target_semantic_id: + # Check if the central pixel is on-object. + y_mid, x_mid = obs_dim[0] // 2, obs_dim[1] // 2 + if sem_obs[y_mid, x_mid] == target_semantic_id: logging.debug("Already centered on the object") return [], True @@ -729,7 +730,7 @@ def find_location_to_look_at(self, sem3d_obs, image_shape, target_semantic_id): # as expected, which can otherwise break if e.g. on_object_image is passed # as an int or boolean rather than float smoothed_on_object_image = scipy.ndimage.gaussian_filter( - on_object_image, sem3d_obs.shape[0] / 10, mode="constant" + on_object_image, 2, mode="constant" ) idx_loc_to_look_at = np.argmax(smoothed_on_object_image * on_object_image) idx_loc_to_look_at = np.unravel_index(idx_loc_to_look_at, on_object_image.shape) @@ -1256,9 +1257,9 @@ def orienting_angle_from_normal(self, orienting: str) -> float: x, y, z = rotated_point_normal if "horizontal" == orienting: - return -np.degrees(np.arctan(x / z)) + return -np.degrees(np.arctan(x / z)) if z != 0 else -np.sign(x)*90.0 if "vertical" == orienting: - return -np.degrees(np.arctan(y / z)) + return -np.degrees(np.arctan(y / z)) if z != 0 else -np.sign(y)*90.0 ###