Add sanity check that the max response length matches the size of `response_tokens`

Signed-off-by: Olivier Delalleau <[email protected]>
NVIDIA · Jan 10, 2024 · ada1fb2 · ada1fb2
1 parent 520e842
commit ada1fb2
Showing 1 changed file with 7 additions and 0 deletions.
diff --git a/nemo_aligner/models/nlp/gpt/megatron_gpt_ppo_actor.py b/nemo_aligner/models/nlp/gpt/megatron_gpt_ppo_actor.py
@@ -283,8 +283,15 @@ def infer(self, inference_batch):
             response_lengths = strategy.get_lengths().to(torch.int64).view((-1, 1))
             assert (response_lengths <= self.cfg.encoder_seq_length).all()
         response_lengths = broadcast_2d_tensor_within_pp(response_lengths, dtype=torch.int64).flatten()
+        max_response_length = response_lengths.max().item()
 
         response_tokens = torch.cuda.LongTensor(actor_output["token_ids"])
+        if max_response_length != response_tokens.size(1):  # sanity check to validate response length
+            raise AssertionError(
+                f"max response length ({max_response_length}) does not match the size of "
+                f"`response_tokens` ({response_tokens.size(1)})"
+            )
+
         # TODO(geshen): get nemo generate to return the unaltered log probs
         log_probs = self.get_inference_log_probs(
             response_tokens, forward_micro_batch_size=self.forward_micro_batch_size