From e164827db4ed46c14979fdaef970865784c54412 Mon Sep 17 00:00:00 2001
From: Adam Louly <adamlouly3@gmail.com>
Date: Fri, 27 Oct 2023 01:37:06 -0700
Subject: [PATCH] Handling block_size value for models with no sequence length
 limit. (#1487)

* handle a negative max_position_embeddings (e.g. -1, meaning no sequence
  length limit) by falling back to 1024

* change where to fetch: apply the fallback right where
  max_position_embeddings is read from the config

---------

Co-authored-by: Adam Louly <adamlouly@microsoft.com>
---
 examples/onnxruntime/training/language-modeling/run_clm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/onnxruntime/training/language-modeling/run_clm.py b/examples/onnxruntime/training/language-modeling/run_clm.py
index cfe72186bc7..d4a473993ab 100644
--- a/examples/onnxruntime/training/language-modeling/run_clm.py
+++ b/examples/onnxruntime/training/language-modeling/run_clm.py
@@ -495,6 +495,8 @@ def tokenize_function(examples):
 
     if hasattr(config, "max_position_embeddings"):
         max_pos_embeddings = config.max_position_embeddings
+        if max_pos_embeddings < 0:
+            max_pos_embeddings = 1024
     else:
         # Define a default value if the attribute is missing in the config.
         max_pos_embeddings = 1024