From 513975f1d862a0285bc1d50c20cfdf11fe98c373 Mon Sep 17 00:00:00 2001
From: pnadolny13
Date: Fri, 22 Nov 2024 13:58:22 -0500
Subject: [PATCH] make batch size configurable

---
 map_gpt_embeddings/mappers.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/map_gpt_embeddings/mappers.py b/map_gpt_embeddings/mappers.py
index 0b7c32b..8077682 100644
--- a/map_gpt_embeddings/mappers.py
+++ b/map_gpt_embeddings/mappers.py
@@ -114,6 +114,17 @@ def map_schema_message(self, message_dict: dict) -> t.Iterable[Message]:
             description="The embedding model to use.",
             default=1_000_000 * 0.5,
         ),
+        th.Property(
+            "request_batch_size",
+            th.NumberType,
+            description=(
+                "The mapper writes records to a temporary local file, then passes it to an OpenAI "
+                "cookbook script that processes the API requests in parallel, maximizing throughput "
+                "while handling rate limits. This configuration sets the number of records to write "
+                "to the temp file before executing the script."
+            ),
+            default=50,
+        ),
     ).to_dict()
 
     def _validate_config(self, *, raise_errors: bool = True) -> list[str]:
@@ -192,7 +203,7 @@ def map_record_message(self, message_dict: dict) -> t.Iterable[RecordMessage]:
             )
             self.cursor_position += 1
             # Run async process and output batch results
-            if self.cursor_position >= 50:
+            if self.cursor_position >= self.config["request_batch_size"]:
                 self.cursor_position = 0
                 asyncio.run(
                     process_api_requests_from_file(
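
For readers skimming the patch, the change boils down to replacing a hard-coded flush threshold of 50 with a config lookup. The following is a minimal sketch of that buffer-and-flush pattern under assumed names (process_stream, flush_batch, buffer are hypothetical, not the mapper's API); the real mapper tracks a cursor_position counter and writes to a temp file rather than keeping an in-memory list.

# A minimal sketch (assumed names; not the mapper's actual code) of the
# buffer-and-flush pattern that request_batch_size now parameterizes.
from typing import Iterable


def flush_batch(batch: list[dict]) -> None:
    # Stand-in for the mapper's real flush step, which writes the batch to a
    # temporary file and then runs the OpenAI cookbook script via
    # asyncio.run(process_api_requests_from_file(...)).
    print(f"flushing {len(batch)} records")


def process_stream(records: Iterable[dict], config: dict) -> None:
    batch_size = config.get("request_batch_size", 50)  # new knob, default 50
    buffer: list[dict] = []
    for record in records:
        buffer.append(record)
        if len(buffer) >= batch_size:  # was a hard-coded 50 before this patch
            flush_batch(buffer)
            buffer.clear()
    if buffer:  # flush any trailing partial batch
        flush_batch(buffer)


process_stream(({"text": f"row {i}"} for i in range(120)),
               {"request_batch_size": 50})

Running the sketch flushes two full batches of 50 and a trailing batch of 20, which is the throughput/latency trade-off the new config exposes: larger batches mean fewer invocations of the parallel-request script, smaller batches mean records are emitted sooner.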