From 513975f1d862a0285bc1d50c20cfdf11fe98c373 Mon Sep 17 00:00:00 2001
From: pnadolny13
Date: Fri, 22 Nov 2024 13:58:22 -0500
Subject: [PATCH] make batch size configurable

---
 map_gpt_embeddings/mappers.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/map_gpt_embeddings/mappers.py b/map_gpt_embeddings/mappers.py
index 0b7c32b..8077682 100644
--- a/map_gpt_embeddings/mappers.py
+++ b/map_gpt_embeddings/mappers.py
@@ -114,6 +114,17 @@ def map_schema_message(self, message_dict: dict) -> t.Iterable[Message]:
             description="The embedding model to use.",
             default=1_000_000 * 0.5,
         ),
+        th.Property(
+            "request_batch_size",
+            th.NumberType,
+            description=(
+                "The mapper writes records to a temporary local file, then passes it to an OpenAI "
+                "cookbook script that processes the API requests in parallel, maximizing throughput "
+                "while handling rate limits. This configuration sets the number of records to write "
+                "to the temp file before executing the script."
+            ),
+            default=50,
+        ),
     ).to_dict()
 
     def _validate_config(self, *, raise_errors: bool = True) -> list[str]:
@@ -192,7 +203,7 @@ def map_record_message(self, message_dict: dict) -> t.Iterable[RecordMessage]:
             )
             self.cursor_position += 1
             # Run async process and output batch results
-            if self.cursor_position >= 50:
+            if self.cursor_position >= self.config["request_batch_size"]:
                 self.cursor_position = 0
                 asyncio.run(
                     process_api_requests_from_file(
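
For readers skimming the patch, the change boils down to replacing a hard-coded flush threshold of 50 with a config lookup. The following is a minimal sketch of that buffer-and-flush pattern under assumed names (process_stream, flush_batch, buffer are hypothetical, not the mapper's API); the real mapper tracks a cursor_position counter and writes to a temp file rather than keeping an in-memory list.

# A minimal sketch (assumed names; not the mapper's actual code) of the
# buffer-and-flush pattern that request_batch_size now parameterizes.
from typing import Iterable


def flush_batch(batch: list[dict]) -> None:
    # Stand-in for the mapper's real flush step, which writes the batch to a
    # temporary file and then runs the OpenAI cookbook script via
    # asyncio.run(process_api_requests_from_file(...)).
    print(f"flushing {len(batch)} records")


def process_stream(records: Iterable[dict], config: dict) -> None:
    batch_size = config.get("request_batch_size", 50)  # new knob, default 50
    buffer: list[dict] = []
    for record in records:
        buffer.append(record)
        if len(buffer) >= batch_size:  # was a hard-coded 50 before this patch
            flush_batch(buffer)
            buffer.clear()
    if buffer:  # flush any trailing partial batch
        flush_batch(buffer)


process_stream(({"text": f"row {i}"} for i in range(120)),
               {"request_batch_size": 50})

Running the sketch flushes two full batches of 50 and a trailing batch of 20, which is the throughput/latency trade-off the new config exposes: larger batches mean fewer invocations of the parallel-request script, smaller batches mean records are emitted sooner.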