make batch size configurable
pnadolny13 committed Nov 22, 2024
1 parent 1650e8b commit 513975f
Showing 1 changed file with 12 additions and 1 deletion.
map_gpt_embeddings/mappers.py (13 changes: 12 additions & 1 deletion)
@@ -114,6 +114,17 @@ def map_schema_message(self, message_dict: dict) -> t.Iterable[Message]:
                 description="The embedding model to use.",
                 default=1_000_000 * 0.5,
             ),
+            th.Property(
+                "request_batch_size",
+                th.NumberType,
+                description=(
+                    "The mapper writes records to a temporary local file, then gives it to an OpenAI cookbook "
+                    "script for parallel processing of API requests to maximize throughput while handling rate limits. "
+                    "This configuration sets the number of records to write to the temp file prior to executing "
+                    "the script."
+                ),
+                default=50,
+            ),
         ).to_dict()
 
     def _validate_config(self, *, raise_errors: bool = True) -> list[str]:
@@ -192,7 +203,7 @@ def map_record_message(self, message_dict: dict) -> t.Iterable[RecordMessage]:
                 )
             self.cursor_position += 1
             # Run async process and output batch results
-            if self.cursor_position >= 50:
+            if self.cursor_position >= self.config["request_batch_size"]:
                 self.cursor_position = 0
                 asyncio.run(
                     process_api_requests_from_file(
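
For context, the change replaces a hard-coded flush threshold of 50 records with the new request_batch_size config key. Below is a minimal sketch of that batching pattern. It is not the project's actual class: BatchingMapper, process_batch, and the plain config dict are assumptions standing in for the mapper and the cookbook's process_api_requests_from_file.

import asyncio


async def process_batch(records: list[dict]) -> None:
    # Stand-in for the parallel-request script: in the real mapper this is
    # where the buffered records would be sent to the embeddings API.
    await asyncio.sleep(0)


class BatchingMapper:
    def __init__(self, config: dict | None = None) -> None:
        # Default mirrors the commit: flush every 50 records unless overridden.
        self.config = {"request_batch_size": 50, **(config or {})}
        self.buffer: list[dict] = []
        self.cursor_position = 0

    def map_record(self, record: dict) -> None:
        self.buffer.append(record)
        self.cursor_position += 1
        # Flush once the configured batch size is reached, as in the diff.
        if self.cursor_position >= self.config["request_batch_size"]:
            self.cursor_position = 0
            asyncio.run(process_batch(self.buffer))
            self.buffer.clear()


mapper = BatchingMapper({"request_batch_size": 100})
for i in range(250):
    mapper.map_record({"id": i})

Making the threshold configurable lets users trade throughput against memory and rate-limit pressure: larger batches mean fewer script invocations, smaller ones keep the temp file and each burst of API requests small.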
