Skip to content

Commit

Permalink
Add JPEG quality param to image encoder (#1249)
Browse files Browse the repository at this point in the history
  • Loading branch information
bcherry authored Dec 18, 2024
1 parent 4d7a045 commit 7e8c089
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 10 deletions.
5 changes: 5 additions & 0 deletions .changeset/gorgeous-days-retire.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-agents": patch
---

Add JPEG quality param to image encoder
19 changes: 16 additions & 3 deletions livekit-agents/livekit/agents/llm/chat_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,26 @@ class ChatImage:
You may need to consult your LLM provider's documentation on supported URL types.
```python
# With a VideoFrame, which will be automatically converted to a data URL internally
# Pass a VideoFrame directly, which will be automatically converted to a JPEG data URL internally
async for event in rtc.VideoStream(video_track):
chat_image = ChatImage(image=event.frame)
# this instance is now available for your ChatContext
# With a data URL
chat_image = ChatImage(image=f"data:image/jpeg;base64,{base64_encoded_image}")
# Encode your VideoFrame yourself for more control, and pass the result as a data URL (see EncodeOptions for more details)
import base64
from livekit.agents.utils.images import encode, EncodeOptions, ResizeOptions
image_bytes = encode(
event.frame,
EncodeOptions(
format="PNG",
resize_options=ResizeOptions(
width=512, height=512, strategy="scale_aspect_fit"
),
),
)
chat_image = ChatImage(
image=f"data:image/png;base64,{base64.b64encode(image_bytes).decode('utf-8')}"
)
# With an external URL
chat_image = ChatImage(image="https://example.com/image.jpg")
Expand Down
37 changes: 30 additions & 7 deletions livekit-agents/livekit/agents/utils/images/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,42 @@

@dataclass
class EncodeOptions:
    """Options for encoding rtc.VideoFrame to portable image formats."""

    format: Literal["JPEG", "PNG"] = "JPEG"
    """The format to encode the image."""

    resize_options: Optional["ResizeOptions"] = None
    """Options for resizing the image."""

    quality: Optional[int] = 75
    """Image compression quality, 0-100. Only applies to JPEG.

    When None, no quality value is passed to the encoder, so the encoder's
    own default is used.
    """


@dataclass
class ResizeOptions:
    """Options for resizing rtc.VideoFrame as part of encoding to a portable image format."""

    width: int
    """The desired width to resize the image to."""

    height: int
    """The desired height to resize the image to."""

    strategy: Literal[
        "center_aspect_fit",
        "center_aspect_cover",
        "scale_aspect_fit",
        "scale_aspect_cover",
        "skew",
    ]
    """The strategy to use when resizing the image:
    - center_aspect_fit: Fit the image into the provided dimensions, with letterboxing
    - center_aspect_cover: Fill the provided dimensions, with cropping
    - scale_aspect_fit: Fit the image into the provided dimensions, preserving its original aspect ratio
    - scale_aspect_cover: Fill the provided dimensions, preserving its original aspect ratio (image will be larger than the provided dimensions)
    - skew: Precisely resize the image to the provided dimensions
    """


def import_pil():
Expand All @@ -57,12 +73,19 @@ def import_pil():
)


def encode(frame: rtc.VideoFrame, options: EncodeOptions) -> bytes:
    """Encode a rtc.VideoFrame to a portable image format (JPEG or PNG).

    See EncodeOptions for more details.

    Args:
        frame: The video frame to encode.
        options: Output format, optional resize options, and JPEG quality.

    Returns:
        The encoded image as bytes.
    """
    import_pil()
    img = _image_from_frame(frame)
    resized = _resize_image(img, options)

    # Quality is forwarded only for JPEG and only when explicitly set;
    # otherwise the encoder's own default applies.
    save_kwargs = {}
    if options.format == "JPEG" and options.quality is not None:
        save_kwargs["quality"] = options.quality

    buffer = io.BytesIO()
    # Save exactly once: a second save into the same buffer would append a
    # second encoded image after the first.
    resized.save(buffer, options.format, **save_kwargs)
    return buffer.getvalue()

Expand Down

0 comments on commit 7e8c089

Please sign in to comment.