forked from apple/corenet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
image_processor.py
104 lines (86 loc) · 3.33 KB
/
image_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# For licensing see accompanying LICENSE file.
# Copyright (C) 2024 Apple Inc. All Rights Reserved.
#
# Taken from https://github.com/ml-explore/mlx-examples/blob/main/clip/image_processor.py
# with modifications about doc-string and typing.
import json
from pathlib import Path
from typing import List, Tuple
import mlx.core as mx
import numpy as np
from PIL.Image import Image
class CLIPImageProcessor:
"""Constructs an image processor that converts a PIL image into MX array.
A simple port of
https://github.com/huggingface/transformers/blob/main/src/transformers/models/clip/image_processing_clip.py.
"""
def __init__(
self,
crop_size: int = 224,
do_center_crop: bool = True,
do_normalize: bool = True,
do_resize: bool = True,
image_mean: List[float] = [0.48145466, 0.4578275, 0.40821073],
image_std: List[float] = [0.26862954, 0.26130258, 0.27577711],
size: int = 224,
**kwargs
) -> None:
self.crop_size = crop_size
self.do_center_crop = do_center_crop
self.do_normalize = do_normalize
self.do_resize = do_resize
self.image_mean = mx.array(image_mean)
self.image_std = mx.array(image_std)
self.size = size
def __call__(self, images: List[Image]) -> mx.array:
return mx.concatenate(
[self._preprocess(image)[None] for image in images], axis=0
)
def _preprocess(self, image: Image) -> mx.array:
if self.do_resize:
image = resize(image, self.size)
if self.do_center_crop:
image = center_crop(image, (self.crop_size, self.crop_size))
image = mx.array(np.array(image))
image = rescale(image)
if self.do_normalize:
image = normalize(image, self.image_mean, self.image_std)
return image
@staticmethod
def from_pretrained(path: str) -> "CLIPImageProcessor":
path = Path(path)
with open(path / "preprocessor_config.json", encoding="utf-8") as f:
config = json.load(f)
return CLIPImageProcessor(**config)
def resize(image: Image, short_size: int) -> Image:
"""
Resize image to short_size.
"""
width, height = image.size
short = min(width, height)
long = max(width, height)
if short == short_size:
return image
new_short = short_size
new_long = int(short_size * long / short)
new_size = (new_short, new_long) if width <= height else (new_long, new_short)
return image.resize(new_size)
def center_crop(image: Image, size: Tuple[int, int]) -> Image:
"""
Perform a crop of a image in the center of given size.
"""
if size[0] % 2 != 0 or size[1] % 2 != 0:
raise ValueError("Only even crop sizes supported.")
original_width, original_height = image.size
crop_height, crop_width = size
top = (original_height - crop_height) // 2
bottom = top + crop_height
left = (original_width - crop_width) // 2
right = left + crop_width
return image.crop((left, top, right, bottom))
def rescale(image: mx.array) -> mx.array:
"""Rescale an image to 0.0-1.0 region"""
return image.astype(mx.float32) * (1 / 255.0)
def normalize(image: mx.array, mean: mx.array, std: mx.array) -> mx.array:
"""Normalize an image with given mean and standard deviation"""
return (image - mean) / std