Commit
Merge remote-tracking branch 'upstream/main' into usd
keyboardAnt committed Nov 30, 2024
2 parents 1eccccd + 19dabe9 commit a6a6b38
Showing 6 changed files with 160 additions and 16 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -179,7 +179,7 @@
     "tf2onnx",
     "timeout-decorator",
     "tiktoken",
-    "timm<=0.9.16",
+    "timm<=1.0.11",
     "tokenizers>=0.20,<0.21",
     "torch",
     "torchaudio",
2 changes: 1 addition & 1 deletion src/transformers/dependency_versions_table.py
@@ -84,7 +84,7 @@
     "tf2onnx": "tf2onnx",
     "timeout-decorator": "timeout-decorator",
     "tiktoken": "tiktoken",
-    "timm": "timm<=0.9.16",
+    "timm": "timm<=1.0.11",
     "tokenizers": "tokenizers>=0.20,<0.21",
     "torch": "torch",
     "torchaudio": "torchaudio",
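(Both files bump the same pin: the `timm` upper bound moves from 0.9.16 to 1.0.11. A local environment can be sanity-checked against the raised pin with something like the sketch below; it assumes `timm` and `packaging` are installed and is not part of this commit.)

# Hedged sketch: check that the installed timm satisfies the raised pin.
from packaging import version

import timm

assert version.parse(timm.__version__) <= version.parse("1.0.11"), (
    f"timm {timm.__version__} exceeds the pinned upper bound 1.0.11"
)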
37 changes: 26 additions & 11 deletions src/transformers/models/trocr/processing_trocr.py
@@ -18,8 +18,16 @@

 import warnings
 from contextlib import contextmanager
+from typing import List, Union

-from ...processing_utils import ProcessorMixin
+from ...image_processing_utils import BatchFeature
+from ...image_utils import ImageInput
+from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
+from ...tokenization_utils_base import PreTokenizedInput, TextInput
+
+
+class TrOCRProcessorKwargs(ProcessingKwargs, total=False):
+    _defaults = {}


 class TrOCRProcessor(ProcessorMixin):
@@ -61,7 +69,14 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs):
         self.current_processor = self.image_processor
         self._in_target_context_manager = False

-    def __call__(self, *args, **kwargs):
+    def __call__(
+        self,
+        images: ImageInput = None,
+        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
+        audio=None,
+        videos=None,
+        **kwargs: Unpack[TrOCRProcessorKwargs],
+    ) -> BatchFeature:
         """
         When used in normal mode, this method forwards all its arguments to AutoImageProcessor's
         [`~AutoImageProcessor.__call__`] and returns its output. If used in the context
@@ -70,21 +85,21 @@ def __call__(self, *args, **kwargs):
         """
         # For backward compatibility
         if self._in_target_context_manager:
-            return self.current_processor(*args, **kwargs)
-
-        images = kwargs.pop("images", None)
-        text = kwargs.pop("text", None)
-        if len(args) > 0:
-            images = args[0]
-            args = args[1:]
+            return self.current_processor(images, **kwargs)

         if images is None and text is None:
             raise ValueError("You need to specify either an `images` or `text` input to process.")

+        output_kwargs = self._merge_kwargs(
+            TrOCRProcessorKwargs,
+            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
+            **kwargs,
+        )
+
         if images is not None:
-            inputs = self.image_processor(images, *args, **kwargs)
+            inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
         if text is not None:
-            encodings = self.tokenizer(text, **kwargs)
+            encodings = self.tokenizer(text, **output_kwargs["text_kwargs"])

         if text is None:
             return inputs
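(For context, the refactored `__call__` takes `images` and `text` as explicit keywords and routes the remaining kwargs through `_merge_kwargs` into per-modality buckets: `images_kwargs` for the image processor and `text_kwargs` for the tokenizer. A minimal usage sketch follows; the `microsoft/trocr-base-handwritten` checkpoint and the image path are assumptions for illustration, not part of this diff.)

# Hedged sketch of the keyword-based API; checkpoint and image path are assumed.
from PIL import Image

from transformers import TrOCRProcessor

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
image = Image.open("handwriting_sample.png").convert("RGB")

# Tokenizer kwargs such as `padding` now flow through `text_kwargs`,
# while image-processor kwargs flow through `images_kwargs`.
batch = processor(images=image, text="industry, ' Mr. Brown commented", padding=True, return_tensors="pt")
print(batch["pixel_values"].shape)  # image features from the image processor
print(batch["labels"])              # tokenized text, returned under "labels"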
4 changes: 2 additions & 2 deletions src/transformers/pipelines/fill_mask.py
@@ -245,12 +245,12 @@ def _sanitize_parameters(self, top_k=None, targets=None, tokenizer_kwargs=None):
         )
         return preprocess_params, {}, postprocess_params

-    def __call__(self, inputs, *args, **kwargs):
+    def __call__(self, inputs, **kwargs):
         """
         Fill the masked token in the text(s) given as inputs.

         Args:
-            args (`str` or `List[str]`):
+            inputs (`str` or `List[str]`):
                 One or several texts (or one list of prompts) with masked tokens.
             targets (`str` or `List[str]`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
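(The pipeline change is purely cosmetic: the stray `*args` is dropped and the docstring now documents the real parameter name, `inputs`. Calls are unaffected, as in this sketch; the model name is an assumed example checkpoint.)

# Hedged sketch; "distilbert/distilroberta-base" is an assumed example model.
from transformers import pipeline

fill_mask = pipeline("fill-mask", model="distilbert/distilroberta-base")
# `inputs` is the first positional parameter, so both spellings are equivalent:
print(fill_mask("Paris is the <mask> of France.", top_k=2))
print(fill_mask(inputs="Paris is the <mask> of France.", top_k=2))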
2 changes: 1 addition & 1 deletion src/transformers/trainer.py
@@ -2113,7 +2113,7 @@ def train(
                 FutureWarning,
             )
         if len(kwargs) > 0:
-            raise TypeError(f"train() received got unexpected keyword arguments: {', '.join(list(kwargs.keys()))}.")
+            raise TypeError(f"train() got unexpected keyword arguments: {', '.join(list(kwargs.keys()))}.")
         # This might change the seed so needs to run first.
         self._hp_search_setup(trial)
         self._train_batch_size = self.args.train_batch_size
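(This hunk only fixes the wording of the error message. With the fix, passing an unknown keyword to `train()` reads cleanly; a sketch, assuming `trainer` is an already-constructed `transformers.Trainer`:)

# Hedged sketch: `trainer` is assumed to be a configured Trainer instance.
try:
    trainer.train(nonexistent_option=True)
except TypeError as err:
    print(err)  # train() got unexpected keyword arguments: nonexistent_option.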
129 changes: 129 additions & 0 deletions tests/models/trocr/test_processor_trocr.py
@@ -0,0 +1,129 @@
import os
import shutil
import tempfile
import unittest

import pytest

from transformers.models.xlm_roberta.tokenization_xlm_roberta import VOCAB_FILES_NAMES
from transformers.testing_utils import (
    require_sentencepiece,
    require_tokenizers,
    require_vision,
)
from transformers.utils import is_vision_available

from ...test_processing_common import ProcessorTesterMixin


if is_vision_available():
    from transformers import TrOCRProcessor, ViTImageProcessor, XLMRobertaTokenizerFast


@require_sentencepiece
@require_tokenizers
@require_vision
class TrOCRProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    text_input_name = "labels"
    processor_class = TrOCRProcessor

    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

        vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "want", "##want", "##ed", "wa", "un", "runn", "##ing", ",", "low", "lowest"]  # fmt: skip
        self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
        with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))

        image_processor = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit")
        tokenizer = XLMRobertaTokenizerFast.from_pretrained("FacebookAI/xlm-roberta-base")
        processor = TrOCRProcessor(image_processor=image_processor, tokenizer=tokenizer)
        processor.save_pretrained(self.tmpdirname)

    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

    def get_tokenizer(self, **kwargs):
        return XLMRobertaTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)

    def get_image_processor(self, **kwargs):
        return ViTImageProcessor.from_pretrained(self.tmpdirname, **kwargs)

    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
        processor = TrOCRProcessor(image_processor=image_processor, tokenizer=tokenizer)

        processor.save_pretrained(self.tmpdirname)
        processor = TrOCRProcessor.from_pretrained(self.tmpdirname)

        self.assertIsInstance(processor.tokenizer, XLMRobertaTokenizerFast)
        self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab())
        self.assertIsInstance(processor.image_processor, ViTImageProcessor)
        self.assertEqual(processor.image_processor.to_json_string(), image_processor.to_json_string())

    def test_save_load_pretrained_additional_features(self):
        processor = TrOCRProcessor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
        processor.save_pretrained(self.tmpdirname)
        tokenizer_add_kwargs = self.get_tokenizer(bos_token="(BOS)", eos_token="(EOS)")
        image_processor_add_kwargs = self.get_image_processor(do_normalize=False, padding_value=1.0)

        processor = TrOCRProcessor.from_pretrained(
            self.tmpdirname, bos_token="(BOS)", eos_token="(EOS)", do_normalize=False, padding_value=1.0
        )

        self.assertIsInstance(processor.tokenizer, XLMRobertaTokenizerFast)
        self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())

        self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
        self.assertIsInstance(processor.image_processor, ViTImageProcessor)

    def test_image_processor(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
        processor = TrOCRProcessor(tokenizer=tokenizer, image_processor=image_processor)
        image_input = self.prepare_image_inputs()

        input_feat_extract = image_processor(image_input, return_tensors="np")
        input_processor = processor(images=image_input, return_tensors="np")

        for key in input_feat_extract.keys():
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)

    def test_tokenizer(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
        processor = TrOCRProcessor(tokenizer=tokenizer, image_processor=image_processor)
        input_str = "lower newer"

        encoded_processor = processor(text=input_str)
        encoded_tok = tokenizer(input_str)

        for key in encoded_tok.keys():
            self.assertListEqual(encoded_tok[key], encoded_processor[key])

    def test_processor_text(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
        processor = TrOCRProcessor(tokenizer=tokenizer, image_processor=image_processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()

        inputs = processor(text=input_str, images=image_input)

        self.assertListEqual(list(inputs.keys()), ["pixel_values", "labels"])

        # test if it raises when no input is passed
        with pytest.raises(ValueError):
            processor()

    def test_tokenizer_decode(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
        processor = TrOCRProcessor(tokenizer=tokenizer, image_processor=image_processor)
        predicted_ids = [[1, 4, 5, 8, 1, 0, 8], [3, 4, 3, 1, 1, 8, 9]]

        decoded_processor = processor.batch_decode(predicted_ids)
        decoded_tok = tokenizer.batch_decode(predicted_ids)

        self.assertListEqual(decoded_tok, decoded_processor)
