Commit af440aa

Convert tests/benchmarks/ pytest-benchmark benchmarks into benchmarks/ asv benchmarks
lapp0 committed May 18, 2024
1 parent a1bcb3e commit af440aa
Showing 5 changed files with 60 additions and 74 deletions.
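For context, the conversion pattern this commit applies throughout: pytest-benchmark times a callable handed to its benchmark fixture inside a test function, while asv (airspeed velocity) collects plain classes, runs setup() before timing, and times every method whose name starts with time_. A minimal before/after sketch of that pattern (illustrative names, not code from this commit); the first hunks below are the JSON-schema benchmark module, presumably benchmarks/bench_json_schema.py given the naming of its sibling files:

    # Before: pytest-benchmark times the callable passed to the fixture.
    def test_square(benchmark):
        benchmark(lambda: 3**2)

    # After: asv calls setup(), then times each time_* method.
    class SquareBenchmark:
        def setup(self):
            self.x = 3

        def time_square(self):
            self.x**2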
@@ -1,12 +1,12 @@
-import pytest
-
 import outlines
 
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
 from outlines.fsm.json_schema import build_regex_from_schema  # noqa: E402
 
+from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+
 simple_schema = """{
 "$defs": {
 "Armor": {
@@ -63,30 +63,19 @@
 "required": ["id", "work", "recording_artists"]
 }"""
 
 
 schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)
 
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
-    """Benchmark convert json schema to regex"""
-    schema = schemas[schema_name]
-    benchmark.pedantic(
-        build_regex_from_schema,
-        args=(schema,),
-        rounds=8,
-    )
+class JsonSchemaBenchmark:
+    def setup(self):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+
+    def time_json_schema_to_regex(self):
+        for schema_name, schema in schemas.items():
+            build_regex_from_schema(schema)
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, schema_name
-):
-    """Benchmark compile json schema as FSM"""
-    schema = schemas[schema_name]
-    regex = build_regex_from_schema(schema)
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex, tokenizer),
-        rounds=8,
-    )
+    def time_json_schema_to_fsm(self):
+        for schema_name, schema in schemas.items():
+            regex = build_regex_from_schema(schema)
+            RegexGuide(regex, self.tokenizer)
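Assuming the repository gains a standard asv.conf.json pointing at benchmarks/ (not shown in this diff), the converted suite would typically be run with the asv CLI rather than pytest:

    asv dev                     # quick single run in the current environment
    asv run                     # full run against the configured environments
    asv continuous main HEAD    # compare HEAD to main and report regressions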
36 changes: 36 additions & 0 deletions benchmarks/bench_numba_compile.py
@@ -0,0 +1,36 @@
+import importlib
+
+import interegular
+import numba
+
+import outlines
+
+from .common import setup_tokenizer
+
+outlines.disable_cache()
+
+
+class NumbaCompileBenchmark:
+    def setup(self):
+        from outlines.fsm import regex
+
+        self.tokenizer = setup_tokenizer()
+        self.regex = regex
+        original_njit = numba.njit
+
+        def mock_njit(*args, **kwargs):
+            kwargs["cache"] = False
+            return original_njit(*args, **kwargs)
+
+        self.original_njit = original_njit
+        numba.njit = mock_njit
+        importlib.reload(self.regex)
+        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
+            interegular.parse_pattern("a").to_fsm().reduce()
+        )
+
+    def teardown(self):
+        numba.njit = self.original_njit
+
+    def time_compile_numba(self):
+        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
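The setup here deliberately defeats numba's on-disk cache: njit is swapped for a wrapper that forces cache=False, and outlines.fsm.regex is reloaded so its decorated functions are re-created under the patched decorator. time_compile_numba then measures actual JIT compilation on the trivial pattern "a" rather than a cache hit, and teardown restores the real njit so later benchmarks see normal behavior.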
@@ -1,7 +1,7 @@
-import pytest
-
 import outlines
 
+from .common import ensure_numba_compiled, setup_tokenizer
+
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
@@ -19,14 +19,11 @@
 }
 
 
-@pytest.mark.parametrize("regex_name", regex_samples.keys())
-def test_benchmark_regex_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, regex_name
-):
-    """Benchmark converting regex to FSM"""
-    regex_str = regex_samples[regex_name]
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex_str, tokenizer),
-        rounds=8,
-    )
+class RegexFsmBenchmark:
+    def setup(self):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+
+    def time_regex_to_fsm(self):
+        for regex_name, regex_str in regex_samples.items():
+            RegexGuide(regex_str, self.tokenizer)
5 changes: 1 addition & 4 deletions tests/benchmark/conftest.py → benchmarks/common.py
@@ -1,17 +1,14 @@
-import pytest
 from transformers import AutoTokenizer
 
 from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
-@pytest.fixture
-def tokenizer():
+def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
 
 
-@pytest.fixture
 def ensure_numba_compiled(tokenizer):
     RegexGuide("a", tokenizer)
     return True
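With pytest removed from the benchmark path, the former tokenizer and ensure_numba_compiled fixtures become plain module-level helpers in benchmarks/common.py; each asv benchmark class now calls them explicitly from its setup() instead of receiving them through fixture injection.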
33 changes: 0 additions & 33 deletions tests/benchmark/test_benchmark_numba_compile.py

This file was deleted.
