Skip to content

Commit

Permalink
Merge branch 'main' into fix/make-convert-token-to-string-pickleable
Browse files Browse the repository at this point in the history
  • Loading branch information
saattrupdan authored Nov 26, 2024
2 parents 3b3d926 + b55d314 commit 009e4df
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/reference/generation/generation.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ title: Generation

# Generation

Once an [Outlines model](../models) is constructed you can use `outlines.generate` to generate text. Standard LLM generation is possible via `outlines.generate.text`, along with a variety of structured generation methods described below. (For a detailed technical explanation of how structured generation works, you may review the [Structured Generation Explanation](./structured_generation_explanation.md) page)
Once an [Outlines model](../models/models.md) is constructed, you can use `outlines.generate` to generate text. Standard LLM generation is possible via `outlines.generate.text`, along with a variety of structured generation methods described below. (For a detailed technical explanation of how structured generation works, you may review the [Structured Generation Explanation](./structured_generation_explanation.md) page.)

Before generating text, you must construct an `outlines.model`. Example:

Expand Down
2 changes: 0 additions & 2 deletions outlines/fsm/guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
)

from outlines import grammars
from outlines.caching import cache
from outlines.fsm.parsing import PartialLark, PartialParserState

if TYPE_CHECKING:
Expand Down Expand Up @@ -73,7 +72,6 @@ def copy(self):
return self


@cache()
def cached_create_states_mapping(regex_string, tokenizer, *args, **kwargs):
    """Return the result of ``uncached_create_states_mapping`` for these arguments.

    This is a thin pass-through: ``regex_string``, ``tokenizer`` and any extra
    positional or keyword arguments are forwarded unchanged, and the value
    produced by ``uncached_create_states_mapping`` is returned as-is.
    """
    states_mapping = uncached_create_states_mapping(
        regex_string, tokenizer, *args, **kwargs
    )
    return states_mapping

Expand Down
15 changes: 3 additions & 12 deletions tests/fsm/test_guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def convert_token_to_string(self, token):
tokenizer = MockTokenizer()
fsm = RegexGuide.from_regex(regex_str, tokenizer)

assert fsm.states_to_token_maps == {0: {1: 1}}
assert fsm.states_to_token_maps.get_transitions() == {0: {1: 1}}

instruction = fsm.get_next_instruction(0)
assert isinstance(instruction, Generate)
Expand All @@ -70,9 +70,6 @@ def convert_token_to_string(self, token):

assert fsm.is_final_state(0) is False

for state in fsm.final_states:
assert fsm.is_final_state(state) is True


def test_regex_multi_byte_llama_like():
class MockTokenizer:
Expand Down Expand Up @@ -100,7 +97,7 @@ def convert_token_to_string(self, token):
tokenizer = MockTokenizer()
fsm = RegexGuide.from_regex(regex_str, tokenizer)

assert fsm.states_to_token_maps == {
assert fsm.states_to_token_maps.get_transitions() == {
0: {5: 1, 4: 2},
1: {6: 3},
3: {7: 4},
Expand All @@ -116,9 +113,6 @@ def convert_token_to_string(self, token):

assert fsm.is_final_state(0) is False

for state in fsm.final_states:
assert fsm.is_final_state(state) is True


def test_regex_multi_byte_gpt2_like():
class MockTokenizer:
Expand Down Expand Up @@ -147,7 +141,7 @@ def convert_token_to_string(self, token):
tokenizer = MockTokenizer()
fsm = RegexGuide.from_regex(regex_str, tokenizer)

assert fsm.states_to_token_maps == {
assert fsm.states_to_token_maps.get_transitions() == {
0: {5: 1, 10: 2},
1: {8: 5, 4: 3},
2: {11: 3},
Expand All @@ -163,9 +157,6 @@ def convert_token_to_string(self, token):

assert fsm.is_final_state(0) is False

for state in fsm.final_states:
assert fsm.is_final_state(state) is True


def test_regex_final_state():
"""Make sure that the FSM stays in the final state as we keep generating"""
Expand Down
1 change: 1 addition & 0 deletions tests/generate/test_integration_llamacpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def test_llama_cpp_pre_tokenizer_remains_broken():
generate.choice(model, ["skirt", "dress", "pen", "jacket"])


@pytest.mark.skip("Caching for guide was temporarily turned off")
def test_RegexGuide_caching(model, temp_cache_dir):
import llama_cpp

Expand Down
1 change: 1 addition & 0 deletions tests/generate/test_integration_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ def test_transformers_use_existing_model_and_tokenizer():
assert isinstance(sequence, str)


@pytest.mark.skip("Caching for guide was temporarily turned off")
def test_RegexGuide_caching(temp_cache_dir):
import outlines.caching
from outlines.fsm.guide import cached_create_states_mapping
Expand Down

0 comments on commit 009e4df

Please sign in to comment.