# Patch summary. This preamble sits ahead of the first "diff --git" header, so
# it is commentary only and is not part of any hunk below.
#
# outlines/text/json_schema.py
#   * Adds STRING_INNER = r'(?:[^"\\]|\\.)': one string "unit" is either a
#     single character other than '"' or '\', or a backslash followed by one
#     character.
#   * STRING changes from r'".*"' to f'"{STRING_INNER}*"', so an unescaped '"'
#     can no longer appear between the opening and closing quotes.
#   * The "maxLength" / "minLength" cases of match_step_to_regex now repeat
#     STRING_INNER instead of '.', so the bound counts units: a backslash
#     escape pair is one repetition, not two characters.
#   * NOTE(review): the "maxLength" case is listed before the "minLength" case
#     and each returns immediately; a schema carrying both keys would match the
#     first of these two cases only, unless an earlier case outside this hunk
#     handles that combination -- confirm.
#
# tests/text/test_json_schema.py
#   * Imports STRING_INNER and builds the expected maxLength / minLength
#     patterns from it.
#   * Adds negative examples containing an unescaped inner quote:
#     '"a""' (maxLength 3), '"abc""' (minLength 3), '"st"ring"' (STRING|NUMBER).
diff --git a/outlines/text/json_schema.py b/outlines/text/json_schema.py index ae917a322..c076a2e4e 100644 --- a/outlines/text/json_schema.py +++ b/outlines/text/json_schema.py @@ -3,7 +3,8 @@ import re from typing import Dict -STRING = r'".*"' +STRING_INNER = r'(?:[^"\\]|\\.)' +STRING = f'"{STRING_INNER}*"' INTEGER = r"(0|[1-9][0-9]*)" NUMBER = rf"(-)?({INTEGER})(\.[0-9]+)?([eE][+-][0-9]+)?" BOOLEAN = r"(true|false)" @@ -225,9 +226,9 @@ def match_step_to_regex(step): return regex_str case {"type": "string", "maxLength": max_length}: - return f'".{{,{max_length}}}"' + return f'"{STRING_INNER}{{,{max_length}}}"' case {"type": "string", "minLength": min_length}: - return f'".{{{min_length},}}"' + return f'"{STRING_INNER}{{{min_length},}}"' case {"type": field_type}: return type_to_regex[field_type] diff --git a/tests/text/test_json_schema.py b/tests/text/test_json_schema.py index 1d46cdf6a..f8814aeba 100644 --- a/tests/text/test_json_schema.py +++ b/tests/text/test_json_schema.py @@ -12,6 +12,7 @@ NULL, NUMBER, STRING, + STRING_INNER, build_schedule_from_schema, match_step_to_regex, ) @@ -258,13 +259,13 @@ def test_match_number(pattern, does_match): ), ( {"title": "Foo", "type": "string", "maxLength": 3}, - '".{,3}"', - [('"ab"', True), ('"abcd"', False)], + f'"{STRING_INNER}{{,3}}"', + [('"ab"', True), ('"a""', False), ('"abcd"', False)], ), ( {"title": "Foo", "type": "string", "minLength": 3}, - '".{3,}"', - [('"ab"', False), ('"abcd"', True)], + f'"{STRING_INNER}{{3,}}"', + [('"ab"', False), ('"abcd"', True), ('"abc""', False)], ), ( {"title": "Foo", "type": "boolean"}, @@ -290,6 +291,7 @@ def test_match_number(pattern, does_match): f"({STRING}|{NUMBER})", [ ('"string"', True), + ('"st"ring"', False), ("1000", True), ("true", False), ],