From 06d565496966f0dbe184dd619b62ea276035f562 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Louf?=
Date: Sat, 20 Apr 2024 16:46:15 +0200
Subject: [PATCH] Exclude escape character in JSON string fields

---
Review note (ignored by `git am`): the negated character class in
STRING_INNER must not contain literal parentheses. Inside `[...]` the
characters `(` and `)` are ordinary members, so `[^("...)]` also rejects
parentheses and strings such as "f(x)" stop matching. They are removed
below; a lone backslash is still rejected and an escaped backslash (`\\`)
is still accepted. The blob id on the json_schema.py `index` line was not
recomputed after this correction.

 outlines/fsm/json_schema.py   | 2 +-
 tests/fsm/test_json_schema.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/outlines/fsm/json_schema.py b/outlines/fsm/json_schema.py
index 159782c01..095e41115 100644
--- a/outlines/fsm/json_schema.py
+++ b/outlines/fsm/json_schema.py
@@ -9,7 +9,7 @@ from referencing._core import Resolver
 from referencing._core import Resolver
 from referencing.jsonschema import DRAFT202012
 
-STRING_INNER = r'(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)'
+STRING_INNER = r'([^"\\\x00-\x1f\x7f-\x9f]|\\\\)'
 STRING = f'"{STRING_INNER}*"'
 INTEGER = r"(-)?(0|[1-9][0-9]*)"
 NUMBER = rf"({INTEGER})(\.[0-9]+)?([eE][+-][0-9]+)?"
diff --git a/tests/fsm/test_json_schema.py b/tests/fsm/test_json_schema.py
index 0ab4ccf79..f5bdd8565 100644
--- a/tests/fsm/test_json_schema.py
+++ b/tests/fsm/test_json_schema.py
@@ -116,7 +116,12 @@ def test_match_number(pattern, does_match):
         (
             {"title": "Foo", "type": "string"},
             STRING,
-            [("unquotedstring", False), ('"quoted_string"', True)],
+            [
+                ("unquotedstring", False),
+                ('"quoted_string"', True),
+                (r'"escape_\character"', False),
+                (r'"double_\\escape"', True),
+            ],
         ),
         # String with maximum length
         (