Skip to content

Commit

Permalink
misc/unicode: new testcase
Browse files Browse the repository at this point in the history
  • Loading branch information
pmderodat committed Mar 13, 2024
1 parent 6f53bc0 commit dbad162
Show file tree
Hide file tree
Showing 9 changed files with 343 additions and 0 deletions.
Empty file.
16 changes: 16 additions & 0 deletions testsuite/tests/misc/unicode/expected_concrete_syntax.lkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import lexer_example

@with_lexer(foo_lexer)
grammar foo_grammar {
@main_rule main_rule <- list+(Example(@example StrLit(@string)))
}

@abstract class FooNode implements Node[FooNode] {
}

class Example: FooNode {
@parse_field f: StrLit
}

class StrLit: FooNode implements TokenNode {
}
43 changes: 43 additions & 0 deletions testsuite/tests/misc/unicode/main-iso-8859-1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# ��������������������������������������������������������������������������� #
# ��������������������������������������������������������������������������� #

example "1�"

# ��������������������������������������������������������������������������� #

example "1�2�"

# ��������������������������������������������������������������������������� #

example "1�2�3�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�6�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�6�7�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�6�7�8�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�6�7�8�9�"

# ��������������������������������������������������������������������������� #

example "1�2�3�4�5�6�7�8�9�0�"

# ��������������������������������������������������������������������������� #
# ��������������������������������������������������������������������������� #
131 changes: 131 additions & 0 deletions testsuite/tests/misc/unicode/main.adb
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
with Ada.Text_IO; use Ada.Text_IO;

with GNAT.Strings; use GNAT.Strings;
with GNATCOLL.Mmap; use GNATCOLL.Mmap;

with Langkit_Support.File_Readers; use Langkit_Support.File_Readers;
with Langkit_Support.Slocs; use Langkit_Support.Slocs;
with Libfoolang.Analysis; use Libfoolang.Analysis;
with Libfoolang.Common; use Libfoolang.Common;

with Support; use Support;

procedure Main is

   --  Inputs for the checks below: each file name comes with an in-memory
   --  counterpart that is used when parsing from a buffer.

   Empty_File   : constant String := "empty.txt";
   Empty_Buffer : aliased constant String := "";

   Example_File   : constant String := "main-iso-8859-1.txt";
   Example_Buffer : String_Access := Read_Whole_File (Example_File);
   --  Content of Example_File, loaded once here and released at the end of
   --  Main.

   procedure Put_Token (Label : String; Tok : Token_Reference);
   --  Print Label followed by the kind and the source location range of Tok

   procedure Check
     (From_Buffer      : Boolean := False;
      Empty_File       : Boolean := False;
      Wrong_Encoding   : Boolean := False;
      With_File_Reader : Boolean := False);
   --  Parse one source in a fresh analysis context and print a report: a
   --  banner describing the settings, the diagnostics (if any), then the
   --  number of children of the root node and the first/last tokens.
   --
   --  From_Buffer: parse with Get_From_Buffer instead of Get_From_File.
   --  Empty_File: use the empty input instead of the example one.
   --  Wrong_Encoding: decode as "utf-8" instead of "iso-8859-1".
   --  With_File_Reader: create the context with Support.Get_File_Reader
   --  instead of No_File_Reader_Reference.

   ---------------
   -- Put_Token --
   ---------------

   procedure Put_Token (Label : String; Tok : Token_Reference) is
      Tok_Data : constant Token_Data_Type := Data (Tok);
   begin
      Put_Line
        (Label
         & Kind (Tok_Data)'Image
         & " at " & Image (Sloc_Range (Tok_Data)));
   end Put_Token;

   -----------
   -- Check --
   -----------

   procedure Check
     (From_Buffer      : Boolean := False;
      Empty_File       : Boolean := False;
      Wrong_Encoding   : Boolean := False;
      With_File_Reader : Boolean := False)
   is
      --  Fields of the banner that identifies this check in the output

      Source_Label   : constant String :=
        (if From_Buffer then "buffer" else "file");
      Input_Label    : constant String :=
        (if Empty_File then "empty-file" else "example-file");
      Encoding_Label : constant String :=
        (if Wrong_Encoding then "wrong-encoding" else "correct-encoding");
      Reader_Label   : constant String :=
        (if With_File_Reader then "file-reader" else "default");

      --  Parsing settings. The Empty_File parameter hides the constant of
      --  the same name, hence the expanded name Main.Empty_File.

      Charset       : constant String :=
        (if Wrong_Encoding then "utf-8" else "iso-8859-1");
      Filename      : constant String :=
        (if Empty_File then Main.Empty_File else Example_File);
      Source_Buffer : constant access constant String :=
        (if Empty_File then Empty_Buffer'Access else Example_Buffer);

      Analysis_Ctx : Analysis_Context;
      Parsed_Unit  : Analysis_Unit;
   begin
      --  Put some label for this check

      Put_Line
        ("== " & Source_Label
         & " | " & Input_Label
         & " | " & Encoding_Label
         & " | " & Reader_Label
         & " ==");
      New_Line;

      --  Parse the source according to requested settings

      if With_File_Reader then
         Analysis_Ctx := Create_Context (File_Reader => Get_File_Reader);
      else
         Analysis_Ctx :=
           Create_Context (File_Reader => No_File_Reader_Reference);
      end if;

      if not From_Buffer then
         Parsed_Unit := Analysis_Ctx.Get_From_File
           (Filename => Filename, Charset => Charset);
      else
         Parsed_Unit := Analysis_Ctx.Get_From_Buffer
           (Filename => Filename,
            Charset  => Charset,
            Buffer   => Source_Buffer.all);
      end if;

      --  Display parsing errors, if any

      if Parsed_Unit.Has_Diagnostics then
         Put_Line ("Errors:");
         for Diag of Parsed_Unit.Diagnostics loop
            Put_Line (" " & Parsed_Unit.Format_GNU_Diagnostic (Diag));
         end loop;
         New_Line;
      end if;

      --  Summarize the content of the parsed unit

      if not Parsed_Unit.Root.Is_Null then
         Put_Line
           ("Root node children:"
            & Parsed_Unit.Root.Children_Count'Image);
         Put_Token ("First token: ", Parsed_Unit.First_Token);
         Put_Token ("Last token: ", Parsed_Unit.Last_Token);
      else
         Put_Line ("No root node");
      end if;
      New_Line;
   end Check;

begin
   --  Get_From_File: each kind of input is checked first with the default
   --  file reading, then with the custom file reader.

   Check;
   Check (With_File_Reader => True);

   Check (Empty_File => True);
   Check (Empty_File => True, With_File_Reader => True);

   Check (Wrong_Encoding => True);
   Check (Wrong_Encoding => True, With_File_Reader => True);

   --  Get_From_Buffer: same kinds of input, parsed from memory

   Check (From_Buffer => True);
   Check (From_Buffer => True, Empty_File => True);
   Check (From_Buffer => True, Wrong_Encoding => True);

   Free (Example_Buffer);
end Main;
46 changes: 46 additions & 0 deletions testsuite/tests/misc/unicode/support.adb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
with Langkit_Support.Diagnostics; use Langkit_Support.Diagnostics;

package body Support is

   Blank_Prefix_Length : constant := 80;
   --  Number of leading characters that My_FR.Read replaces with spaces

   type My_FR is new File_Reader_Interface with null record;
   --  File reader that decodes files with Direct_Read, then overwrites the
   --  first Blank_Prefix_Length characters of the decoded text with spaces.
   --  This makes the sources it returns differ from what the default file
   --  reading yields.

   overriding procedure Read
     (Self        : My_FR;
      Filename    : String;
      Charset     : String;
      Read_BOM    : Boolean;
      Contents    : out Decoded_File_Contents;
      Diagnostics : in out Diagnostics_Vectors.Vector);
   --  Read and decode Filename through Direct_Read. If no diagnostic was
   --  emitted and the decoded text is at least Blank_Prefix_Length characters
   --  long, replace its first Blank_Prefix_Length characters with spaces.

   overriding procedure Release (Self : in out My_FR) is null;
   --  My_FR holds no resource: nothing to release

   ----------
   -- Read --
   ----------

   overriding procedure Read
     (Self        : My_FR;
      Filename    : String;
      Charset     : String;
      Read_BOM    : Boolean;
      Contents    : out Decoded_File_Contents;
      Diagnostics : in out Diagnostics_Vectors.Vector)
   is
   begin
      Direct_Read (Filename, Charset, Read_BOM, Contents, Diagnostics);

      --  Leave the result untouched when diagnostics were emitted.
      --  Otherwise, blank the prefix only when the decoded text itself
      --  (Contents.First .. Contents.Last) can hold it. The length of the
      --  allocated buffer is not the right measure: the slice written below
      --  starts at Contents.First, and the buffer is allowed to be larger
      --  than the text it contains.

      if Diagnostics.Is_Empty
         and then Contents.Last - Contents.First + 1 >= Blank_Prefix_Length
      then
         Contents.Buffer.all
           (Contents.First .. Contents.First + Blank_Prefix_Length - 1) :=
           (1 .. Blank_Prefix_Length => ' ');
      end if;
   end Read;

   ---------------------
   -- Get_File_Reader --
   ---------------------

   function Get_File_Reader return File_Reader_Reference is
   begin
      --  Wrap a new My_FR instance in a file reader reference

      return Create_File_Reader_Reference (My_FR'(null record));
   end Get_File_Reader;

end Support;
5 changes: 5 additions & 0 deletions testsuite/tests/misc/unicode/support.ads
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
with Langkit_Support.File_Readers; use Langkit_Support.File_Readers;

--  Helpers for the Ada main of this test

package Support is
   function Get_File_Reader return File_Reader_Reference;
   --  Return a reference to a new instance of the custom file reader that is
   --  implemented in the body of this package.
end Support;
76 changes: 76 additions & 0 deletions testsuite/tests/misc/unicode/test.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
== file | example-file | correct-encoding | default ==

Root node children: 10
First token: FOO_COMMENT at 1:1-1:80
Last token: FOO_TERMINATION at 44:1-44:1

== file | example-file | correct-encoding | file-reader ==

Root node children: 10
First token: FOO_WHITESPACE at 1:1-1:81
Last token: FOO_TERMINATION at 43:1-43:1

== file | empty-file | correct-encoding | default ==

Errors:
empty.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

== file | empty-file | correct-encoding | file-reader ==

Errors:
empty.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

== file | example-file | wrong-encoding | default ==

Errors:
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
main-iso-8859-1.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

== file | example-file | wrong-encoding | file-reader ==

Errors:
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
main-iso-8859-1.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

== buffer | example-file | correct-encoding | default ==

Root node children: 10
First token: FOO_COMMENT at 1:1-1:80
Last token: FOO_TERMINATION at 44:1-44:1

== buffer | empty-file | correct-encoding | default ==

Errors:
empty.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

== buffer | example-file | wrong-encoding | default ==

Errors:
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
main-iso-8859-1.txt:1:1: Expected 'example', got Termination

Root node children: 0
First token: FOO_TERMINATION at 1:1-1:1
Last token: FOO_TERMINATION at 1:1-1:1

Done
25 changes: 25 additions & 0 deletions testsuite/tests/misc/unicode/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Check that the handling of Unicode for various parsing settings (get from
file/buffer, encoding, file reader, ...) works correctly.
"""

from langkit.dsl import ASTNode, Field, T

from utils import build_and_run


# Root of the node type hierarchy declared in this test: Example and StrLit
# below both derive from it. It declares no field of its own.
class FooNode(ASTNode):
    pass


# Node with a single field, `f`, whose declared type is the StrLit node.
class Example(FooNode):
    f = Field(type=T.StrLit)


# Token node (`token_node = True`) used as the type of `Example.f`.
class StrLit(FooNode):
    token_node = True


# Hand the Lkt source and the Ada main over to the testsuite helper.
# NOTE(review): presumably this generates and builds the library, then runs
# main.adb -- confirm against utils.build_and_run.
build_and_run(lkt_file="expected_concrete_syntax.lkt", gpr_mains=["main.adb"])

# Final marker: "Done" is the last line of the expected output (test.out).
print("Done")
1 change: 1 addition & 0 deletions testsuite/tests/misc/unicode/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
driver: python

0 comments on commit dbad162

Please sign in to comment.