-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'mr/pmderodat/utf8-preparatory' into 'master'
Preparatory work for the transition of source buffers to UTF-8. See merge request eng/libadalang/langkit!1022
- Loading branch information
Showing
12 changed files
with
354 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import lexer_example | ||
|
||
@with_lexer(foo_lexer) | ||
grammar foo_grammar { | ||
@main_rule main_rule <- list+(Example(@example StrLit(@string))) | ||
} | ||
|
||
@abstract class FooNode implements Node[FooNode] { | ||
} | ||
|
||
class Example: FooNode { | ||
@parse_field f: StrLit | ||
} | ||
|
||
class StrLit: FooNode implements TokenNode { | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# ��������������������������������������������������������������������������� # | ||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�6�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�6�7�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�6�7�8�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�6�7�8�9�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
|
||
example "1�2�3�4�5�6�7�8�9�0�" | ||
|
||
# ��������������������������������������������������������������������������� # | ||
# ��������������������������������������������������������������������������� # |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
with Ada.Text_IO; use Ada.Text_IO; | ||
|
||
with GNAT.Strings; use GNAT.Strings; | ||
with GNATCOLL.Mmap; use GNATCOLL.Mmap; | ||
|
||
with Langkit_Support.File_Readers; use Langkit_Support.File_Readers; | ||
with Langkit_Support.Slocs; use Langkit_Support.Slocs; | ||
with Libfoolang.Analysis; use Libfoolang.Analysis; | ||
with Libfoolang.Common; use Libfoolang.Common; | ||
|
||
with Support; use Support; | ||
|
||
procedure Main is | ||
-- Test driver: parse an example source and an empty source through | ||
-- Libfoolang under several settings (from a file or from an in-memory | ||
-- buffer, with the expected or a mismatching charset, with or without the | ||
-- file reader from Support) and print a summary of each parsing result. | ||
|
||
-- Name and in-memory content of the empty source | ||
Empty_File : constant String := "empty.txt"; | ||
Empty_Buffer : aliased constant String := ""; | ||
|
||
-- Name of the example source and its content, loaded once here with | ||
-- Read_Whole_File and released with Free at the end of Main. | ||
Example_File : constant String := "main-iso-8859-1.txt"; | ||
Example_Buffer : String_Access := Read_Whole_File (Example_File); | ||
|
||
procedure Check | ||
(From_Buffer : Boolean := False; | ||
Empty_File : Boolean := False; | ||
Wrong_Encoding : Boolean := False; | ||
With_File_Reader : Boolean := False); | ||
-- Run one parsing scenario in a fresh analysis context and print a label | ||
-- for it, its diagnostics (if any), the number of children of the root | ||
-- node and the kind and source location range of the first and last | ||
-- tokens. | ||
-- | ||
-- From_Buffer: parse with Get_From_Buffer instead of Get_From_File. | ||
-- Empty_File: use the empty source instead of the example source. | ||
-- Wrong_Encoding: pass "utf-8" as the charset instead of "iso-8859-1". | ||
-- With_File_Reader: create the context with Support.Get_File_Reader | ||
-- instead of No_File_Reader_Reference. | ||
|
||
----------- | ||
-- Check -- | ||
----------- | ||
|
||
procedure Check | ||
(From_Buffer : Boolean := False; | ||
Empty_File : Boolean := False; | ||
Wrong_Encoding : Boolean := False; | ||
With_File_Reader : Boolean := False) | ||
is | ||
-- The example file name advertises ISO-8859-1, so "utf-8" is the | ||
-- deliberately mismatching charset. | ||
Charset : constant String := | ||
(if Wrong_Encoding then "utf-8" else "iso-8859-1"); | ||
-- The Empty_File parameter hides Main's constant of the same name, hence | ||
-- the Main.Empty_File expanded name. | ||
Filename : constant String := | ||
(if Empty_File then Main.Empty_File else Example_File); | ||
-- In-memory source matching Filename; only read when From_Buffer is set | ||
Buffer : constant access constant String := | ||
(if Empty_File then Empty_Buffer'Access else Example_Buffer); | ||
|
||
Ctx : Analysis_Context; | ||
U : Analysis_Unit; | ||
begin | ||
-- Put some label for this check | ||
|
||
Put ("== "); | ||
Put (if From_Buffer then "buffer" else "file"); | ||
Put (" | "); | ||
Put (if Empty_File then "empty-file" else "example-file"); | ||
Put (" | "); | ||
Put (if Wrong_Encoding then "wrong-encoding" else "correct-encoding"); | ||
Put (" | "); | ||
Put (if With_File_Reader then "file-reader" else "default"); | ||
Put_Line (" =="); | ||
New_Line; | ||
|
||
-- Parse the source according to requested settings | ||
|
||
Ctx := Create_Context | ||
(File_Reader => (if With_File_Reader | ||
then Get_File_Reader | ||
else No_File_Reader_Reference)); | ||
if From_Buffer then | ||
U := Ctx.Get_From_Buffer | ||
(Filename => Filename, | ||
Charset => Charset, | ||
Buffer => Buffer.all); | ||
else | ||
U := Ctx.Get_From_File | ||
(Filename => Filename, Charset => Charset); | ||
end if; | ||
|
||
-- Display parsing errors, if any | ||
|
||
if U.Has_Diagnostics then | ||
Put_Line ("Errors:"); | ||
for D of U.Diagnostics loop | ||
Put_Line (" " & U.Format_GNU_Diagnostic (D)); | ||
end loop; | ||
New_Line; | ||
end if; | ||
|
||
-- Summarize the content of the parsed unit | ||
|
||
if U.Root.Is_Null then | ||
Put_Line ("No root node"); | ||
else | ||
Put_Line ("Root node children:" & U.Root.Children_Count'Image); | ||
declare | ||
D : constant Token_Data_Type := Data (U.First_Token); | ||
begin | ||
Put_Line | ||
("First token: " | ||
& Kind (D)'Image | ||
& " at " & Image (Sloc_Range (D))); | ||
end; | ||
declare | ||
D : constant Token_Data_Type := Data (U.Last_Token); | ||
begin | ||
Put_Line | ||
("Last token: " | ||
& Kind (D)'Image | ||
& " at " & Image (Sloc_Range (D))); | ||
end; | ||
end if; | ||
New_Line; | ||
end Check; | ||
|
||
begin | ||
-- Get_From_File | ||
-- Each scenario is run with the default file reader, then with the one | ||
-- from Support. | ||
|
||
Check; | ||
Check (With_File_Reader => True); | ||
|
||
Check (Empty_File => True); | ||
Check (Empty_File => True, With_File_Reader => True); | ||
|
||
Check (Wrong_Encoding => True); | ||
Check (Wrong_Encoding => True, With_File_Reader => True); | ||
|
||
-- Get_From_Buffer | ||
-- No scenario below combines From_Buffer with With_File_Reader. | ||
|
||
Check (From_Buffer => True); | ||
Check (From_Buffer => True, Empty_File => True); | ||
Check (From_Buffer => True, Wrong_Encoding => True); | ||
|
||
-- Release the example content loaded in Main's declarative part | ||
Free (Example_Buffer); | ||
end Main; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
with Langkit_Support.Diagnostics; use Langkit_Support.Diagnostics; | ||
|
||
package body Support is | ||
|
||
   -- File reader for this test: it decodes files through Direct_Read and | ||
   -- then replaces the first Blank_Count decoded characters with spaces, so | ||
   -- that units parsed through it can be told apart from units parsed with | ||
   -- the default settings. | ||
   type My_FR is new File_Reader_Interface with null record; | ||
|
||
   Blank_Count : constant := 80; | ||
   -- Number of leading decoded characters that Read overwrites with spaces | ||
|
||
   overriding procedure Read | ||
     (Self : My_FR; | ||
      Filename : String; | ||
      Charset : String; | ||
      Read_BOM : Boolean; | ||
      Contents : out Decoded_File_Contents; | ||
      Diagnostics : in out Diagnostics_Vectors.Vector); | ||
   -- Decode Filename with Direct_Read. If that produced no diagnostic and | ||
   -- the decoded text has at least Blank_Count characters, overwrite its | ||
   -- first Blank_Count characters with spaces. | ||
|
||
   overriding procedure Release (Self : in out My_FR) is null; | ||
   -- My_FR is a null record: there is nothing to release | ||
|
||
   ---------- | ||
   -- Read -- | ||
   ---------- | ||
|
||
   overriding procedure Read | ||
     (Self : My_FR; | ||
      Filename : String; | ||
      Charset : String; | ||
      Read_BOM : Boolean; | ||
      Contents : out Decoded_File_Contents; | ||
      Diagnostics : in out Diagnostics_Vectors.Vector) | ||
   is | ||
   begin | ||
      Direct_Read (Filename, Charset, Read_BOM, Contents, Diagnostics); | ||
|
||
      -- Leave the buffer alone when decoding reported an error. Otherwise, | ||
      -- measure the decoded slice (First .. Last) rather than the allocated | ||
      -- buffer: the buffer may be larger than the decoded text, and First | ||
      -- need not be the buffer's first index, so a check on Buffer'Length | ||
      -- could let the slice below run past the decoded text or past the | ||
      -- buffer bounds. | ||
      if Diagnostics.Is_Empty | ||
        and then Contents.Last - Contents.First + 1 >= Blank_Count | ||
      then | ||
         Contents.Buffer.all | ||
           (Contents.First .. Contents.First + Blank_Count - 1) := | ||
           (1 .. Blank_Count => ' '); | ||
      end if; | ||
   end Read; | ||
|
||
   --------------------- | ||
   -- Get_File_Reader -- | ||
   --------------------- | ||
|
||
   function Get_File_Reader return File_Reader_Reference is | ||
   begin | ||
      -- Wrap a fresh My_FR value in a file reader reference | ||
      return Create_File_Reader_Reference (My_FR'(null record)); | ||
   end Get_File_Reader; | ||
|
||
end Support; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
with Langkit_Support.File_Readers; use Langkit_Support.File_Readers; | ||
|
||
package Support is | ||
-- Helpers for the Main test driver | ||
function Get_File_Reader return File_Reader_Reference; | ||
-- Return a reference to the file reader used by this test; Main passes it | ||
-- as the File_Reader argument of Create_Context. | ||
end Support; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
== file | example-file | correct-encoding | default == | ||
|
||
Root node children: 10 | ||
First token: FOO_COMMENT at 1:1-1:80 | ||
Last token: FOO_TERMINATION at 44:1-44:1 | ||
|
||
== file | example-file | correct-encoding | file-reader == | ||
|
||
Root node children: 10 | ||
First token: FOO_WHITESPACE at 1:1-1:81 | ||
Last token: FOO_TERMINATION at 43:1-43:1 | ||
|
||
== file | empty-file | correct-encoding | default == | ||
|
||
Errors: | ||
empty.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
== file | empty-file | correct-encoding | file-reader == | ||
|
||
Errors: | ||
empty.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
== file | example-file | wrong-encoding | default == | ||
|
||
Errors: | ||
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8" | ||
main-iso-8859-1.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
== file | example-file | wrong-encoding | file-reader == | ||
|
||
Errors: | ||
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8" | ||
main-iso-8859-1.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
== buffer | example-file | correct-encoding | default == | ||
|
||
Root node children: 10 | ||
First token: FOO_COMMENT at 1:1-1:80 | ||
Last token: FOO_TERMINATION at 44:1-44:1 | ||
|
||
== buffer | empty-file | correct-encoding | default == | ||
|
||
Errors: | ||
empty.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
== buffer | example-file | wrong-encoding | default == | ||
|
||
Errors: | ||
main-iso-8859-1.txt:1:3: Could not decode source as "utf-8" | ||
main-iso-8859-1.txt:1:1: Expected 'example', got Termination | ||
|
||
Root node children: 0 | ||
First token: FOO_TERMINATION at 1:1-1:1 | ||
Last token: FOO_TERMINATION at 1:1-1:1 | ||
|
||
Done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
""" | ||
Check that the handling of Unicode for various parsing settings (get from | ||
file/buffer, encoding, file reader, ...) works correctly. | ||
""" | ||
|
||
from langkit.dsl import ASTNode, Field, T | ||
|
||
from utils import build_and_run | ||
|
||
|
||
# Base class from which the other node types in this test derive | ||
class FooNode(ASTNode): | ||
pass | ||
|
||
|
||
# Node with a single field, f, declared with type StrLit | ||
class Example(FooNode): | ||
f = Field(type=T.StrLit) | ||
|
||
|
||
# Node flagged as a token node | ||
class StrLit(FooNode): | ||
token_node = True | ||
|
||
|
||
# NOTE(review): presumably builds the library from the Lkt file and then | ||
# builds and runs main.adb; utils.build_and_run is not visible here. | ||
build_and_run(lkt_file="expected_concrete_syntax.lkt", gpr_mains=["main.adb"]) | ||
|
||
print("Done") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
driver: python |