Skip to content

Commit

Permalink
Merge branch 'topic/vadim/normalization' into 'master'
Browse files Browse the repository at this point in the history
Unicode text normalization

Closes #259

See merge request eng/ide/VSS!348
  • Loading branch information
godunko committed Nov 27, 2024
2 parents 5685fe7 + 28857ac commit 0cdb5b0
Show file tree
Hide file tree
Showing 6 changed files with 376 additions and 157 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ package body VSS.Implementation.Text_Handlers.UTF8.Dynamic is
Self.Pointer.Size := @ + L;
Self.Pointer.Length := @ + 1;
Self.Pointer.Storage (Self.Pointer.Size) := 16#00#;
-- XXX Is it necessary? NUL is copied by move of storage data
end Insert;

--------------
Expand Down Expand Up @@ -685,4 +686,88 @@ package body VSS.Implementation.Text_Handlers.UTF8.Dynamic is
Unreference (Self.Pointer);
end Unreference;

-----------------------
-- UTF8_Insert_Slice --
-----------------------

overriding procedure UTF8_Insert_Slice
  (Self    : in out Dynamic_UTF8_Handler;
   Into    : VSS.Unicode.UTF8_Code_Unit_Index;
   Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   From    : VSS.Unicode.UTF8_Code_Unit_Index;
   Size    : VSS.Unicode.UTF8_Code_Unit_Count;
   Length  : VSS.Implementation.Strings.Character_Count) is
begin
   --  Request storage for the text extended by Size code units.
   --  NOTE(review): Mutate presumably also makes the storage unshared;
   --  confirm against its declaration.

   Mutate
     (Self.Pointer,
      VSS.Unicode.UTF8_Code_Unit_Count (Self.Unsafe_Capacity) * 4,
      Self.Pointer.Size + Size);

   declare
      Old_Size   : constant VSS.Unicode.UTF8_Code_Unit_Count :=
        Self.Pointer.Size;
      New_Size   : constant VSS.Unicode.UTF8_Code_Unit_Count :=
        Old_Size + Size;
      Tail_First : constant VSS.Unicode.UTF8_Code_Unit_Offset :=
        Into + Size;
      --  New index of the code unit that is at Into now.

   begin
      --  Open a gap of Size code units at Into. The slice ends at
      --  Old_Size, thus NUL terminator is moved together with the tail.

      Self.Pointer.Storage (Tail_First .. New_Size) :=
        Self.Pointer.Storage (Into .. Old_Size);

      --  Fill the gap by the requested slice of the given storage.

      Self.Pointer.Storage (Into .. Tail_First - 1) :=
        Storage (From .. From + Size - 1);

      Self.Pointer.Size   := New_Size;
      Self.Pointer.Length := @ + Length;
   end;
end UTF8_Insert_Slice;

---------------
-- UTF8_Move --
---------------

overriding procedure UTF8_Move
  (Self : in out Dynamic_UTF8_Handler;
   From : VSS.Unicode.UTF8_Code_Unit_Index;
   Size : VSS.Unicode.UTF8_Code_Unit_Count;
   Into : VSS.Unicode.UTF8_Code_Unit_Index) is
begin
   --  Move Size code units starting at From so they start at Into. Only
   --  move toward the beginning of the text (Into < From) is supported;
   --  this mirrors implementation for Static_UTF8_Handler. Size and
   --  length of the text are not changed.

   if From < Into then
      --  Move toward the end of the text is not implemented.

      raise Program_Error;

   elsif Into < From then
      --  Same call as done by other modifying subprograms of this package
      --  before update of the storage; size of the text is not changed.
      --  NOTE(review): presumably it makes the storage unshared; confirm
      --  against declaration of Mutate.

      Mutate
        (Self.Pointer,
         VSS.Unicode.UTF8_Code_Unit_Count (Self.Unsafe_Capacity) * 4,
         Self.Pointer.Size);

      declare
         Buffer    : constant
           VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
             (0 .. Size - 1) :=
               Self.Pointer.Storage (From .. From + Size - 1);
         --  Copy of the moved slice, it is overwritten by the shift below.
         Move_Size : constant VSS.Unicode.UTF8_Code_Unit_Offset :=
           From - Into;
         --  Number of code units between Into and From.

      begin
         --  Shift code units between Into and From by Size positions
         --  toward the end, then put saved slice at Into.

         Self.Pointer.Storage
           (Into + Size .. Into + Size + Move_Size - 1) :=
             Self.Pointer.Storage (Into .. Into + Move_Size - 1);
         Self.Pointer.Storage (Into .. Into + Size - 1) := Buffer;
      end;

   else
      --  From = Into, nothing to do.

      null;
   end if;
end UTF8_Move;

------------------------
-- UTF8_Replace_Slice --
------------------------

overriding procedure UTF8_Replace_Slice
  (Self           : in out Dynamic_UTF8_Handler;
   Replace_From   : VSS.Unicode.UTF8_Code_Unit_Index;
   Replace_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   Replace_Length : VSS.Implementation.Strings.Character_Count;
   By_Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   By_From        : VSS.Unicode.UTF8_Code_Unit_Index;
   By_Size        : VSS.Unicode.UTF8_Code_Unit_Count;
   By_Length      : VSS.Implementation.Strings.Character_Count)
is
   Result_Size : constant VSS.Unicode.UTF8_Code_Unit_Count :=
     Self.Pointer.Size + By_Size - Replace_Size;
   --  Size of the text after replacement.
   Old_Tail    : constant VSS.Unicode.UTF8_Code_Unit_Offset :=
     Replace_From + Replace_Size;
   --  Index of the first code unit after the replaced slice.
   New_Tail    : constant VSS.Unicode.UTF8_Code_Unit_Offset :=
     Replace_From + By_Size;
   --  Index of the same code unit after replacement.

begin
   --  Request storage for the text of the resulting size.

   Mutate
     (Self.Pointer,
      VSS.Unicode.UTF8_Code_Unit_Count (Self.Unsafe_Capacity) * 4,
      Result_Size);

   --  Relocate code units that follow the replaced slice. The source
   --  slice ends at the current size, thus NUL terminator is moved too.

   Self.Pointer.Storage (New_Tail .. Result_Size) :=
     Self.Pointer.Storage (Old_Tail .. Self.Pointer.Size);

   --  Put replacement data at the place of the replaced slice.

   Self.Pointer.Storage (Replace_From .. New_Tail - 1) :=
     By_Storage (By_From .. By_From + By_Size - 1);

   Self.Pointer.Size   := Result_Size;
   Self.Pointer.Length := @ + By_Length - Replace_Length;
end UTF8_Replace_Slice;

---------------
-- UTF8_Size --
---------------

overriding function UTF8_Size
  (Self : Dynamic_UTF8_Handler) return VSS.Unicode.UTF8_Code_Unit_Count
is (Self.Pointer.Size);
--  Size of the text in code units, as recorded in the data referenced by
--  Self.Pointer.

end VSS.Implementation.Text_Handlers.UTF8.Dynamic;
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ package VSS.Implementation.Text_Handlers.UTF8.Dynamic is
-- Size of the text handler object is fixed.

type Dynamic_UTF8_Handler is
new VSS.Implementation.Text_Handlers.Abstract_Text_Handler with
new VSS.Implementation.Text_Handlers.UTF8.Abstract_UTF8_Text with
record
Pointer : UTF8_String_Data_Access;
end record with Object_Size => 192;
Expand Down Expand Up @@ -149,6 +149,33 @@ package VSS.Implementation.Text_Handlers.UTF8.Dynamic is
VSS.Implementation.String_Vectors.String_Vector_Data_Access)
with Pre => Self.Pointer /= null;

overriding procedure UTF8_Insert_Slice
(Self : in out Dynamic_UTF8_Handler;
Into : VSS.Unicode.UTF8_Code_Unit_Index;
Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Length : VSS.Implementation.Strings.Character_Count);
--  Insert Size code units of Storage, starting from the index From, into
--  the text at the code unit index Into. Code units of the text starting
--  from Into (and NUL terminator) are shifted by Size positions. Size of
--  the text is increased by Size and its length by Length.

overriding procedure UTF8_Move
(Self : in out Dynamic_UTF8_Handler;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Into : VSS.Unicode.UTF8_Code_Unit_Index);
--  Implementation of Abstract_UTF8_Text.UTF8_Move for
--  Dynamic_UTF8_Handler. May raise Program_Error for unsupported moves,
--  see the body.

overriding procedure UTF8_Replace_Slice
(Self : in out Dynamic_UTF8_Handler;
Replace_From : VSS.Unicode.UTF8_Code_Unit_Index;
Replace_Size : VSS.Unicode.UTF8_Code_Unit_Count;
Replace_Length : VSS.Implementation.Strings.Character_Count;
By_Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
By_From : VSS.Unicode.UTF8_Code_Unit_Index;
By_Size : VSS.Unicode.UTF8_Code_Unit_Count;
By_Length : VSS.Implementation.Strings.Character_Count);
--  Replace Replace_Size code units of the text, starting from the index
--  Replace_From, by By_Size code units of By_Storage, starting from the
--  index By_From. Size of the text is changed by By_Size - Replace_Size
--  and its length by By_Length - Replace_Length.

overriding function UTF8_Size
(Self : Dynamic_UTF8_Handler) return VSS.Unicode.UTF8_Code_Unit_Count;
--  Return size of the text in code units (value of Self.Pointer.Size).

-- Subprograms to help code refactoring, some of them will be moved to
-- generic UTF8 fastpath string API, and some moved to the body after
-- that.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -579,4 +579,145 @@ package body VSS.Implementation.Text_Handlers.UTF8.Static is
end return;
end To_UTF_8_String;

-----------------------
-- UTF8_Insert_Slice --
-----------------------

overriding procedure UTF8_Insert_Slice
  (Self    : in out Static_UTF8_Handler;
   Into    : VSS.Unicode.UTF8_Code_Unit_Index;
   Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   From    : VSS.Unicode.UTF8_Code_Unit_Index;
   Size    : VSS.Unicode.UTF8_Code_Unit_Count;
   Length  : VSS.Implementation.Strings.Character_Count)
is
   New_Size : constant VSS.Unicode.UTF8_Code_Unit_Count :=
     Self.Size + Size;
   --  Size of the text after insertion.

begin
   if New_Size > In_Place_Storage_Capacity then
      --  The in-place storage is too small for the result: convert the
      --  handler to the dynamic one and let it complete the operation.

      Unsafe_Convert_To_Dynamic
        (Self,
         VSS.Unicode.UTF8_Code_Unit_Count (Self.Unsafe_Capacity * 4),
         New_Size);

      declare
         Text : Dynamic.Dynamic_UTF8_Handler
           with Import, Convention => Ada, Address => Self'Address;
         --  View of the converted object as the dynamic handler.

      begin
         Text.UTF8_Insert_Slice
           (Into    => Into,
            Storage => Storage,
            From    => From,
            Size    => Size,
            Length  => Length);
      end;

   else
      --  Result fits into the in-place storage. Shift the tail first;
      --  the source slice ends at Self.Size, thus NUL terminator is
      --  moved too.

      Self.Storage (Into + Size .. New_Size) :=
        Self.Storage (Into .. Self.Size);

      --  Copy inserted data into the gap.

      Self.Storage (Into .. Into + Size - 1) :=
        Storage (From .. From + Size - 1);

      Self.Size   := New_Size;
      Self.Length := @ + Length;
   end if;
end UTF8_Insert_Slice;

---------------
-- UTF8_Move --
---------------

overriding procedure UTF8_Move
  (Self : in out Static_UTF8_Handler;
   From : VSS.Unicode.UTF8_Code_Unit_Index;
   Size : VSS.Unicode.UTF8_Code_Unit_Count;
   Into : VSS.Unicode.UTF8_Code_Unit_Index) is
begin
   if From = Into then
      --  Slice is at the requested position already.

      return;
   end if;

   if From < Into then
      --  Move toward the end of the text is not implemented.

      raise Program_Error;
   end if;

   --  Into < From: move the slice toward the beginning of the text.

   declare
      Slice : constant
        VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
          (0 .. Size - 1) := Self.Storage (From .. From + Size - 1);
      --  Copy of the moved code units, they are overwritten by the shift.
      Gap   : constant VSS.Unicode.UTF8_Code_Unit_Offset := From - Into;
      --  Number of code units between Into and From.

   begin
      --  Code units between Into and From go Size positions toward the
      --  end, saved slice takes their place.

      Self.Storage (Into + Size .. Into + Size + Gap - 1) :=
        Self.Storage (Into .. Into + Gap - 1);
      Self.Storage (Into .. Into + Size - 1) := Slice;
   end;
end UTF8_Move;

------------------------
-- UTF8_Replace_Slice --
------------------------

overriding procedure UTF8_Replace_Slice
  (Self           : in out Static_UTF8_Handler;
   Replace_From   : VSS.Unicode.UTF8_Code_Unit_Index;
   Replace_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   Replace_Length : VSS.Implementation.Strings.Character_Count;
   By_Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   By_From        : VSS.Unicode.UTF8_Code_Unit_Index;
   By_Size        : VSS.Unicode.UTF8_Code_Unit_Count;
   By_Length      : VSS.Implementation.Strings.Character_Count)
is
   New_Size : constant VSS.Unicode.UTF8_Code_Unit_Offset :=
     Self.Size - Replace_Size + By_Size;
   --  Size of the text after replacement.

begin
   if New_Size > In_Place_Storage_Capacity then
      --  The in-place storage is too small for the result: convert the
      --  handler to the dynamic one and let it complete the operation.

      Unsafe_Convert_To_Dynamic
        (Self,
         VSS.Unicode.UTF8_Code_Unit_Count (Self.Unsafe_Capacity * 4),
         New_Size);

      declare
         Text : Dynamic.Dynamic_UTF8_Handler
           with Import, Convention => Ada, Address => Self'Address;
         --  View of the converted object as the dynamic handler.

      begin
         Text.UTF8_Replace_Slice
           (Replace_From   => Replace_From,
            Replace_Size   => Replace_Size,
            Replace_Length => Replace_Length,
            By_Storage     => By_Storage,
            By_From        => By_From,
            By_Size        => By_Size,
            By_Length      => By_Length);
      end;

   else
      --  Result fits into the in-place storage. Relocate code units that
      --  follow the replaced slice; the source slice ends at Self.Size,
      --  thus NUL terminator is moved too.

      Self.Storage (Replace_From + By_Size .. New_Size) :=
        Self.Storage (Replace_From + Replace_Size .. Self.Size);

      --  Put replacement data at the place of the replaced slice.

      Self.Storage (Replace_From .. Replace_From + By_Size - 1) :=
        By_Storage (By_From .. By_From + By_Size - 1);

      Self.Size   := New_Size;
      Self.Length := @ + By_Length - Replace_Length;
   end if;
end UTF8_Replace_Slice;

---------------
-- UTF8_Size --
---------------

overriding function UTF8_Size
  (Self : Static_UTF8_Handler) return VSS.Unicode.UTF8_Code_Unit_Count
is (Self.Size);
--  Size of the text in code units, as recorded in the Size component.

end VSS.Implementation.Text_Handlers.UTF8.Static;
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ package VSS.Implementation.Text_Handlers.UTF8.Static is
range 0 .. In_Place_Storage_Capacity;

type Static_UTF8_Handler is
new VSS.Implementation.Text_Handlers.Abstract_Text_Handler with
new VSS.Implementation.Text_Handlers.UTF8.Abstract_UTF8_Text with
record
Storage :
VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
Expand Down Expand Up @@ -126,4 +126,31 @@ package VSS.Implementation.Text_Handlers.UTF8.Static is
Lines : in out
VSS.Implementation.String_Vectors.String_Vector_Data_Access);

overriding procedure UTF8_Insert_Slice
(Self : in out Static_UTF8_Handler;
Into : VSS.Unicode.UTF8_Code_Unit_Index;
Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Length : VSS.Implementation.Strings.Character_Count);
--  Insert Size code units of Storage, starting from the index From, into
--  the text at the code unit index Into. Operation is done in place when
--  Self.Size + Size doesn't exceed In_Place_Storage_Capacity; otherwise
--  the handler is converted into the dynamic one, which completes the
--  operation.

overriding procedure UTF8_Move
(Self : in out Static_UTF8_Handler;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Into : VSS.Unicode.UTF8_Code_Unit_Index);
--  Move Size code units starting from the index From so they start at the
--  index Into. Only move toward the beginning of the text (Into < From) is
--  implemented: Program_Error is raised when From < Into, and nothing is
--  done when From = Into.

overriding procedure UTF8_Replace_Slice
(Self : in out Static_UTF8_Handler;
Replace_From : VSS.Unicode.UTF8_Code_Unit_Index;
Replace_Size : VSS.Unicode.UTF8_Code_Unit_Count;
Replace_Length : VSS.Implementation.Strings.Character_Count;
By_Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
By_From : VSS.Unicode.UTF8_Code_Unit_Index;
By_Size : VSS.Unicode.UTF8_Code_Unit_Count;
By_Length : VSS.Implementation.Strings.Character_Count);
--  Replace Replace_Size code units of the text, starting from the index
--  Replace_From, by By_Size code units of By_Storage, starting from the
--  index By_From. Operation is done in place when the resulting size
--  doesn't exceed In_Place_Storage_Capacity; otherwise the handler is
--  converted into the dynamic one, which completes the operation.

overriding function UTF8_Size
(Self : Static_UTF8_Handler) return VSS.Unicode.UTF8_Code_Unit_Count;
--  Return size of the text in code units (value of Self.Size).

end VSS.Implementation.Text_Handlers.UTF8.Static;
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,62 @@ package VSS.Implementation.Text_Handlers.UTF8
with Preelaborate
is

type Abstract_UTF8_Text is
abstract new VSS.Implementation.Text_Handlers.Abstract_Text_Handler
with null record;
--  Abstract text handler extended by primitive operations expressed in
--  terms of UTF-8 code units: UTF8_Replace_Slice, UTF8_Size,
--  UTF8_Insert_Slice and UTF8_Move. It adds no components.

not overriding procedure UTF8_Replace_Slice
(Self : in out Abstract_UTF8_Text;
Replace_From : VSS.Unicode.UTF8_Code_Unit_Index;
Replace_Size : VSS.Unicode.UTF8_Code_Unit_Count;
Replace_Length : VSS.Implementation.Strings.Character_Count;
By_Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
By_From : VSS.Unicode.UTF8_Code_Unit_Index;
By_Size : VSS.Unicode.UTF8_Code_Unit_Count;
By_Length : VSS.Implementation.Strings.Character_Count) is abstract;
--  Replace slice of the text by slice of the storage.
--
--  @param Self            Text object to be modified
--  @param Replace_From    Index of the first code unit to replace
--  @param Replace_Size    Number of code units to replace
--  @param Replace_Length  Number of characters to be replaced
--  @param By_Storage      Storage of the replacement data
--  @param By_From         Index of the first code unit in replacement data
--  @param By_Size         Number of code units in replacement data
--  @param By_Length       Number of characters in replacement data

not overriding function UTF8_Size
(Self : Abstract_UTF8_Text) return VSS.Unicode.UTF8_Code_Unit_Count
is abstract;
--  Return number of UTF-8 code units in the given text.

not overriding procedure UTF8_Insert_Slice
(Self : in out Abstract_UTF8_Text;
Into : VSS.Unicode.UTF8_Code_Unit_Index;
Storage : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Length : VSS.Implementation.Strings.Character_Count) is abstract;
--  Insert slice of the storage into the text starting from the given
--  position.
--
--  @param Self     Text object to be modified
--  @param Into     Index of the code unit to insert at
--  @param Storage  Storage of the inserted data
--  @param From     Index of the first code unit in the inserted storage
--  @param Size     Number of code units to insert
--  @param Length   Number of characters to insert

not overriding procedure UTF8_Move
(Self : in out Abstract_UTF8_Text;
From : VSS.Unicode.UTF8_Code_Unit_Index;
Size : VSS.Unicode.UTF8_Code_Unit_Count;
Into : VSS.Unicode.UTF8_Code_Unit_Index) is abstract;
--  Move slice of the data of the given size, starting from the given
--  position, to another position. From and Into positions must be valid
--  positions in UTF-8 encoded data, thus size and length of the string are
--  not changed by this operation.
--
--  @param Self  Text object to be modified
--  @param From  Index of the first code unit of the slice to move
--  @param Size  Number of code units to move
--  @param Into  Index of the first code unit of the slice after the move

procedure Unsafe_Initialize
(Text : in out
VSS.Implementation.Text_Handlers.Abstract_Text_Handler'Class;
Expand Down
Loading

0 comments on commit 0cdb5b0

Please sign in to comment.