Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a faster encoding algorithm for varint #98

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
[email protected]
[email protected]
[email protected]
2 changes: 1 addition & 1 deletion Snappier.Benchmarks/Snappier.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<ProjectReference Include="..\Snappier\Snappier.csproj" />
</ItemGroup>
<ItemGroup Condition=" '$(Configuration)' == 'Previous' ">
<PackageReference Include="Snappier" Version="1.1.3" />
<PackageReference Include="Snappier" Version="1.1.6" />

<Compile Remove="FindMatchLength.cs" />
<Compile Remove="IncrementalCopy.cs" />
Expand Down
23 changes: 23 additions & 0 deletions Snappier.Benchmarks/VarIntEncodingWrite.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#if !PREVIOUS

using BenchmarkDotNet.Attributes;
using Snappier.Internal;

namespace Snappier.Benchmarks
{
/// <summary>
/// Benchmarks <c>VarIntEncoding.Write</c> for values whose encodings are
/// one (0), two (256) and three (65536) bytes long.
/// </summary>
public class VarIntEncodingWrite
{
    // Eight bytes, i.e. sizeof(ulong), so that Write is allowed to take its
    // 8-byte fast path on runtimes where that path is compiled in.
    private readonly byte[] _destination = new byte[8];

    [Params(0u, 256u, 65536u)]
    public uint Value { get; set; }

    /// <summary>Encodes <see cref="Value"/> into the reusable destination buffer.</summary>
    /// <returns>Number of bytes written, returned so the call is not optimized away.</returns>
    [Benchmark(Baseline = true)]
    public int Baseline() => VarIntEncoding.Write(_destination, Value);
}
}

#endif
69 changes: 69 additions & 0 deletions Snappier.Tests/Internal/VarIntEncodingWriteTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Snappier.Internal;
using Xunit;

namespace Snappier.Tests.Internal
{
/// <summary>
/// Checks the exact bytes emitted by <c>VarIntEncoding.Write</c>, both with a
/// minimally sized buffer and with a buffer large enough for an 8-byte store.
/// </summary>
public class VarIntEncodingWriteTests
{
    /// <summary>
    /// Pairs of (value, expected encoding). Each expected byte holds seven bits of
    /// the value, least significant group first, with the high bit set on every
    /// byte except the last.
    /// </summary>
    public static TheoryData<uint, byte[]> TestData() =>
        new()
        {
            { 0x00, [ 0x00 ] },
            { 0x01, [ 0x01 ] },
            { 0x7F, [ 0x7F ] },
            { 0x80, [ 0x80, 0x01 ] },
            { 0x555, [ 0xD5, 0x0A ] },
            { 0x7FFF, [ 0xFF, 0xFF, 0x01 ] },
            { 0xBFFF, [ 0xFF, 0xFF, 0x02 ] },
            { 0xFFFF, [ 0xFF, 0xFF, 0x03 ] },
            { 0x8000, [ 0x80, 0x80, 0x02 ] },
            { 0x5555, [ 0xD5, 0xAA, 0x01 ] },
            { 0xCAFEF00, [ 0x80, 0xDE, 0xBF, 0x65 ] },
            { 0xCAFEF00D, [ 0x8D, 0xE0, 0xFB, 0xD7, 0x0C ] },
            { 0xFFFFFFFF, [ 0xFF, 0xFF, 0xFF, 0xFF, 0x0F ] },
        };

    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Write(uint value, byte[] expected)
    {
        // Five bytes is the longest possible encoding of a 32-bit value and is
        // shorter than sizeof(ulong), so this buffer cannot take the 8-byte path.
        AssertEncoding(new byte[5], value, expected);
    }

    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_WriteWithPadding(uint value, byte[] expected)
    {
        // Test of the fast path where there are at least 8 bytes in the buffer
        AssertEncoding(new byte[sizeof(ulong)], value, expected);
    }

    // Encodes value into buffer and compares the written prefix with expected.
    private static void AssertEncoding(byte[] buffer, uint value, byte[] expected)
    {
        int written = VarIntEncoding.Write(buffer, value);

        Assert.Equal(expected, buffer.Take(written));
    }
}
}

/* ************************************************************
*
* @author Couchbase <[email protected]>
* @copyright 2021 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ************************************************************/
8 changes: 4 additions & 4 deletions Snappier.Tests/Snappier.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@

<ItemGroup>
<PackageReference Include="JUnitTestLogger" Version="1.1.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="xunit" Version="2.6.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
<PackageReference Include="xunit" Version="2.9.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PackageReference Include="coverlet.collector" Version="6.0.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
46 changes: 1 addition & 45 deletions Snappier/Internal/SnappyCompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public int Compress(ReadOnlySpan<byte> input, Span<byte> output)

_workingMemory.EnsureCapacity(input.Length);

int bytesWritten = WriteUncompressedLength(output, input.Length);
int bytesWritten = VarIntEncoding.Write(output, (uint)input.Length);
output = output.Slice(bytesWritten);

while (input.Length > 0)
Expand Down Expand Up @@ -69,50 +69,6 @@ public void Dispose()
_workingMemory = null;
}

/// <summary>
/// Writes <paramref name="length"/> to the start of <paramref name="output"/> as a
/// varint: seven bits per byte, least significant group first, with the high bit
/// of a byte set when another byte follows.
/// </summary>
/// <param name="output">Destination; indexed from 0 upwards, one byte at a time.</param>
/// <param name="length">Value to encode. Anything below 128 (including negative values) is written as a single truncated byte.</param>
/// <returns>Number of bytes written, between 1 and 5.</returns>
private static int WriteUncompressedLength(Span<byte> output, int length)
{
    const int ContinuationBit = 0b1000_0000;

    int count = 0;

    unchecked
    {
        // Emit full 7-bit groups while more than seven significant bits remain.
        while (length >= ContinuationBit)
        {
            output[count++] = (byte) (length | ContinuationBit);
            length >>= 7;
        }

        // Final group: high bit clear marks the end of the varint.
        output[count++] = (byte) length;
    }

    return count;
}

#region CompressFragment

private static int CompressFragment(ReadOnlySpan<byte> input, Span<byte> output, Span<ushort> tableSpan)
Expand Down
51 changes: 51 additions & 0 deletions Snappier/Internal/VarIntEncoding.Write.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using System;

namespace Snappier.Internal
{
internal static partial class VarIntEncoding
{
/// <summary>
/// Byte-at-a-time varint encoder: writes <paramref name="length"/> into
/// <paramref name="output"/> seven bits per byte, least significant group first,
/// setting the high bit of each byte that is followed by another.
/// </summary>
/// <param name="output">Destination; indexed from 0 upwards, one byte at a time.</param>
/// <param name="length">Value to encode.</param>
/// <returns>Number of bytes written, between 1 and 5.</returns>
private static int WriteSlow(Span<byte> output, uint length)
{
    const uint ContinuationBit = 0b1000_0000;

    int count = 0;

    unchecked
    {
        // Emit full 7-bit groups while more than seven significant bits remain.
        while (length >= ContinuationBit)
        {
            output[count++] = (byte) (length | ContinuationBit);
            length >>= 7;
        }

        // Final group: high bit clear marks the end of the varint.
        output[count++] = (byte) length;
    }

    return count;
}
}
}
138 changes: 138 additions & 0 deletions Snappier/Internal/VarIntEncoding.WriteFast.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;

#if NET6_0_OR_GREATER
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
#endif

/*
* This file is ported from https://github.com/couchbase/couchbase-net-client/blob/c10fe9ef09beadb8512f696d764b7a770429e641/src/Couchbase/Core/Utils/Leb128.cs
* and therefore retains a Couchbase copyright.
**/

namespace Snappier.Internal
{
internal static partial class VarIntEncoding
{
/// <summary>
/// Maximum length, in bytes, when encoding a 32-bit integer.
/// </summary>
/// <remarks>
/// Each encoded byte carries seven bits of the value, so 32 bits need at most five bytes.
/// </remarks>
public const int MaxLength = 5;

/// <summary>
/// Encodes a value onto a buffer using little-endian varint encoding.
/// </summary>
/// <param name="buffer">
/// Buffer to receive the value. When it is at least 8 bytes long, up to 8 bytes
/// may be overwritten even though no more than 5 are part of the encoding.
/// </param>
/// <param name="value">Value to encode.</param>
/// <returns>Number of bytes encoded.</returns>
public static int Write(Span<byte> buffer, uint value)
{
    // Note: This method is likely to be inlined into the caller, which may let the JIT
    // drop the length test when it knows the buffer size. BitConverter.IsLittleEndian
    // is resolved per CPU architecture, so that half of the test is always elided.

#if NET6_0_OR_GREATER
    // The fast path stores a whole ulong from a register, so it is only legal on
    // little-endian CPUs and when the buffer has room for all 8 bytes. Anything
    // else goes through the byte-at-a-time encoder to avoid a buffer overrun.
    if (!BitConverter.IsLittleEndian || buffer.Length < sizeof(ulong))
    {
        return WriteSlow(buffer, value);
    }

    return WriteFast(ref MemoryMarshal.GetReference(buffer), value);
#else
    return WriteSlow(buffer, value);
#endif
}

#if NET6_0_OR_GREATER

// Encodes value as a varint with a single unaligned 8-byte store starting at buffer.
// Returns the number of bytes that belong to the encoding (1 to 5); the remaining
// bytes of the 8-byte store are written as zero.
private static int WriteFast(ref byte buffer, uint value)
{
    // The unsafe store below is only sound because Write never calls this method
    // unless at least 8 bytes are available at buffer.

    if (value <= 0x7F)
    {
        // A single byte with the continuation bit clear. Zero has to be handled here
        // (the general path would compute a length of zero bytes for it), so the
        // whole one-byte range is short-circuited.
        buffer = (byte)value;
        return 1;
    }

    // Seven payload bits per byte, high bit of every byte still clear.
    ulong groups = Spread7BitGroupsIntoBytes(value);

    // Whole zero bytes at the top of the ulong are not part of the encoding.
    int emptyBytes = BitOperations.LeadingZeroCount(groups) / 8;
    int encodedLength = sizeof(ulong) - emptyBytes;

    // Continuation bits for every encoded byte except the last: shift the all-bytes
    // pattern down by the unused bytes plus one more byte.
    const ulong continuationInEveryByte = 0x8080808080808080UL;
    ulong continuationMask = continuationInEveryByte >> ((emptyBytes + 1) * 8);

    Unsafe.WriteUnaligned(ref buffer, groups | continuationMask);

    return encodedLength;
}

// Distributes the 32 bits of value over the low 5 bytes of a ulong, seven bits per
// byte (four bits in the fifth), leaving the high bit of every byte clear. This is
// the LEB128 layout before any continuation bits are applied.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong Spread7BitGroupsIntoBytes(uint value)
{
    // The IsSupported checks are resolved by the JIT for the running CPU, so only
    // one of the three strategies below ends up in the generated code.

    if (Bmi2.X64.IsSupported)
    {
        // 64-bit parallel bit deposit handles all five groups in one instruction.
        const ulong depositMask = 0x0f7f7f7f7fUL;
        return Bmi2.X64.ParallelBitDeposit(value, depositMask);
    }

    // Bits 28-31 land in the fifth byte; both remaining strategies place them by hand.
    ulong topGroup = (value & 0xf0000000UL) << 4;

    if (Bmi2.IsSupported)
    {
        // Intel x86 branch: the 32-bit BMI2 instruction covers the low four groups.
        ulong lowGroups = Bmi2.ParallelBitDeposit(value, 0x7f7f7f7fU);
        return lowGroups | topGroup;
    }

    // Fallback for unsupported CPUs (i.e. ARM): mask each group and shift it past
    // the reserved high bits of the bytes below it.
    ulong group0 = value & 0x0000007fUL;
    ulong group1 = (value & 0x00003f80UL) << 1;
    ulong group2 = (value & 0x001fc000UL) << 2;
    ulong group3 = (value & 0x0fe00000UL) << 3;

    return group0 | group1 | group2 | group3 | topGroup;
}

#endif
}
}

/* ************************************************************
*
* @author Couchbase <[email protected]>
* @copyright 2021 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ************************************************************/
Loading