From af7917fc1a681fd10b565a9c13b97d750b667bc0 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 3 May 2024 01:33:17 -0700 Subject: [PATCH] Cipher 1.9x speedup (#198) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cipher 1.9x speedup Simplify cipher loop and unroll once. ``` λ benchcmp before.txt after.txt | grep -v "naive" benchmark old ns/op new ns/op delta BenchmarkCipher/bytes=7;offset=1-32 7.17 7.79 +8.65% BenchmarkCipher/bytes=125;offset=0-32 24.5 23.1 -6.07% BenchmarkCipher/bytes=1024;offset=0-32 126 63.4 -49.87% BenchmarkCipher/bytes=4096;offset=0-32 462 244 -47.26% BenchmarkCipher/bytes=4100;offset=4-32 460 249 -45.93% BenchmarkCipher/bytes=4099;offset=3-32 463 250 -45.93% BenchmarkCipher/bytes=32775;offset=49-32 3619 1936 -46.50% benchmark old MB/s new MB/s speedup BenchmarkCipher/bytes=7;offset=1-32 976.74 898.93 0.92x BenchmarkCipher/bytes=125;offset=0-32 5092.84 5423.57 1.06x BenchmarkCipher/bytes=1024;offset=0-32 8103.00 16159.29 1.99x BenchmarkCipher/bytes=4096;offset=0-32 8870.86 16818.08 1.90x BenchmarkCipher/bytes=4100;offset=4-32 8917.63 16491.32 1.85x BenchmarkCipher/bytes=4099;offset=3-32 8854.38 16379.58 1.85x BenchmarkCipher/bytes=32775;offset=49-32 9056.37 16926.83 1.87x ``` I tried a few variations, but this seemed fine without too many changes. * Upgrade CI to more modern Go versions. * Fix doc * Revert "Upgrade CI to more modern Go versions." This reverts commit ed2945571fe7cbfead692fd2ba1cbb6a60aeefb5. --- cipher.go | 18 +++++++++--------- cipher_test.go | 4 ++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cipher.go b/cipher.go index 3c35e6b..ffe4161 100644 --- a/cipher.go +++ b/cipher.go @@ -25,8 +25,8 @@ func Cipher(payload []byte, mask [4]byte, offset int) { // Count number of bytes will processed one by one from the beginning of payload. ln := remain[mpos] // Count number of bytes will processed one by one from the end of payload. - // This is done to process payload by 8 bytes in each iteration of main loop. - rn := (n - ln) % 8 + // This is done to process payload by 16 bytes in each iteration of main loop. + rn := (n - ln) % 16 for i := 0; i < ln; i++ { payload[i] ^= mask[(mpos+i)%4] @@ -44,15 +44,15 @@ func Cipher(payload []byte, mask [4]byte, offset int) { ) // Skip already processed right part. // Get number of uint64 parts remaining to process. - n = (n - ln - rn) >> 3 + n = (n - ln - rn) >> 4 + j := ln for i := 0; i < n; i++ { - var ( - j = ln + (i << 3) - chunk = payload[j : j+8] - ) - p := binary.LittleEndian.Uint64(chunk) - p = p ^ m2 + chunk := payload[j : j+16] + p := binary.LittleEndian.Uint64(chunk) ^ m2 + p2 := binary.LittleEndian.Uint64(chunk[8:]) ^ m2 binary.LittleEndian.PutUint64(chunk, p) + binary.LittleEndian.PutUint64(chunk[8:], p2) + j += 16 } } diff --git a/cipher_test.go b/cipher_test.go index 2b62913..af6a092 100644 --- a/cipher_test.go +++ b/cipher_test.go @@ -169,6 +169,8 @@ func BenchmarkCipher(b *testing.B) { b.Run(fmt.Sprintf("naive_bytes=%d;offset=%d", bench.size, bench.offset), func(b *testing.B) { var sink int64 + b.SetBytes(int64(bench.size)) + b.ResetTimer() for i := 0; i < b.N; i++ { r := cipherNaiveNoCp(bts, mask, bench.offset) sink += int64(len(r)) @@ -177,6 +179,8 @@ func BenchmarkCipher(b *testing.B) { }) b.Run(fmt.Sprintf("bytes=%d;offset=%d", bench.size, bench.offset), func(b *testing.B) { var sink int64 + b.SetBytes(int64(bench.size)) + b.ResetTimer() for i := 0; i < b.N; i++ { Cipher(bts, mask, bench.offset) sink += int64(len(bts))