From baf80642a49bba2b7bef0116dcf37c205498d92a Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 18 Nov 2024 11:43:25 -0700 Subject: [PATCH] sz: Support S2 (close #429) --- sz.go | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/sz.go b/sz.go index 8a926b7f..1893d7b8 100644 --- a/sz.go +++ b/sz.go @@ -6,15 +6,43 @@ import ( "io" "strings" - "github.com/golang/snappy" + "github.com/klauspost/compress/s2" ) func init() { RegisterFormat(Sz{}) } -// Sz facilitates Snappy compression. -type Sz struct{} +// Sz facilitates Snappy compression. It uses S2 +// for reading and writing, but by default will +// write Snappy-compatible data. +type Sz struct { + // S2 configures the S2 reader and writer created by OpenReader and OpenWriter. + S2 S2 +} + +// S2 is an extension of Snappy that can read Snappy +// streams and write Snappy-compatible streams, but +// can also be configured to write Snappy-incompatible +// streams for greater gains. See +// https://pkg.go.dev/github.com/klauspost/compress/s2 +// for details and the documentation for each option. 
+type S2 struct { + // reader options, read by OpenReader; zero/false values add no s2 reader option + MaxBlockSize int + AllocBlock int + IgnoreStreamIdentifier bool + IgnoreCRC bool + + // writer options, read by OpenWriter; see that method for how each maps to an s2 writer option + AddIndex bool + Compression S2Level + BlockSize int + Concurrency int + FlushOnWrite bool + Padding int + SnappyIncompatible bool +} func (sz Sz) Extension() string { return ".sz" } @@ -22,7 +50,8 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), sz.Extension()) { + if strings.Contains(strings.ToLower(filename), sz.Extension()) || + strings.Contains(strings.ToLower(filename), ".s2") { mr.ByName = true } @@ -36,13 +65,68 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR return mr, nil } -func (Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { - return snappy.NewBufferedWriter(w), nil +func (sz Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { + var opts []s2.WriterOption + if sz.S2.AddIndex { + opts = append(opts, s2.WriterAddIndex()) + } + switch sz.S2.Compression { + case S2LevelNone: + opts = append(opts, s2.WriterUncompressed()) + case S2LevelBetter: + opts = append(opts, s2.WriterBetterCompression()) + case S2LevelBest: + opts = append(opts, s2.WriterBestCompression()) + } + if sz.S2.BlockSize != 0 { + opts = append(opts, s2.WriterBlockSize(sz.S2.BlockSize)) + } + if sz.S2.Concurrency != 0 { + opts = append(opts, s2.WriterConcurrency(sz.S2.Concurrency)) + } + if sz.S2.FlushOnWrite { + opts = append(opts, s2.WriterFlushOnWrite()) + } + if sz.S2.Padding != 0 { + opts = append(opts, s2.WriterPadding(sz.S2.Padding)) + } + if !sz.S2.SnappyIncompatible { + // this option is inverted because by default we should + // probably write Snappy-compatible streams + opts = append(opts, s2.WriterSnappyCompat()) + } + return s2.NewWriter(w, opts...), nil } -func (Sz) OpenReader(r io.Reader) (io.ReadCloser, error) { - return io.NopCloser(snappy.NewReader(r)), nil +func (sz Sz) 
OpenReader(r io.Reader) (io.ReadCloser, error) { + var opts []s2.ReaderOption + if sz.S2.AllocBlock != 0 { + opts = append(opts, s2.ReaderAllocBlock(sz.S2.AllocBlock)) + } + if sz.S2.IgnoreCRC { + opts = append(opts, s2.ReaderIgnoreCRC()) + } + if sz.S2.IgnoreStreamIdentifier { + opts = append(opts, s2.ReaderIgnoreStreamIdentifier()) + } + if sz.S2.MaxBlockSize != 0 { + opts = append(opts, s2.ReaderMaxBlockSize(sz.S2.MaxBlockSize)) + } + return io.NopCloser(s2.NewReader(r, opts...)), nil } -// https://github.com/google/snappy/blob/master/framing_format.txt +// S2Level is a compression level for S2 (Snappy/Sz extension). +// EXPERIMENTAL: May be changed or removed without a major version bump. +type S2Level int + +// Compression levels for S2. In OpenWriter, S2LevelFast and the zero value add no level option to the writer. +// EXPERIMENTAL: May be changed or removed without a major version bump. +const ( + S2LevelNone S2Level = 1 + S2LevelFast S2Level = 2 + S2LevelBetter S2Level = 3 + S2LevelBest S2Level = 4 +) + +// https://github.com/google/snappy/blob/master/framing_format.txt - contains "sNaPpY" var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}