From baf80642a49bba2b7bef0116dcf37c205498d92a Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 18 Nov 2024 11:43:25 -0700 Subject: [PATCH 1/3] sz: Support S2 (close #429) --- sz.go | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/sz.go b/sz.go index 8a926b7f..1893d7b8 100644 --- a/sz.go +++ b/sz.go @@ -6,15 +6,43 @@ import ( "io" "strings" - "github.com/golang/snappy" + "github.com/klauspost/compress/s2" ) func init() { RegisterFormat(Sz{}) } -// Sz facilitates Snappy compression. -type Sz struct{} +// Sz facilitates Snappy compression. It uses S2 +// for reading and writing, but by default will +// write Snappy-compatible data. +type Sz struct { + // Configurable S2 extension. + S2 S2 +} + +// S2 is an extension of Snappy that can read Snappy +// streams and write Snappy-compatible streams, but +// can also be configured to write Snappy-incompatible +// streams for greater gains. See +// https://pkg.go.dev/github.com/klauspost/compress/s2 +// for details and the documentation for each option. +type S2 struct { + // reader options + MaxBlockSize int + AllocBlock int + IgnoreStreamIdentifier bool + IgnoreCRC bool + + // writer options + AddIndex bool + Compression S2Level + BlockSize int + Concurrency int + FlushOnWrite bool + Padding int + SnappyIncompatible bool +} func (sz Sz) Extension() string { return ".sz" } @@ -22,7 +50,8 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), sz.Extension()) { + if strings.Contains(strings.ToLower(filename), sz.Extension()) || + strings.Contains(strings.ToLower(filename), ".s2") { mr.ByName = true } @@ -36,13 +65,68 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR return mr, nil } -func (Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { - return snappy.NewBufferedWriter(w), nil +func (sz Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { + var opts []s2.WriterOption + if sz.S2.AddIndex { + opts = append(opts, s2.WriterAddIndex()) + } + switch sz.S2.Compression { + case S2LevelNone: + opts = append(opts, s2.WriterUncompressed()) + case S2LevelBetter: + opts = append(opts, s2.WriterBetterCompression()) + case S2LevelBest: + opts = append(opts, s2.WriterBestCompression()) + } + if sz.S2.BlockSize != 0 { + opts = append(opts, s2.WriterBlockSize(sz.S2.BlockSize)) + } + if sz.S2.Concurrency != 0 { + opts = append(opts, s2.WriterConcurrency(sz.S2.Concurrency)) + } + if sz.S2.FlushOnWrite { + opts = append(opts, s2.WriterFlushOnWrite()) + } + if sz.S2.Padding != 0 { + opts = append(opts, s2.WriterPadding(sz.S2.Padding)) + } + if !sz.S2.SnappyIncompatible { + // this option is inverted because by default we should + // probably write Snappy-compatible streams + opts = append(opts, s2.WriterSnappyCompat()) + } + return s2.NewWriter(w, opts...), nil } -func (Sz) OpenReader(r io.Reader) (io.ReadCloser, error) { - return io.NopCloser(snappy.NewReader(r)), nil +func (sz Sz) OpenReader(r io.Reader) (io.ReadCloser, error) { + var opts []s2.ReaderOption + if sz.S2.AllocBlock != 0 { + opts = append(opts, s2.ReaderAllocBlock(sz.S2.AllocBlock)) + } + if sz.S2.IgnoreCRC { + opts = append(opts, s2.ReaderIgnoreCRC()) + } + if sz.S2.IgnoreStreamIdentifier { + opts = append(opts, s2.ReaderIgnoreStreamIdentifier()) + } + if sz.S2.MaxBlockSize != 0 { + opts = append(opts, s2.ReaderMaxBlockSize(sz.S2.MaxBlockSize)) + } + return io.NopCloser(s2.NewReader(r, opts...)), nil } -// https://github.com/google/snappy/blob/master/framing_format.txt +// Compression level for S2 (Snappy/Sz extension). +// EXPERIMENTAL: May be changed or removed without a major version bump. +type S2Level int + +// Compression levels for S2. +// EXPERIMENTAL: May be changed or removed without a major version bump. +const ( + S2LevelNone S2Level = 1 + S2LevelFast S2Level = 2 + S2LevelBetter S2Level = 3 + S2LevelBest S2Level = 4 +) + +// https://github.com/google/snappy/blob/master/framing_format.txt - contains "sNaPpY" var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59} From 0195698c59c7037250b6d0d6a3d0ab01647c96c9 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 18 Nov 2024 11:45:45 -0700 Subject: [PATCH 2/3] Update go.mod --- go.mod | 1 - go.sum | 2 -- 2 files changed, 3 deletions(-) diff --git a/go.mod b/go.mod index 296a9b2e..e94ef697 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,6 @@ require ( require ( github.com/STARRY-S/zip v0.2.1 github.com/bodgit/sevenzip v1.5.2 - github.com/golang/snappy v0.0.4 github.com/pierrec/lz4/v4 v4.1.21 github.com/sorairolake/lzip-go v0.3.5 golang.org/x/text v0.20.0 diff --git a/go.sum b/go.sum index 7fa1ef4f..49beee1a 100644 --- a/go.sum +++ b/go.sum @@ -54,8 +54,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= From 81ed621e80d18cb892665e9383da942a264db8c9 Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Tue, 19 Nov 2024 06:00:46 -0700 Subject: [PATCH 3/3] Update sz.go Co-authored-by: Klaus Post --- sz.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sz.go b/sz.go index 1893d7b8..4a553892 100644 --- a/sz.go +++ b/sz.go @@ -122,10 +122,10 @@ type S2Level int // Compression levels for S2. // EXPERIMENTAL: May be changed or removed without a major version bump. const ( - S2LevelNone S2Level = 1 - S2LevelFast S2Level = 2 - S2LevelBetter S2Level = 3 - S2LevelBest S2Level = 4 + S2LevelNone S2Level = 0 + S2LevelFast S2Level = 1 + S2LevelBetter S2Level = 2 + S2LevelBest S2Level = 3 ) // https://github.com/google/snappy/blob/master/framing_format.txt - contains "sNaPpY"