Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

sz: Support S2 (close #429) #431

Merged
merged 4 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ require (
require (
github.com/STARRY-S/zip v0.2.1
github.com/bodgit/sevenzip v1.6.0
github.com/golang/snappy v0.0.4
github.com/pierrec/lz4/v4 v4.1.21
github.com/sorairolake/lzip-go v0.3.5
golang.org/x/text v0.20.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
Expand Down
102 changes: 93 additions & 9 deletions sz.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,52 @@ import (
"io"
"strings"

"github.com/golang/snappy"
"github.com/klauspost/compress/s2"
)

func init() {
RegisterFormat(Sz{})
}

// Sz facilitates Snappy compression.
type Sz struct{}
// Sz facilitates Snappy compression. It uses S2
// for reading and writing, but by default will
// write Snappy-compatible data: OpenWriter adds
// s2.WriterSnappyCompat unless S2.SnappyIncompatible
// is set.
type Sz struct {
// S2 holds the configurable S2 extension settings.
// OpenWriter and OpenReader translate its fields
// into s2 writer and reader options.
S2 S2
}

// S2 is an extension of Snappy that can read Snappy
// streams and write Snappy-compatible streams, but
// can also be configured to write Snappy-incompatible
// streams for greater gains. See
// https://pkg.go.dev/github.com/klauspost/compress/s2
// for details and the documentation for each option.
//
// Integer fields left at zero and boolean fields left
// false add no corresponding s2 option, with the
// exception of SnappyIncompatible and Compression
// (see their comments).
type S2 struct {
// reader options

// MaxBlockSize, when non-zero, is passed to s2.ReaderMaxBlockSize.
MaxBlockSize int
// AllocBlock, when non-zero, is passed to s2.ReaderAllocBlock.
AllocBlock int
// IgnoreStreamIdentifier, when true, adds s2.ReaderIgnoreStreamIdentifier.
IgnoreStreamIdentifier bool
// IgnoreCRC, when true, adds s2.ReaderIgnoreCRC.
IgnoreCRC bool

// writer options

// AddIndex, when true, adds s2.WriterAddIndex.
AddIndex bool
// Compression selects the writer compression level; see S2Level.
// The zero value is S2LevelNone, which adds s2.WriterUncompressed.
Compression S2Level
// BlockSize, when non-zero, is passed to s2.WriterBlockSize.
BlockSize int
// Concurrency, when non-zero, is passed to s2.WriterConcurrency.
Concurrency int
// FlushOnWrite, when true, adds s2.WriterFlushOnWrite.
FlushOnWrite bool
// Padding, when non-zero, is passed to s2.WriterPadding.
Padding int
// SnappyIncompatible is inverted: when false (the zero value),
// s2.WriterSnappyCompat is added; when true, it is omitted.
SnappyIncompatible bool
}

// Extension returns the file name extension used for this format, ".sz".
func (Sz) Extension() string {
	const ext = ".sz"
	return ext
}

func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
var mr MatchResult

// match filename
if strings.Contains(strings.ToLower(filename), sz.Extension()) {
if strings.Contains(strings.ToLower(filename), sz.Extension()) ||
strings.Contains(strings.ToLower(filename), ".s2") {
mr.ByName = true
}

Expand All @@ -36,13 +65,68 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR
return mr, nil
}

func (Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) {
return snappy.NewBufferedWriter(w), nil
// OpenWriter wraps w in an S2 writer configured from sz.S2.
//
// Each set field of sz.S2 is translated into the matching s2 writer
// option, in a fixed order. Unless sz.S2.SnappyIncompatible is true,
// s2.WriterSnappyCompat is appended as the final option so the output
// stays Snappy-compatible. The returned error is always nil.
//
// NOTE(review): S2LevelNone is the zero value of S2Level, so a
// zero-value Sz selects s2.WriterUncompressed. Confirm that this
// default is intended.
func (sz Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) {
	cfg := sz.S2

	var writerOpts []s2.WriterOption
	add := func(opt s2.WriterOption) {
		writerOpts = append(writerOpts, opt)
	}

	if cfg.AddIndex {
		add(s2.WriterAddIndex())
	}

	// S2LevelFast, like any unrecognized level, adds no option here.
	if level := cfg.Compression; level == S2LevelNone {
		add(s2.WriterUncompressed())
	} else if level == S2LevelBetter {
		add(s2.WriterBetterCompression())
	} else if level == S2LevelBest {
		add(s2.WriterBestCompression())
	}

	if size := cfg.BlockSize; size != 0 {
		add(s2.WriterBlockSize(size))
	}
	if workers := cfg.Concurrency; workers != 0 {
		add(s2.WriterConcurrency(workers))
	}
	if cfg.FlushOnWrite {
		add(s2.WriterFlushOnWrite())
	}
	if pad := cfg.Padding; pad != 0 {
		add(s2.WriterPadding(pad))
	}

	// The flag is inverted so that the zero value keeps the
	// output Snappy-compatible.
	if !cfg.SnappyIncompatible {
		add(s2.WriterSnappyCompat())
	}

	return s2.NewWriter(w, writerOpts...), nil
}

func (Sz) OpenReader(r io.Reader) (io.ReadCloser, error) {
return io.NopCloser(snappy.NewReader(r)), nil
func (sz Sz) OpenReader(r io.Reader) (io.ReadCloser, error) {
var opts []s2.ReaderOption
if sz.S2.AllocBlock != 0 {
opts = append(opts, s2.ReaderAllocBlock(sz.S2.AllocBlock))
}
if sz.S2.IgnoreCRC {
opts = append(opts, s2.ReaderIgnoreCRC())
}
if sz.S2.IgnoreStreamIdentifier {
opts = append(opts, s2.ReaderIgnoreStreamIdentifier())
}
if sz.S2.MaxBlockSize != 0 {
opts = append(opts, s2.ReaderMaxBlockSize(sz.S2.MaxBlockSize))
}
return io.NopCloser(s2.NewReader(r, opts...)), nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit sad that the API is truncated to an io.ReadCloser, but I guess users can fall back to the original package for a more extensive API.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yeah, I guess this makes type asserting difficult, doesn't it? Sorry :(

I went through all the formats just now to see if the Closer API was really necessary but it does seem a few formats use it, so I think we need it to stay in order to keep the API advantages.

}

// https://github.com/google/snappy/blob/master/framing_format.txt
// S2Level is the compression level for S2 (Snappy/Sz extension).
// It is read from S2.Compression when opening a writer.
// EXPERIMENTAL: May be changed or removed without a major version bump.
type S2Level int

// Compression levels for S2.
// EXPERIMENTAL: May be changed or removed without a major version bump.
const (
// S2LevelNone makes OpenWriter add s2.WriterUncompressed.
// It is the zero value of S2Level, so it is what an unset
// S2.Compression field selects.
S2LevelNone S2Level = 0
// S2LevelFast makes OpenWriter add no compression-level option,
// leaving the s2 writer at whatever level it uses by default.
S2LevelFast S2Level = 1
// S2LevelBetter makes OpenWriter add s2.WriterBetterCompression.
S2LevelBetter S2Level = 2
// S2LevelBest makes OpenWriter add s2.WriterBestCompression.
S2LevelBest S2Level = 3
)

// snappyHeader is a 10-byte sequence: 0xff, 0x06, 0x00, 0x00 followed by
// the ASCII bytes "sNaPpY". See
// https://github.com/google/snappy/blob/master/framing_format.txt
// NOTE(review): presumably compared against the start of the stream in
// Match; that part of the function is not visible here — confirm.
var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
Loading