Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

sz: Support S2 (close #429) #431

Merged
merged 4 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ require (
require (
github.com/STARRY-S/zip v0.2.1
github.com/bodgit/sevenzip v1.6.0
github.com/golang/snappy v0.0.4
github.com/pierrec/lz4/v4 v4.1.21
github.com/sorairolake/lzip-go v0.3.5
golang.org/x/text v0.20.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
Expand Down
102 changes: 93 additions & 9 deletions sz.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,52 @@ import (
"io"
"strings"

"github.com/golang/snappy"
"github.com/klauspost/compress/s2"
)

func init() {
RegisterFormat(Sz{})
}

// Sz facilitates Snappy compression.
type Sz struct{}
// Sz facilitates Snappy compression. It uses S2
// for reading and writing, but by default will
// write Snappy-compatible data.
type Sz struct {
// S2 holds the optional reader and writer settings that
// OpenReader and OpenWriter translate into s2 options.
// With the zero value, OpenReader adds no options and
// OpenWriter adds only the Snappy-compatibility option.
S2 S2
}

// S2 is an extension of Snappy that can read Snappy
// streams and write Snappy-compatible streams, but
// can also be configured to write Snappy-incompatible
// streams for greater gains. See
// https://pkg.go.dev/github.com/klauspost/compress/s2
// for details and the documentation for each option.
//
// Each field maps onto one s2 reader or writer option.
// A field left at its zero value adds no option, except
// SnappyIncompatible, whose meaning is inverted (see below).
type S2 struct {
// reader options

// MaxBlockSize, if nonzero, is passed to s2.ReaderMaxBlockSize.
MaxBlockSize int
// AllocBlock, if nonzero, is passed to s2.ReaderAllocBlock.
AllocBlock int
// IgnoreStreamIdentifier, if true, adds s2.ReaderIgnoreStreamIdentifier.
IgnoreStreamIdentifier bool
// IgnoreCRC, if true, adds s2.ReaderIgnoreCRC.
IgnoreCRC bool

// writer options

// AddIndex, if true, adds s2.WriterAddIndex.
AddIndex bool
// Compression selects the writer mode: S2LevelNone adds
// s2.WriterUncompressed, S2LevelBetter adds s2.WriterBetterCompression,
// and S2LevelBest adds s2.WriterBestCompression. Any other value
// (including zero and S2LevelFast) adds no compression option.
Compression S2Level
// BlockSize, if nonzero, is passed to s2.WriterBlockSize.
BlockSize int
// Concurrency, if nonzero, is passed to s2.WriterConcurrency.
Concurrency int
// FlushOnWrite, if true, adds s2.WriterFlushOnWrite.
FlushOnWrite bool
// Padding, if nonzero, is passed to s2.WriterPadding.
Padding int
// SnappyIncompatible is inverted relative to the s2 option:
// when false (the default), s2.WriterSnappyCompat is added;
// when true, it is omitted.
SnappyIncompatible bool
}

// Extension returns ".sz", the file extension for this format.
func (sz Sz) Extension() string {
	const ext = ".sz"
	return ext
}

func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
var mr MatchResult

// match filename
if strings.Contains(strings.ToLower(filename), sz.Extension()) {
if strings.Contains(strings.ToLower(filename), sz.Extension()) ||
strings.Contains(strings.ToLower(filename), ".s2") {
mr.ByName = true
}

Expand All @@ -36,13 +65,68 @@ func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchR
return mr, nil
}

func (Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) {
return snappy.NewBufferedWriter(w), nil
// OpenWriter wraps w in an S2 writer configured from sz.S2.
//
// Options are collected in a fixed order (index, compression level,
// block size, concurrency, flush-on-write, padding, Snappy
// compatibility) and handed to s2.NewWriter. The returned error
// is always nil.
func (sz Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) {
	cfg := sz.S2

	var options []s2.WriterOption
	add := func(opt s2.WriterOption) {
		options = append(options, opt)
	}

	if cfg.AddIndex {
		add(s2.WriterAddIndex())
	}

	// Any level other than these three (including the zero value
	// and S2LevelFast) adds no compression option.
	switch cfg.Compression {
	case S2LevelBest:
		add(s2.WriterBestCompression())
	case S2LevelBetter:
		add(s2.WriterBetterCompression())
	case S2LevelNone:
		add(s2.WriterUncompressed())
	}

	if size := cfg.BlockSize; size != 0 {
		add(s2.WriterBlockSize(size))
	}
	if workers := cfg.Concurrency; workers != 0 {
		add(s2.WriterConcurrency(workers))
	}
	if cfg.FlushOnWrite {
		add(s2.WriterFlushOnWrite())
	}
	if pad := cfg.Padding; pad != 0 {
		add(s2.WriterPadding(pad))
	}

	// SnappyIncompatible is an opt-out: the field is inverted
	// because by default we should probably write
	// Snappy-compatible streams.
	if !cfg.SnappyIncompatible {
		add(s2.WriterSnappyCompat())
	}

	return s2.NewWriter(w, options...), nil
}

func (Sz) OpenReader(r io.Reader) (io.ReadCloser, error) {
return io.NopCloser(snappy.NewReader(r)), nil
func (sz Sz) OpenReader(r io.Reader) (io.ReadCloser, error) {
var opts []s2.ReaderOption
if sz.S2.AllocBlock != 0 {
opts = append(opts, s2.ReaderAllocBlock(sz.S2.AllocBlock))
}
if sz.S2.IgnoreCRC {
opts = append(opts, s2.ReaderIgnoreCRC())
}
if sz.S2.IgnoreStreamIdentifier {
opts = append(opts, s2.ReaderIgnoreStreamIdentifier())
}
if sz.S2.MaxBlockSize != 0 {
opts = append(opts, s2.ReaderMaxBlockSize(sz.S2.MaxBlockSize))
}
return io.NopCloser(s2.NewReader(r, opts...)), nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit sad that the API is truncated to an io.ReadCloser, but I guess users can fall back to the original package for a more extensive API.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yeah, I guess this makes type asserting difficult, doesn't it? Sorry :(

I went through all the formats just now to see if the Closer API was really necessary but it does seem a few formats use it, so I think we need it to stay in order to keep the API advantages.

}

// https://github.com/google/snappy/blob/master/framing_format.txt
// S2Level is a compression level for S2 (Snappy/Sz extension).
// EXPERIMENTAL: May be changed or removed without a major version bump.
type S2Level int

// Compression levels for S2.
// EXPERIMENTAL: May be changed or removed without a major version bump.
const (
S2LevelNone S2Level = 1
S2LevelFast S2Level = 2
S2LevelBetter S2Level = 3
S2LevelBest S2Level = 4
mholt marked this conversation as resolved.
Show resolved Hide resolved
)

// snappyHeader is the stream identifier chunk of the Snappy framing
// format: chunk type 0xff, a 3-byte little-endian length of 6, and
// the ASCII bytes "sNaPpY". See
// https://github.com/google/snappy/blob/master/framing_format.txt
var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
Loading