From 84c6c0e38286f95a1ed07e685ce1f62210201286 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:57:27 -0600 Subject: [PATCH] Bump github.com/parquet-go/parquet-go from 0.23.1-0.20241011155651-6446d1d0d2fe to 0.24.0 (#4303) * Bump github.com/parquet-go/parquet-go Bumps [github.com/parquet-go/parquet-go](https://github.com/parquet-go/parquet-go) from 0.23.1-0.20241011155651-6446d1d0d2fe to 0.24.0. - [Release notes](https://github.com/parquet-go/parquet-go/releases) - [Changelog](https://github.com/parquet-go/parquet-go/blob/main/CHANGELOG.md) - [Commits](https://github.com/parquet-go/parquet-go/commits/v0.24.0) --- updated-dependencies: - dependency-name: github.com/parquet-go/parquet-go dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update serverless gomod --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: tempo-gh-bot[bot] --- cmd/tempo-serverless/cloud-run/go.mod | 2 +- cmd/tempo-serverless/cloud-run/go.sum | 4 +- cmd/tempo-serverless/lambda/go.mod | 2 +- cmd/tempo-serverless/lambda/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- .../parquet-go/parquet-go/README.md | 23 +-- .../github.com/parquet-go/parquet-go/bloom.go | 15 +- .../parquet-go/parquet-go/bloom_be.go | 19 +++ .../parquet-go/parquet-go/bloom_le.go | 12 ++ .../parquet-go/parquet-go/column_buffer.go | 39 +---- .../parquet-go/parquet-go/dictionary.go | 21 +-- .../parquet-go/encoding/plain/plain.go | 138 ------------------ .../parquet-go/encoding/plain/plain_be.go | 113 ++++++++++++++ .../parquet-go/encoding/plain/plain_le.go | 52 +++++++ .../internal/bitpack/unpack_int32_be.go | 15 ++ .../internal/bitpack/unpack_int32_le.go | 9 ++ .../internal/bitpack/unpack_int32_purego.go | 22 +-- .../internal/bitpack/unpack_int64_purego.go | 22 +-- .../github.com/parquet-go/parquet-go/node.go | 14 +- .../github.com/parquet-go/parquet-go/value.go | 5 + .../parquet-go/parquet-go/value_be.go | 19 +++ .../parquet-go/parquet-go/value_le.go | 8 + vendor/modules.txt | 2 +- 24 files changed, 297 insertions(+), 269 deletions(-) create mode 100644 vendor/github.com/parquet-go/parquet-go/bloom_be.go create mode 100644 vendor/github.com/parquet-go/parquet-go/bloom_le.go create mode 100644 vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go create mode 100644 vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go create mode 100644 vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go create mode 100644 vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go create mode 100644 vendor/github.com/parquet-go/parquet-go/value_be.go create mode 100644 vendor/github.com/parquet-go/parquet-go/value_le.go diff --git a/cmd/tempo-serverless/cloud-run/go.mod b/cmd/tempo-serverless/cloud-run/go.mod index 26f4460c207..1b3e17c1d0d 100644 --- a/cmd/tempo-serverless/cloud-run/go.mod +++ b/cmd/tempo-serverless/cloud-run/go.mod @@ -78,7 +78,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect + github.com/parquet-go/parquet-go v0.24.0 // indirect github.com/pelletier/go-toml/v2 v2.2.3 // indirect github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect diff --git a/cmd/tempo-serverless/cloud-run/go.sum b/cmd/tempo-serverless/cloud-run/go.sum index b42acfaf74e..77d46b83cba 100644 --- a/cmd/tempo-serverless/cloud-run/go.sum +++ b/cmd/tempo-serverless/cloud-run/go.sum @@ -218,8 +218,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= diff --git a/cmd/tempo-serverless/lambda/go.mod b/cmd/tempo-serverless/lambda/go.mod index 42fa58aa95d..3929b7b6ac3 100644 --- a/cmd/tempo-serverless/lambda/go.mod +++ b/cmd/tempo-serverless/lambda/go.mod @@ -81,7 +81,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect + github.com/parquet-go/parquet-go v0.24.0 // indirect github.com/pelletier/go-toml/v2 v2.2.3 // indirect github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect diff --git a/cmd/tempo-serverless/lambda/go.sum b/cmd/tempo-serverless/lambda/go.sum index e67c80a658e..d976870553a 100644 --- a/cmd/tempo-serverless/lambda/go.sum +++ b/cmd/tempo-serverless/lambda/go.sum @@ -222,8 +222,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= diff --git a/go.mod b/go.mod index 4d40e51dd17..4c7ad4e7147 100644 --- a/go.mod +++ b/go.mod @@ -95,7 +95,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.116.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.116.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.116.0 - github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe + github.com/parquet-go/parquet-go v0.24.0 github.com/stoewer/parquet-cli v0.0.9 github.com/twmb/franz-go v1.18.0 github.com/twmb/franz-go/pkg/kadm v1.14.0 diff --git a/go.sum b/go.sum index 95c01463073..eb0092e105c 100644 --- a/go.sum +++ b/go.sum @@ -735,8 +735,8 @@ github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7s github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= diff --git a/vendor/github.com/parquet-go/parquet-go/README.md b/vendor/github.com/parquet-go/parquet-go/README.md index f0f0d8f5cf1..94bbdefad5b 100644 --- a/vendor/github.com/parquet-go/parquet-go/README.md +++ b/vendor/github.com/parquet-go/parquet-go/README.md @@ -1,10 +1,13 @@ -# parquet-go/parquet-go [![build status](https://github.com/parquet-go/parquet-go/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/parquet-go/parquet-go/actions) [![Go Report Card](https://goreportcard.com/badge/github.com/parquet-go/parquet-go)](https://goreportcard.com/report/github.com/parquet-go/parquet-go) [![Go Reference](https://pkg.go.dev/badge/github.com/parquet-go/parquet-go.svg)](https://pkg.go.dev/github.com/parquet-go/parquet-go) - -High-performance Go library to manipulate parquet files, initially developed at -[Twilio Segment](https://segment.com/engineering). - -![parquet-go-logo](https://github.com/parquet-go/parquet-go/assets/96151026/5b1f043b-2cee-4a64-a3c3-40d3353fecc0) - +
+ +
+

parquet-go/parquet-go

+

+High-performance Go library to manipulate parquet files, initially developed at +Twilio Segment. +

+ +
## Motivation @@ -35,7 +38,7 @@ using JSON or Protobuf. For more information, refer to the [Parquet Format Speci The package is distributed as a standard Go module that programs can take a dependency on and install with the following command: -``` +```bash go get github.com/parquet-go/parquet-go ``` @@ -289,11 +292,11 @@ defined by the sorting columns of the groups. There are a few constraints when merging row groups: -* The sorting columns of all the row groups must be the same, or the merge +- The sorting columns of all the row groups must be the same, or the merge operation must be explicitly configured a set of sorting columns which are a prefix of the sorting columns of all merged row groups. -* The schemas of row groups must all be equal, or the merge operation must +- The schemas of row groups must all be equal, or the merge operation must be explicitly configured with a schema that all row groups can be converted to, in which case the limitations of schema conversions apply. diff --git a/vendor/github.com/parquet-go/parquet-go/bloom.go b/vendor/github.com/parquet-go/parquet-go/bloom.go index 69d54c71712..30c64b84861 100644 --- a/vendor/github.com/parquet-go/parquet-go/bloom.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom.go @@ -1,7 +1,6 @@ package parquet import ( - "encoding/binary" "io" "github.com/parquet-go/parquet-go/bloom" @@ -10,7 +9,6 @@ import ( "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/unsafecast" - "golang.org/x/sys/cpu" ) // BloomFilter is an interface allowing applications to test whether a key @@ -174,18 +172,7 @@ func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { } func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - buf := make([]byte, srcLen*12) - for idx := range srcLen { - binary.LittleEndian.PutUint32(buf[(idx*12):4+(idx*12)], uint32(src[idx][0])) - binary.LittleEndian.PutUint32(buf[4+(idx*12):8+(idx*12)], uint32(src[idx][1])) - binary.LittleEndian.PutUint32(buf[8+(idx*12):12+(idx*12)], uint32(src[idx][2])) - } - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), buf, 12) - } else { - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[byte](src), 12) - } + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecastInt96ToBytes(src), 12) return dst, nil } diff --git a/vendor/github.com/parquet-go/parquet-go/bloom_be.go b/vendor/github.com/parquet-go/parquet-go/bloom_be.go new file mode 100644 index 00000000000..f7800301a68 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/bloom_be.go @@ -0,0 +1,19 @@ +//go:build s390x + +package parquet + +import ( + "encoding/binary" + + "github.com/parquet-go/parquet-go/deprecated" +) + +func unsafecastInt96ToBytes(src []deprecated.Int96) []byte { + out := make([]byte, len(src)*12) + for i := range src { + binary.LittleEndian.PutUint32(out[(i*12):4+(i*12)], uint32(src[i][0])) + binary.LittleEndian.PutUint32(out[4+(i*12):8+(i*12)], uint32(src[i][1])) + binary.LittleEndian.PutUint32(out[8+(i*12):12+(i*12)], uint32(src[i][2])) + } + return out +} diff --git a/vendor/github.com/parquet-go/parquet-go/bloom_le.go b/vendor/github.com/parquet-go/parquet-go/bloom_le.go new file mode 100644 index 00000000000..5b93bf07177 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/bloom_le.go @@ -0,0 +1,12 @@ +//go:build !s390x + +package parquet + +import ( + "github.com/parquet-go/parquet-go/deprecated" + "github.com/parquet-go/parquet-go/internal/unsafecast" +) + +func unsafecastInt96ToBytes(src []deprecated.Int96) []byte { + return unsafecast.Slice[byte](src) +} diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer.go b/vendor/github.com/parquet-go/parquet-go/column_buffer.go index 8435cb2977c..d1bc339862d 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_buffer.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer.go @@ -16,12 +16,8 @@ import ( "github.com/parquet-go/parquet-go/internal/bitpack" "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" - "golang.org/x/sys/cpu" ) -const offsetOfU64 = unsafe.Offsetof(Value{}.u64) -const offsetOfPtr = unsafe.Offsetof(Value{}.ptr) - // ColumnBuffer is an interface representing columns of a row group. // // ColumnBuffer implements sort.Interface as a way to support reordering the @@ -107,29 +103,6 @@ func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitio }, nil } -// On a big endian system, a boolean/byte value, which is in little endian byte format, is byte aligned -// to the 7th byte in a u64 (8 bytes) variable.. Hence the data will be available at 7th byte when -// interpreted as a little endian byte format. So, in order to access a boolean/byte value out of u64 variable, -// we need to add an offset of "7"... -// In the same way, an int32/uint32/float value, which is in little endian byte format, is byte aligned -// to the 4th byte in a u64 (8 bytes) variable.. Hence the data will be available at 4th byte when -// interpreted as a little endian byte format. So, in order to access an int32/uint32/float value out of u64 variable, -// we need to add an offset of "4" -func getOffset(colDict interface{}) uintptr { - var offset uintptr = 0 - - if cpu.IsBigEndian { - switch colDict.(type) { - case booleanColumnBuffer, booleanDictionary: - offset = 7 - - case int32ColumnBuffer, uint32ColumnBuffer, floatColumnBuffer, int32Dictionary, floatDictionary, uint32Dictionary: - offset = 4 - } - } - return offset -} - type nullableColumnIndex struct { ColumnIndex maxDefinitionLevel byte @@ -855,8 +828,7 @@ func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) { } func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfBool), columnLevels{}) return len(values), nil } @@ -995,8 +967,7 @@ func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) { } func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) return len(values), nil } @@ -1288,8 +1259,7 @@ func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) { } func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) return len(values), nil } @@ -1776,8 +1746,7 @@ func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) { } func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) return len(values), nil } diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary.go b/vendor/github.com/parquet-go/parquet-go/dictionary.go index 9dce0ff6514..5ff0417b0f7 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary.go @@ -140,8 +140,7 @@ func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i) func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } func (d *booleanDictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfBool)) } func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { @@ -238,8 +237,7 @@ func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } func (d *int32Dictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfU32)) } func (d *int32Dictionary) init(indexes []int32) { @@ -291,8 +289,7 @@ func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -520,8 +517,7 @@ func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *floatDictionary) index(i int32) float32 { return d.values[i] } func (d *floatDictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfU32)) } func (d *floatDictionary) init(indexes []int32) { @@ -560,8 +556,7 @@ func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { func (d *floatDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { @@ -930,8 +925,7 @@ func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfU32)) } func (d *uint32Dictionary) init(indexes []int32) { @@ -970,8 +964,7 @@ func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go index 8a58af5c39d..690bc815555 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go @@ -6,7 +6,6 @@ package plain import ( "encoding/binary" "fmt" - "golang.org/x/sys/cpu" "io" "math" @@ -37,70 +36,10 @@ func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } -func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 4)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) - idx += 4 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - -func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 8)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) - idx += 8 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { return append(dst[:0], unsafecast.Slice[byte](src)...), nil } -func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 4)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) - idx += 4 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - -func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 8)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) - idx += 8 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { dst = dst[:0] @@ -127,45 +66,6 @@ func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } -func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - if (len(src) % 4) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) - } - - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - byteDec := make([]int32, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) - idx += 4 - } - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[int32](src)...), nil - } -} - -func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { - if (len(src) % 8) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) - } - - if cpu.IsBigEndian { - srcLen := (len(src) / 8) - byteDec := make([]int64, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) - idx += 8 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[int64](src)...), nil - } -} - func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) { if (len(src) % 12) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) @@ -173,44 +73,6 @@ func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated return append(dst[:0], unsafecast.Slice[deprecated.Int96](src)...), nil } -func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { - if (len(src) % 4) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) - } - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - byteDec := make([]float32, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) - idx += 4 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[float32](src)...), nil - } -} - -func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { - if (len(src) % 8) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) - } - if cpu.IsBigEndian { - srcLen := (len(src) / 8) - byteDec := make([]float64, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) - idx += 8 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[float64](src)...), nil - } -} - func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { dst, offsets = dst[:0], offsets[:0] diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go new file mode 100644 index 00000000000..6c8c9000b52 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go @@ -0,0 +1,113 @@ +//go:build s390x + +package plain + +import ( + "encoding/binary" + "math" + + "github.com/parquet-go/parquet-go/encoding" +) + +// TODO: optimize by doing the byte swap in the output slice instead of +// allocating a temporay buffer. + +func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) + } + srcLen := (len(src) / 4) + byteDec := make([]int32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) + } + srcLen := (len(src) / 8) + byteDec := make([]int64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) + idx += 8 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) + } + srcLen := (len(src) / 4) + byteDec := make([]float32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) + } + srcLen := (len(src) / 8) + byteDec := make([]float64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) + idx += 8 + } + return append(dst[:0], (byteDec)...), nil +} diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go new file mode 100644 index 00000000000..bd1eadf6a06 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go @@ -0,0 +1,52 @@ +//go:build !s390x + +package plain + +import ( + "github.com/parquet-go/parquet-go/encoding" + "github.com/parquet-go/parquet-go/internal/unsafecast" +) + +func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) + } + return append(dst[:0], unsafecast.Slice[int32](src)...), nil +} + +func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) + } + return append(dst[:0], unsafecast.Slice[int64](src)...), nil +} + +func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) + } + return append(dst[:0], unsafecast.Slice[float32](src)...), nil +} + +func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) + } + return append(dst[:0], unsafecast.Slice[float64](src)...), nil +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go new file mode 100644 index 00000000000..0f4ba054c42 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go @@ -0,0 +1,15 @@ +//go:build s390x + +package bitpack + +import "encoding/binary" + +func unsafecastBytesToUint32(src []byte) []uint32 { + out := make([]uint32, len(src)/4) + idx := 0 + for k := range out { + out[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) + idx += 4 + } + return out +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go new file mode 100644 index 00000000000..f754e704ff1 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go @@ -0,0 +1,9 @@ +//go:build !s390x + +package bitpack + +import "github.com/parquet-go/parquet-go/internal/unsafecast" + +func unsafecastBytesToUint32(src []byte) []uint32 { + return unsafecast.Slice[uint32](src) +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go index cddbd773a51..1e65d8c02b9 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go @@ -2,28 +2,8 @@ package bitpack -import ( - "encoding/binary" - - "golang.org/x/sys/cpu" - - "github.com/parquet-go/parquet-go/internal/unsafecast" -) - func unpackInt32(dst []int32, src []byte, bitWidth uint) { - var bits []uint32 - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - bits = make([]uint32, srcLen) - idx := 0 - for k := range srcLen { - bits[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) - idx += 4 - } - } else { - bits = unsafecast.Slice[uint32](src) - } - + bits := unsafecastBytesToUint32(src) bitMask := uint32(1<