// File: ParquetType_test.go (143 lines, 130 loc, 7.78 KB).
// Repository forked from xitongsys/parquet-go.

package ParquetType
import (
"bytes"
"encoding/binary"
"fmt"
"testing"
"github.com/krehermann/parquet-go/parquet"
)
// TestStrToParquetType feeds string inputs through StrToParquetType for each
// physical type / converted type combination in the table and compares the
// result against the expected Go value.
//
// The comparison is done on the "%v" rendering of both values, so it checks
// the printed form and not the dynamic Go type of the returned value.
func TestStrToParquetType(t *testing.T) {
	// Short local aliases for the pointer constructors keep the table readable.
	pt := parquet.TypePtr
	ct := parquet.ConvertedTypePtr

	cases := []struct {
		in        string                 // string handed to StrToParquetType
		want      interface{}            // Go value whose "%v" form is expected
		physical  *parquet.Type          // physical type
		converted *parquet.ConvertedType // converted type; nil when there is none
	}{
		// Physical types without a converted type.
		{"false", false, pt(parquet.Type_BOOLEAN), nil},
		{"1", int32(1), pt(parquet.Type_INT32), nil},
		{"0", int64(0), pt(parquet.Type_INT64), nil},
		{"12345", StrIntToBinary("12345", "LittleEndian", 12, true), pt(parquet.Type_INT96), nil},
		{"0.1", float32(0.1), pt(parquet.Type_FLOAT), nil},
		{"0.1", float64(0.1), pt(parquet.Type_DOUBLE), nil},
		{"abc bcd", "abc bcd", pt(parquet.Type_BYTE_ARRAY), nil},
		{"abc bcd", "abc bcd", pt(parquet.Type_FIXED_LEN_BYTE_ARRAY), nil},

		// Converted types layered on a physical type.
		{"abc bcd", "abc bcd", pt(parquet.Type_BYTE_ARRAY), ct(parquet.ConvertedType_UTF8)},
		{"1", int32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_INT_8)},
		{"1", int32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_INT_16)},
		{"1", int32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_INT_32)},
		{"1", int64(1), pt(parquet.Type_INT64), ct(parquet.ConvertedType_INT_64)},
		{"1", uint32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_UINT_8)},
		{"1", uint32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_UINT_16)},
		{"1", uint32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_UINT_32)},
		{"1", uint64(1), pt(parquet.Type_INT64), ct(parquet.ConvertedType_UINT_64)},
		{"1", int32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_DATE)},
		{"1", int32(1), pt(parquet.Type_INT32), ct(parquet.ConvertedType_TIME_MILLIS)},
		{"1", int64(1), pt(parquet.Type_INT64), ct(parquet.ConvertedType_TIME_MICROS)},
		{"1", int64(1), pt(parquet.Type_INT64), ct(parquet.ConvertedType_TIMESTAMP_MICROS)},
		{"1", int64(1), pt(parquet.Type_INT64), ct(parquet.ConvertedType_TIMESTAMP_MILLIS)},
		{"123456789", StrIntToBinary("123456789", "LittleEndian", 12, false), pt(parquet.Type_FIXED_LEN_BYTE_ARRAY), ct(parquet.ConvertedType_INTERVAL)},

		// DECIMAL on each backing physical type.
		{"123.45", int32(12345), pt(parquet.Type_INT32), ct(parquet.ConvertedType_DECIMAL)},
		{"123.45", int64(12345), pt(parquet.Type_INT64), ct(parquet.ConvertedType_DECIMAL)},
		{"123.45", StrIntToBinary("12345", "BigEndian", 12, true), pt(parquet.Type_FIXED_LEN_BYTE_ARRAY), ct(parquet.ConvertedType_DECIMAL)},
		{"123.45", StrIntToBinary("12345", "BigEndian", 0, true), pt(parquet.Type_BYTE_ARRAY), ct(parquet.ConvertedType_DECIMAL)},
	}

	for _, tc := range cases {
		// The trailing 12 and 2 are the same for every row.
		// NOTE(review): presumably length and scale — confirm against the
		// StrToParquetType signature.
		got := fmt.Sprintf("%v", StrToParquetType(tc.in, tc.physical, tc.converted, 12, 2))
		want := fmt.Sprintf("%v", tc.want)
		if got == want {
			continue
		}
		t.Errorf("StrToParquetType err %v-%v, expect %v, get %v", tc.physical, tc.converted, want, got)
	}
}
// TestTypeNameToParquetType checks that TypeNameToParquetType maps each type
// name (and, for DECIMAL, the accompanying base type name) to the expected
// physical type and converted type.
//
// A row whose CT is nil expects a nil converted type from the function; a row
// with a non-nil CT expects a non-nil converted type of equal value.
func TestTypeNameToParquetType(t *testing.T) {
	testData := []struct {
		name     string                 // type name passed as the first argument
		baseName string                 // base type name; only set for DECIMAL rows
		PT       *parquet.Type          // expected physical type
		CT       *parquet.ConvertedType // expected converted type, nil if none
	}{
		{"BOOLEAN", "", parquet.TypePtr(parquet.Type_BOOLEAN), nil},
		{"INT32", "", parquet.TypePtr(parquet.Type_INT32), nil},
		{"INT64", "", parquet.TypePtr(parquet.Type_INT64), nil},
		{"INT96", "", parquet.TypePtr(parquet.Type_INT96), nil},
		{"FLOAT", "", parquet.TypePtr(parquet.Type_FLOAT), nil},
		{"DOUBLE", "", parquet.TypePtr(parquet.Type_DOUBLE), nil},
		{"BYTE_ARRAY", "", parquet.TypePtr(parquet.Type_BYTE_ARRAY), nil},
		{"FIXED_LEN_BYTE_ARRAY", "", parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), nil},
		{"UTF8", "", parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8)},
		{"INT_8", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8)},
		{"INT_16", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16)},
		{"INT_32", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_32)},
		{"INT_64", "", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_64)},
		{"UINT_8", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8)},
		{"UINT_16", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_16)},
		{"UINT_32", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32)},
		{"UINT_64", "", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_64)},
		{"DATE", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DATE)},
		{"TIME_MILLIS", "", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MILLIS)},
		{"TIME_MICROS", "", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MICROS)},
		{"TIMESTAMP_MICROS", "", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MICROS)},
		{"TIMESTAMP_MILLIS", "", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MILLIS)},
		{"INTERVAL", "", parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_INTERVAL)},
		{"DECIMAL", "INT32", parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
		{"DECIMAL", "INT64", parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
		{"DECIMAL", "FIXED_LEN_BYTE_ARRAY", parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
		{"DECIMAL", "BYTE_ARRAY", parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
	}

	// Nil-safe renderers: many rows legitimately carry a nil converted type,
	// so failure messages must never dereference blindly.
	ptString := func(p *parquet.Type) string {
		if p == nil {
			return "<nil>"
		}
		return fmt.Sprintf("%v", *p)
	}
	ctString := func(c *parquet.ConvertedType) string {
		if c == nil {
			return "<nil>"
		}
		return fmt.Sprintf("%v", *c)
	}

	for _, data := range testData {
		pT, cT := TypeNameToParquetType(data.name, data.baseName)

		ptMatch := pT != nil && *pT == *data.PT
		// Converted types match when both are nil, or both are set and equal.
		// Checking nil-ness on both sides catches a missing or an unexpected
		// converted type instead of passing silently or panicking.
		ctMatch := (cT == nil && data.CT == nil) ||
			(cT != nil && data.CT != nil && *cT == *data.CT)

		if !ptMatch || !ctMatch {
			t.Errorf("TypeNameToParquetType(%q, %q) err, expect %v-%v, get %v-%v",
				data.name, data.baseName,
				ptString(data.PT), ctString(data.CT),
				ptString(pT), ctString(cT))
		}
	}
}
// TestStrIntToBinary compares the output of StrIntToBinary against the bytes
// produced by encoding/binary for the same number.
//
// For every case the expected value is built by writing num (an int32, so
// four bytes) to a buffer in the requested byte order; the result of
// StrIntToBinary(nums, order, length, signed) must equal those bytes.
func TestStrIntToBinary(t *testing.T) {
	cases := []struct {
		num    int32  // value whose 4-byte encoding is the expected output
		nums   string // decimal string passed to StrIntToBinary
		order  string // "LittleEndian" or "BigEndian"
		length int    // length argument passed to StrIntToBinary
		signed bool   // signed argument passed to StrIntToBinary
	}{
		{0, "0", "LittleEndian", 4, true},
		{10, "10", "LittleEndian", 4, true},
		{-10, "-10", "LittleEndian", 4, true},
		{-111, "-111", "LittleEndian", 4, true},
		{2147483647, "2147483647", "LittleEndian", 0, true},
		{-2147483648, "-2147483648", "LittleEndian", 0, true},
		// Unsigned 2147483648 has the same 32-bit pattern as int32 -2147483648.
		{-2147483648, "2147483648", "LittleEndian", 0, false},
		{0, "0", "BigEndian", 4, true},
		{10, "10", "BigEndian", 4, true},
		{-10, "-10", "BigEndian", 4, true},
		{-111, "-111", "BigEndian", 4, true},
		{2147483647, "2147483647", "BigEndian", 0, true},
		{-2147483648, "-2147483648", "BigEndian", 0, true},
		{-2147483648, "2147483648", "BigEndian", 0, false},
	}

	for _, c := range cases {
		// Any order other than "LittleEndian" is encoded big-endian.
		var byteOrder binary.ByteOrder = binary.BigEndian
		if c.order == "LittleEndian" {
			byteOrder = binary.LittleEndian
		}

		buf := new(bytes.Buffer)
		if err := binary.Write(buf, byteOrder, c.num); err != nil {
			// Without a reference encoding the comparison is meaningless.
			t.Fatalf("encoding reference value %d as %s: %v", c.num, c.order, err)
		}
		expect := buf.String()

		res := StrIntToBinary(c.nums, c.order, c.length, c.signed)
		if res != expect {
			t.Errorf("StrIntToBinary(%q, %q, %d, %v) error %b, expect %b, get %b",
				c.nums, c.order, c.length, c.signed, c.num, []byte(expect), []byte(res))
		}
	}
}