From 891287c5932b9c72024317f9b96c67fb11528443 Mon Sep 17 00:00:00 2001
From: yuin
Date: Thu, 14 Jul 2016 13:35:09 +0900
Subject: [PATCH] first commit

---
 README.md           |  51 ++++++++++++++
 charsetutil.go      | 150 +++++++++++++++++++++++++++++++++++++++++
 charsetutil_test.go | 162 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 363 insertions(+)
 create mode 100644 README.md
 create mode 100644 charsetutil.go
 create mode 100644 charsetutil_test.go

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..643c024
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+## charsetutil - The easiest way to convert character set encodings in Go
+
+charsetutil provides the easiest way to convert character set encodings in Go.
+
+## Install
+
+```bash
+go get github.com/yuin/charsetutil
+```
+
+## Utilities
+
+- `Decode*` : Converts from the specified charset to UTF-8.
+- `Encode*` : Converts from UTF-8 to the specified charset.
+
+- `MustDecode*` : Same as `Decode*`, but panics when an error occurs.
+- `MustEncode*` : Same as `Encode*`, but panics when an error occurs.
+
+
+```go
+b, err = EncodeString("こんにちわ", "Windows-31J")
+b, err = Encode("こんにちわ", "Windows-31J")
+b, err = EncodeBytes([]byte("こんにちわ"), "Windows-31J")
+b, err = EncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")
+b = MustEncodeString("こんにちわ", "Windows-31J")
+b = MustEncode("こんにちわ", "Windows-31J")
+b = MustEncodeBytes([]byte("こんにちわ"), "Windows-31J")
+b = MustEncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")
+
+s, err = DecodeString(string(source), "Windows-31J")
+s, err = Decode(source, "Windows-31J")
+s, err = DecodeBytes(source, "Windows-31J")
+s, err = DecodeReader(bytes.NewReader(source), "Windows-31J")
+s = MustDecodeString(string(source), "Windows-31J")
+s = MustDecode(source, "Windows-31J")
+s = MustDecodeBytes(source, "Windows-31J")
+s = MustDecodeReader(bytes.NewReader(source), "Windows-31J")
+```
+
+## Supported character sets
+
+See the [WHATWG Encoding Standard](https://encoding.spec.whatwg.org/#names-and-labels) for the supported names and labels.
+
+## Author
+
+Yusuke Inuzuka
+
+## License
+
+[BSD License](http://opensource.org/licenses/BSD-2-Clause)
+
diff --git a/charsetutil.go b/charsetutil.go
new file mode 100644
index 0000000..19569b8
--- /dev/null
+++ b/charsetutil.go
@@ -0,0 +1,150 @@
+// Package charsetutil provides helpers for converting text between UTF-8
+// and other character set encodings.
+//
+// Charset names are looked up as WHATWG Encoding Standard labels, for
+// example "Windows-31J" or "euc-jp".
+package charsetutil
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"strings"
+
+	"golang.org/x/net/html/charset"
+	"golang.org/x/text/transform"
+)
+
+// panicIfError panics with err if it is non-nil; the Must* functions use it
+// to turn conversion errors into panics.
+func panicIfError(err error) {
+	if err != nil {
+		panic(err)
+	}
+}
+
+// DecodeReader reads all of s, which holds text in the charset labelled enc,
+// and returns the text as a UTF-8 string. It returns an error if enc is not
+// a supported charset label or if reading from s fails.
+func DecodeReader(s io.Reader, enc string) (string, error) {
+	reader, err := charset.NewReaderLabel(enc, s)
+	if err != nil {
+		return "", err
+	}
+	decoded, err := ioutil.ReadAll(reader)
+	if err != nil {
+		return "", err
+	}
+	return string(decoded), nil
+}
+
+// MustDecodeReader is like DecodeReader but panics if an error occurs.
+func MustDecodeReader(s io.Reader, enc string) string {
+	ret, err := DecodeReader(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// DecodeBytes converts s from the charset labelled enc to a UTF-8 string.
+func DecodeBytes(s []byte, enc string) (string, error) {
+	return DecodeReader(bytes.NewReader(s), enc)
+}
+
+// MustDecodeBytes is like DecodeBytes but panics if an error occurs.
+func MustDecodeBytes(s []byte, enc string) string {
+	ret, err := DecodeBytes(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// DecodeString converts s, whose bytes are text in the charset labelled enc,
+// to a UTF-8 string.
+func DecodeString(s, enc string) (string, error) {
+	return DecodeReader(strings.NewReader(s), enc)
+}
+
+// MustDecodeString is like DecodeString but panics if an error occurs.
+func MustDecodeString(s, enc string) string {
+	ret, err := DecodeString(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// Decode is shorthand for DecodeBytes.
+func Decode(s []byte, enc string) (string, error) {
+	return DecodeBytes(s, enc)
+}
+
+// MustDecode is like Decode but panics if an error occurs.
+func MustDecode(s []byte, enc string) string {
+	ret, err := Decode(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// EncodeReader reads all of the UTF-8 text in s and returns it encoded in the
+// charset labelled enc. It returns an error if enc is not a supported charset
+// label, if reading from s fails, or if encoding the text fails.
+func EncodeReader(s io.Reader, enc string) ([]byte, error) {
+	e, _ := charset.Lookup(enc)
+	if e == nil {
+		return nil, fmt.Errorf("unsupported charset: %q", enc)
+	}
+	var buf bytes.Buffer
+	writer := transform.NewWriter(&buf, e.NewEncoder())
+	if _, err := io.Copy(writer, s); err != nil {
+		return nil, err
+	}
+	// Close flushes whatever the writer is still buffering and signals end
+	// of input to the encoder, so that a stateful charset such as ISO-2022-JP
+	// can emit its closing escape sequence. Without it the output may be cut
+	// short.
+	if err := writer.Close(); err != nil {
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+
+// MustEncodeReader is like EncodeReader but panics if an error occurs.
+func MustEncodeReader(s io.Reader, enc string) []byte {
+	ret, err := EncodeReader(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// EncodeBytes converts the UTF-8 text in s to the charset labelled enc.
+func EncodeBytes(s []byte, enc string) ([]byte, error) {
+	return EncodeReader(bytes.NewReader(s), enc)
+}
+
+// MustEncodeBytes is like EncodeBytes but panics if an error occurs.
+func MustEncodeBytes(s []byte, enc string) []byte {
+	ret, err := EncodeBytes(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// EncodeString converts the UTF-8 string s to the charset labelled enc.
+func EncodeString(s, enc string) ([]byte, error) {
+	return EncodeReader(strings.NewReader(s), enc)
+}
+
+// MustEncodeString is like EncodeString but panics if an error occurs.
+func MustEncodeString(s, enc string) []byte {
+	ret, err := EncodeString(s, enc)
+	panicIfError(err)
+	return ret
+}
+
+// Encode is shorthand for EncodeString.
+func Encode(s string, enc string) ([]byte, error) {
+	return EncodeString(s, enc)
+}
+
+// MustEncode is like Encode but panics if an error occurs.
+func MustEncode(s string, enc string) []byte {
+	ret, err := Encode(s, enc)
+	panicIfError(err)
+	return ret
+}
diff --git a/charsetutil_test.go b/charsetutil_test.go
new file mode 100644
index 0000000..ef8e963
--- /dev/null
+++ b/charsetutil_test.go
@@ -0,0 +1,162 @@
+package charsetutil
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+func TestEncodeOk(t *testing.T) {
+	expected := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}
+	assert := func(b []byte, err error) {
+		if err != nil {
+			t.Errorf("Failed: %s", err.Error())
+		}
+		if string(b) != string(expected) {
+			t.Error("Failed")
+		}
+	}
+
+	b, err := EncodeString("こんにちわ", "Windows-31J")
+	assert(b, err)
+
+	b, err = EncodeBytes([]byte("こんにちわ"), "Windows-31J")
+	assert(b, err)
+
+	b, err = Encode("こんにちわ", "Windows-31J")
+	assert(b, err)
+
+	b, err = EncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")
+	assert(b, err)
+
+	b = MustEncodeString("こんにちわ", "Windows-31J")
+	assert(b, nil)
+
+	b = MustEncodeBytes([]byte("こんにちわ"), "Windows-31J")
+	assert(b, nil)
+
+	b = MustEncode("こんにちわ", "Windows-31J")
+	assert(b, nil)
+
+	b = MustEncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")
+	assert(b, nil)
+}
+
+func TestEncodeError(t *testing.T) {
+	assert := func(b []byte, err error) {
+		if b != nil || err == nil {
+			t.Error("Failed")
+		}
+	}
+
+	assertPanic := func(f func() []byte) {
+		defer func() {
+			if recover() == nil {
+				t.Error("Should be failed")
+			}
+		}()
+		b := f()
+		if b != nil {
+			t.Error("Failed")
+		}
+	}
+
+	b, err := EncodeString("こんにちわ", "unknown")
+	assert(b, err)
+
+	b, err = EncodeBytes([]byte("こんにちわ"), "unknown")
+	assert(b, err)
+
+	b, err = Encode("こんにちわ", "unknown")
+	assert(b, err)
+
+	b, err = EncodeReader(strings.NewReader("こんにちわ"), "unknown")
+	assert(b, err)
+
+	assertPanic(func() []byte { return MustEncodeString("こんにちわ", "unknown") })
+
+	assertPanic(func() []byte { return MustEncodeBytes([]byte("こんにちわ"), "unknown") })
+
+	assertPanic(func() []byte { return MustEncode("こんにちわ", "unknown") })
+
+	assertPanic(func() []byte { return MustEncodeReader(strings.NewReader("こんにちわ"), "unknown") })
+}
+
+func TestDecodeOk(t *testing.T) {
+	source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}
+	expected := "こんにちわ"
+
+	assert := func(b string, err error) {
+		if err != nil {
+			t.Errorf("Failed: %s", err.Error())
+		}
+		if b != expected {
+			t.Error("Failed")
+		}
+	}
+
+	b, err := DecodeString(string(source), "Windows-31J")
+	assert(b, err)
+
+	b, err = DecodeBytes(source, "Windows-31J")
+	assert(b, err)
+
+	b, err = Decode(source, "Windows-31J")
+	assert(b, err)
+
+	b, err = DecodeReader(bytes.NewReader(source), "Windows-31J")
+	assert(b, err)
+
+	b = MustDecodeString(string(source), "Windows-31J")
+	assert(b, nil)
+
+	b = MustDecodeBytes(source, "Windows-31J")
+	assert(b, nil)
+
+	b = MustDecode(source, "Windows-31J")
+	assert(b, nil)
+
+	b = MustDecodeReader(bytes.NewReader(source), "Windows-31J")
+	assert(b, nil)
+}
+
+func TestDecodeError(t *testing.T) {
+	source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}
+	assert := func(s string, err error) {
+		if s != "" || err == nil {
+			t.Error("Failed")
+		}
+	}
+
+	assertPanic := func(f func() string) {
+		defer func() {
+			if recover() == nil {
+				t.Error("Should be failed")
+			}
+		}()
+		s := f()
+		if s != "" {
+			t.Error("Failed")
+		}
+	}
+
+	b, err := DecodeString(string(source), "unknown")
+	assert(b, err)
+
+	b, err = DecodeBytes(source, "unknown")
+	assert(b, err)
+
+	b, err = Decode(source, "unknown")
+	assert(b, err)
+
+	b, err = DecodeReader(bytes.NewReader(source), "unknown")
+	assert(b, err)
+
+	assertPanic(func() string { return MustDecodeString(string(source), "unknown") })
+
+	assertPanic(func() string { return MustDecodeBytes(source, "unknown") })
+
+	assertPanic(func() string { return MustDecode(source, "unknown") })
+
+	assertPanic(func() string { return MustDecodeReader(bytes.NewReader(source), "unknown") })
+}