Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
yuin committed Jul 14, 2016
1 parent 037ab82 commit 891287c
Show file tree
Hide file tree
Showing 3 changed files with 330 additions and 0 deletions.
51 changes: 51 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
## charsetutil - The easiest way to convert character set encodings in Go

charsetutil provides the easiest way to convert character set encodings in Go.

## Install

```bash
go get github.com/yuin/charsetutil
```

## Utilities

- `Decode*` : Converts from the specified charset to UTF-8.
- `Encode*` : Converts from UTF-8 to the specified charset.

- `MustDecode*` : Same as `Decode*`, but panics when errors occur
- `MustEncode*` : Same as `Encode*`, but panics when errors occur


```go
b, err = EncodeString("こんにちわ", "Windows-31J")
b, err = Encode("こんにちわ", "Windows-31J")
b, err = EncodeBytes([]byte("こんにちわ"), "Windows-31J")
b, err = EncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")
b = MustEncodeString("こんにちわ", "Windows-31J")
b = MustEncode("こんにちわ", "Windows-31J")
b = MustEncodeBytes([]byte("こんにちわ"), "Windows-31J")
b = MustEncodeReader(strings.NewReader("こんにちわ"), "Windows-31J")

s, err = DecodeString(string(source), "Windows-31J")
s, err = Decode(source, "Windows-31J")
s, err = DecodeBytes(source, "Windows-31J")
s, err = DecodeReader(bytes.NewReader(source), "Windows-31J")
s = MustDecodeString(string(source), "Windows-31J")
s = MustDecode(source, "Windows-31J")
s = MustDecodeBytes(source, "Windows-31J")
s = MustDecodeReader(bytes.NewReader(source), "Windows-31J")
```

## Supported character sets

See [Encoding spec on WHATWG](https://encoding.spec.whatwg.org/#names-and-labels)

## Author

Yusuke Inuzuka

## License

[BSD License](http://opensource.org/licenses/BSD-2-Clause)

117 changes: 117 additions & 0 deletions charsetutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package charsetutil

import (
"bytes"
"errors"
"fmt"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
"io"
"io/ioutil"
"strings"
)

// panicIfError panics with err as the panic value when err is non-nil and
// returns normally otherwise. The Must* functions in this file call it to
// convert an error result into a panic.
func panicIfError(err error) {
	if err == nil {
		return
	}
	panic(err)
}

// DecodeReader reads s to EOF through a reader created by
// charset.NewReaderLabel for the charset label enc and returns everything
// that was read as a string.
//
// If the converting reader cannot be created, or reading from it fails, the
// empty string and the error are returned.
func DecodeReader(s io.Reader, enc string) (string, error) {
	converted, err := charset.NewReaderLabel(enc, s)
	if err != nil {
		return "", err
	}

	decoded, err := ioutil.ReadAll(converted)
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

// MustDecodeReader is like DecodeReader but panics with the error instead of
// returning it.
func MustDecodeReader(s io.Reader, enc string) string {
	decoded, decodeErr := DecodeReader(s, enc)
	panicIfError(decodeErr)
	return decoded
}

// DecodeBytes decodes the bytes in s from the charset enc by wrapping them in
// a bytes.Reader and handing that to DecodeReader. The result and error are
// whatever DecodeReader returns.
func DecodeBytes(s []byte, enc string) (string, error) {
	src := bytes.NewReader(s)
	return DecodeReader(src, enc)
}

// MustDecodeBytes decodes the bytes in s from the charset enc via
// DecodeReader and panics with the error if decoding fails.
func MustDecodeBytes(s []byte, enc string) string {
	src := bytes.NewReader(s)
	decoded, decodeErr := DecodeReader(src, enc)
	panicIfError(decodeErr)
	return decoded
}

// DecodeString decodes the raw bytes held in the string s from the charset
// enc by wrapping s in a strings.Reader and handing that to DecodeReader.
// The result and error are whatever DecodeReader returns.
func DecodeString(s, enc string) (string, error) {
	src := strings.NewReader(s)
	return DecodeReader(src, enc)
}

// MustDecodeString decodes the raw bytes held in the string s from the
// charset enc via DecodeReader and panics with the error if decoding fails.
func MustDecodeString(s, enc string) string {
	src := strings.NewReader(s)
	decoded, decodeErr := DecodeReader(src, enc)
	panicIfError(decodeErr)
	return decoded
}

// Decode decodes the bytes in s from the charset enc. It has the same
// signature and body as DecodeBytes: s is wrapped in a bytes.Reader and
// passed to DecodeReader.
func Decode(s []byte, enc string) (string, error) {
	src := bytes.NewReader(s)
	return DecodeReader(src, enc)
}

// MustDecode decodes the bytes in s from the charset enc via DecodeReader and
// panics with the error if decoding fails. It has the same signature and body
// as MustDecodeBytes.
func MustDecode(s []byte, enc string) string {
	src := bytes.NewReader(s)
	decoded, decodeErr := DecodeReader(src, enc)
	panicIfError(decodeErr)
	return decoded
}

// EncodeReader reads UTF-8 text from s until EOF and returns it encoded in
// the charset identified by the label enc.
//
// The encoding is resolved with charset.Lookup; when the lookup yields no
// encoding, a nil slice and an "unsupported charset" error are returned. Any
// error from copying s through the encoder, or from flushing the encoder at
// the end of input, is returned with a nil slice.
func EncodeReader(s io.Reader, enc string) ([]byte, error) {
	e, _ := charset.Lookup(enc)
	if e == nil {
		return nil, errors.New(fmt.Sprintf("unsupported charset: %q", enc))
	}

	var buf bytes.Buffer
	writer := transform.NewWriter(&buf, e.NewEncoder())
	if _, err := io.Copy(writer, s); err != nil {
		return nil, err
	}
	// The transform.Writer may still hold input it could not convert yet
	// (for example a partial trailing sequence), and the encoder only learns
	// that the input has ended when Close is called. Without it the output
	// can be truncated and end-of-input errors go unreported.
	if err := writer.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

// MustEncodeReader is like EncodeReader but panics with the error instead of
// returning it.
func MustEncodeReader(s io.Reader, enc string) []byte {
	encoded, encodeErr := EncodeReader(s, enc)
	panicIfError(encodeErr)
	return encoded
}

// EncodeBytes encodes the UTF-8 bytes in s into the charset enc by wrapping
// them in a bytes.Reader and handing that to EncodeReader. The result and
// error are whatever EncodeReader returns.
func EncodeBytes(s []byte, enc string) ([]byte, error) {
	src := bytes.NewReader(s)
	return EncodeReader(src, enc)
}

// MustEncodeBytes encodes the UTF-8 bytes in s into the charset enc via
// EncodeReader and panics with the error if encoding fails.
func MustEncodeBytes(s []byte, enc string) []byte {
	src := bytes.NewReader(s)
	encoded, encodeErr := EncodeReader(src, enc)
	panicIfError(encodeErr)
	return encoded
}

// EncodeString encodes the UTF-8 string s into the charset enc by wrapping it
// in a strings.Reader and handing that to EncodeReader. The result and error
// are whatever EncodeReader returns.
func EncodeString(s, enc string) ([]byte, error) {
	src := strings.NewReader(s)
	return EncodeReader(src, enc)
}

// MustEncodeString encodes the UTF-8 string s into the charset enc via
// EncodeReader and panics with the error if encoding fails.
func MustEncodeString(s, enc string) []byte {
	src := strings.NewReader(s)
	encoded, encodeErr := EncodeReader(src, enc)
	panicIfError(encodeErr)
	return encoded
}

// Encode encodes the UTF-8 string s into the charset enc. It has the same
// parameter types and body as EncodeString: s is wrapped in a strings.Reader
// and passed to EncodeReader.
func Encode(s string, enc string) ([]byte, error) {
	src := strings.NewReader(s)
	return EncodeReader(src, enc)
}

// MustEncode encodes the UTF-8 string s into the charset enc via EncodeReader
// and panics with the error if encoding fails. It has the same parameter
// types and body as MustEncodeString.
func MustEncode(s string, enc string) []byte {
	src := strings.NewReader(s)
	encoded, encodeErr := EncodeReader(src, enc)
	panicIfError(encodeErr)
	return encoded
}
162 changes: 162 additions & 0 deletions charsetutil_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package charsetutil

import (
"bytes"
"strings"
"testing"
)

// TestEncodeOk checks that every Encode* and MustEncode* variant converts the
// UTF-8 string "こんにちわ" to the expected Windows-31J byte sequence.
func TestEncodeOk(t *testing.T) {
	const (
		input = "こんにちわ"
		enc   = "Windows-31J"
	)
	expected := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}

	// assert names the variant under test and prints the got/want bytes so a
	// failure can be diagnosed from the test output alone.
	assert := func(name string, b []byte, err error) {
		if err != nil {
			t.Errorf("%s: unexpected error: %v", name, err)
			return
		}
		if !bytes.Equal(b, expected) {
			t.Errorf("%s: got [% x], want [% x]", name, b, expected)
		}
	}

	b, err := EncodeString(input, enc)
	assert("EncodeString", b, err)

	b, err = EncodeBytes([]byte(input), enc)
	assert("EncodeBytes", b, err)

	b, err = Encode(input, enc)
	assert("Encode", b, err)

	b, err = EncodeReader(strings.NewReader(input), enc)
	assert("EncodeReader", b, err)

	// The Must* variants panic on error, so reaching assert means err is nil.
	assert("MustEncodeString", MustEncodeString(input, enc), nil)
	assert("MustEncodeBytes", MustEncodeBytes([]byte(input), enc), nil)
	assert("MustEncode", MustEncode(input, enc), nil)
	assert("MustEncodeReader", MustEncodeReader(strings.NewReader(input), enc), nil)
}

// TestEncodeError checks that every Encode* variant returns a nil slice and a
// non-nil error for an unknown charset label, and that every MustEncode*
// variant panics for it.
func TestEncodeError(t *testing.T) {
	const (
		input = "こんにちわ"
		enc   = "unknown"
	)

	// assert names the variant under test and says which half of the
	// (nil result, non-nil error) contract was violated.
	assert := func(name string, b []byte, err error) {
		if err == nil {
			t.Errorf("%s: expected an error for charset %q, got nil", name, enc)
		}
		if b != nil {
			t.Errorf("%s: expected a nil result on error, got [% x]", name, b)
		}
	}

	// assertPanic runs f and fails unless it panics.
	assertPanic := func(name string, f func() []byte) {
		defer func() {
			if recover() == nil {
				t.Errorf("%s: expected a panic for charset %q", name, enc)
			}
		}()
		// Only reached past f() when it did not panic; the deferred check
		// above reports that, this adds what was returned instead.
		if b := f(); b != nil {
			t.Errorf("%s: returned [% x] instead of panicking", name, b)
		}
	}

	b, err := EncodeString(input, enc)
	assert("EncodeString", b, err)

	b, err = EncodeBytes([]byte(input), enc)
	assert("EncodeBytes", b, err)

	b, err = Encode(input, enc)
	assert("Encode", b, err)

	b, err = EncodeReader(strings.NewReader(input), enc)
	assert("EncodeReader", b, err)

	assertPanic("MustEncodeString", func() []byte { return MustEncodeString(input, enc) })

	assertPanic("MustEncodeBytes", func() []byte { return MustEncodeBytes([]byte(input), enc) })

	assertPanic("MustEncode", func() []byte { return MustEncode(input, enc) })

	assertPanic("MustEncodeReader", func() []byte { return MustEncodeReader(strings.NewReader(input), enc) })
}

// TestDecodeOk checks that every Decode* and MustDecode* variant converts the
// Windows-31J byte sequence in source to the UTF-8 string "こんにちわ".
func TestDecodeOk(t *testing.T) {
	const enc = "Windows-31J"
	source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}
	expected := "こんにちわ"

	// assert names the variant under test and prints got/want so a failure
	// can be diagnosed from the test output alone.
	assert := func(name string, s string, err error) {
		if err != nil {
			t.Errorf("%s: unexpected error: %v", name, err)
			return
		}
		if s != expected {
			t.Errorf("%s: got %q, want %q", name, s, expected)
		}
	}

	b, err := DecodeString(string(source), enc)
	assert("DecodeString", b, err)

	b, err = DecodeBytes(source, enc)
	assert("DecodeBytes", b, err)

	b, err = Decode(source, enc)
	assert("Decode", b, err)

	b, err = DecodeReader(bytes.NewReader(source), enc)
	assert("DecodeReader", b, err)

	// The Must* variants panic on error, so reaching assert means err is nil.
	assert("MustDecodeString", MustDecodeString(string(source), enc), nil)
	assert("MustDecodeBytes", MustDecodeBytes(source, enc), nil)
	assert("MustDecode", MustDecode(source, enc), nil)
	assert("MustDecodeReader", MustDecodeReader(bytes.NewReader(source), enc), nil)
}

// TestDecodeError checks that every Decode* variant returns an empty string
// and a non-nil error for an unknown charset label, and that every
// MustDecode* variant panics for it.
func TestDecodeError(t *testing.T) {
	const enc = "unknown"
	source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'}

	// assert names the variant under test and says which half of the
	// (empty result, non-nil error) contract was violated.
	assert := func(name string, s string, err error) {
		if err == nil {
			t.Errorf("%s: expected an error for charset %q, got nil", name, enc)
		}
		if s != "" {
			t.Errorf("%s: expected an empty result on error, got %q", name, s)
		}
	}

	// assertPanic runs f and fails unless it panics.
	assertPanic := func(name string, f func() string) {
		defer func() {
			if recover() == nil {
				t.Errorf("%s: expected a panic for charset %q", name, enc)
			}
		}()
		// Only reached past f() when it did not panic; the deferred check
		// above reports that, this adds what was returned instead.
		if s := f(); s != "" {
			t.Errorf("%s: returned %q instead of panicking", name, s)
		}
	}

	b, err := DecodeString(string(source), enc)
	assert("DecodeString", b, err)

	b, err = DecodeBytes(source, enc)
	assert("DecodeBytes", b, err)

	b, err = Decode(source, enc)
	assert("Decode", b, err)

	b, err = DecodeReader(bytes.NewReader(source), enc)
	assert("DecodeReader", b, err)

	assertPanic("MustDecodeString", func() string { return MustDecodeString(string(source), enc) })

	assertPanic("MustDecodeBytes", func() string { return MustDecodeBytes(source, enc) })

	assertPanic("MustDecode", func() string { return MustDecode(source, enc) })

	assertPanic("MustDecodeReader", func() string { return MustDecodeReader(bytes.NewReader(source), enc) })
}

0 comments on commit 891287c

Please sign in to comment.