-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
## charsetutil - An easy way to convert character set encodings in Go | ||
|
||
charsetutil provides an easy way to convert character set encodings in Go. | ||
|
||
## Install | ||
|
||
```bash | ||
go get github.com/yuin/charsetutil | ||
``` | ||
|
||
## Utilities | ||
|
||
- `Decode*` : Converts from the specified charset to UTF-8. | ||
- `Encode*` : Converts from UTF-8 to the specified charset. | ||
|
||
- `MustDecode*` : Same as `Decode*`, but panics when errors occur | ||
- `MustEncode*` : Same as `Encode*`, but panics when errors occur | ||
|
||
|
||
```go | ||
b, err = EncodeString("こんにちわ", "Windows-31J") | ||
b, err = Encode("こんにちわ", "Windows-31J") | ||
b, err = EncodeBytes([]byte("こんにちわ"), "Windows-31J") | ||
b, err = EncodeReader(strings.NewReader("こんにちわ"), "Windows-31J") | ||
b = MustEncodeString("こんにちわ", "Windows-31J") | ||
b = MustEncode("こんにちわ", "Windows-31J") | ||
b = MustEncodeBytes([]byte("こんにちわ"), "Windows-31J") | ||
b = MustEncodeReader(strings.NewReader("こんにちわ"), "Windows-31J") | ||
|
||
s, err = DecodeString(string(source), "Windows-31J") | ||
s, err = Decode(source, "Windows-31J") | ||
s, err = DecodeBytes(source, "Windows-31J") | ||
s, err = DecodeReader(bytes.NewReader(source), "Windows-31J") | ||
s = MustDecodeString(string(source), "Windows-31J") | ||
s = MustDecode(source, "Windows-31J") | ||
s = MustDecodeBytes(source, "Windows-31J") | ||
s = MustDecodeReader(bytes.NewReader(source), "Windows-31J") | ||
``` | ||
|
||
## Supported character sets | ||
|
||
See [Encoding spec on WHATWG](https://encoding.spec.whatwg.org/#names-and-labels) | ||
|
||
## Author | ||
|
||
Yusuke Inuzuka | ||
|
||
## License | ||
|
||
[BSD License](http://opensource.org/licenses/BSD-2-Clause) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package charsetutil | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"golang.org/x/net/html/charset" | ||
"golang.org/x/text/transform" | ||
"io" | ||
"io/ioutil" | ||
"strings" | ||
) | ||
|
||
// panicIfError panics with err as the panic value when err is non-nil and
// returns normally otherwise.
func panicIfError(err error) {
	if err == nil {
		return
	}
	panic(err)
}
|
||
func DecodeReader(s io.Reader, enc string) (string, error) { | ||
reader, err := charset.NewReaderLabel(enc, s) | ||
if err != nil { | ||
return "", err | ||
} | ||
bytes, err := ioutil.ReadAll(reader) | ||
if err != nil { | ||
return "", err | ||
} | ||
return string(bytes), nil | ||
} | ||
|
||
func MustDecodeReader(s io.Reader, enc string) string { | ||
ret, err := DecodeReader(s, enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func DecodeBytes(s []byte, enc string) (string, error) { | ||
return DecodeReader(bytes.NewReader(s), enc) | ||
} | ||
|
||
func MustDecodeBytes(s []byte, enc string) string { | ||
ret, err := DecodeReader(bytes.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func DecodeString(s, enc string) (string, error) { | ||
return DecodeReader(strings.NewReader(s), enc) | ||
} | ||
|
||
func MustDecodeString(s, enc string) string { | ||
ret, err := DecodeReader(strings.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func Decode(s []byte, enc string) (string, error) { | ||
return DecodeReader(bytes.NewReader(s), enc) | ||
} | ||
|
||
func MustDecode(s []byte, enc string) string { | ||
ret, err := DecodeReader(bytes.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func EncodeReader(s io.Reader, enc string) ([]byte, error) { | ||
e, _ := charset.Lookup(enc) | ||
if e == nil { | ||
return nil, errors.New(fmt.Sprintf("unsupported charset: %q", enc)) | ||
} | ||
var buf bytes.Buffer | ||
writer := transform.NewWriter(&buf, e.NewEncoder()) | ||
_, err := io.Copy(writer, s) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return buf.Bytes(), nil | ||
} | ||
|
||
func MustEncodeReader(s io.Reader, enc string) []byte { | ||
ret, err := EncodeReader(s, enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func EncodeBytes(s []byte, enc string) ([]byte, error) { | ||
return EncodeReader(bytes.NewReader(s), enc) | ||
} | ||
|
||
func MustEncodeBytes(s []byte, enc string) []byte { | ||
ret, err := EncodeReader(bytes.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func EncodeString(s, enc string) ([]byte, error) { | ||
return EncodeReader(strings.NewReader(s), enc) | ||
} | ||
|
||
func MustEncodeString(s, enc string) []byte { | ||
ret, err := EncodeReader(strings.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
} | ||
|
||
func Encode(s string, enc string) ([]byte, error) { | ||
return EncodeReader(strings.NewReader(s), enc) | ||
} | ||
|
||
func MustEncode(s string, enc string) []byte { | ||
ret, err := EncodeReader(strings.NewReader(s), enc) | ||
panicIfError(err) | ||
return ret | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
package charsetutil | ||
|
||
import ( | ||
"bytes" | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestEncodeOk(t *testing.T) { | ||
expected := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'} | ||
assert := func(b []byte, err error) { | ||
if err != nil { | ||
t.Errorf("Failed: %s", err.Error()) | ||
} | ||
if string(b) != string(expected) { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
b, err := EncodeString("こんにちわ", "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = EncodeBytes([]byte("こんにちわ"), "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = Encode("こんにちわ", "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = EncodeReader(strings.NewReader("こんにちわ"), "Windows-31J") | ||
assert(b, err) | ||
|
||
b = MustEncodeString("こんにちわ", "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustEncodeBytes([]byte("こんにちわ"), "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustEncode("こんにちわ", "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustEncodeReader(strings.NewReader("こんにちわ"), "Windows-31J") | ||
assert(b, nil) | ||
} | ||
|
||
func TestEncodeError(t *testing.T) { | ||
assert := func(b []byte, err error) { | ||
if b != nil || err == nil { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
assertPanic := func(f func() []byte) { | ||
defer func() { | ||
if recover() == nil { | ||
t.Error("Should be failed") | ||
} | ||
}() | ||
b := f() | ||
if b != nil { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
b, err := EncodeString("こんにちわ", "unknown") | ||
assert(b, err) | ||
|
||
b, err = EncodeBytes([]byte("こんにちわ"), "unknown") | ||
assert(b, err) | ||
|
||
b, err = Encode("こんにちわ", "unknown") | ||
assert(b, err) | ||
|
||
b, err = EncodeReader(strings.NewReader("こんにちわ"), "unknown") | ||
assert(b, err) | ||
|
||
assertPanic(func() []byte { return MustEncodeString("こんにちわ", "unknown") }) | ||
|
||
assertPanic(func() []byte { return MustEncodeBytes([]byte("こんにちわ"), "unknown") }) | ||
|
||
assertPanic(func() []byte { return MustEncode("こんにちわ", "unknown") }) | ||
|
||
assertPanic(func() []byte { return MustEncodeReader(strings.NewReader("こんにちわ"), "unknown") }) | ||
} | ||
|
||
func TestDecodeOk(t *testing.T) { | ||
source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'} | ||
expected := "こんにちわ" | ||
|
||
assert := func(b string, err error) { | ||
if err != nil { | ||
t.Errorf("Failed: %s", err.Error()) | ||
} | ||
if b != expected { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
b, err := DecodeString(string(source), "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = DecodeBytes(source, "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = Decode(source, "Windows-31J") | ||
assert(b, err) | ||
|
||
b, err = DecodeReader(bytes.NewReader(source), "Windows-31J") | ||
assert(b, err) | ||
|
||
b = MustDecodeString(string(source), "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustDecodeBytes(source, "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustDecode(source, "Windows-31J") | ||
assert(b, nil) | ||
|
||
b = MustDecodeReader(bytes.NewReader(source), "Windows-31J") | ||
assert(b, nil) | ||
} | ||
|
||
func TestDecodeError(t *testing.T) { | ||
source := []byte{'\x82', '\xb1', '\x82', '\xf1', '\x82', '\xc9', '\x82', '\xbf', '\x82', '\xed'} | ||
assert := func(s string, err error) { | ||
if s != "" || err == nil { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
assertPanic := func(f func() string) { | ||
defer func() { | ||
if recover() == nil { | ||
t.Error("Should be failed") | ||
} | ||
}() | ||
s := f() | ||
if s != "" { | ||
t.Error("Failed") | ||
} | ||
} | ||
|
||
b, err := DecodeString(string(source), "unknown") | ||
assert(b, err) | ||
|
||
b, err = DecodeBytes(source, "unknown") | ||
assert(b, err) | ||
|
||
b, err = Decode(source, "unknown") | ||
assert(b, err) | ||
|
||
b, err = DecodeReader(bytes.NewReader(source), "unknown") | ||
assert(b, err) | ||
|
||
assertPanic(func() string { return MustDecodeString(string(source), "unknown") }) | ||
|
||
assertPanic(func() string { return MustDecodeBytes(source, "unknown") }) | ||
|
||
assertPanic(func() string { return MustDecode(source, "unknown") }) | ||
|
||
assertPanic(func() string { return MustDecodeReader(bytes.NewReader(source), "unknown") }) | ||
} |