forked from TypesettingTools/Aegisub
-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
system iconv doesn't support UTF-16/32. Replace occurrences with UCS-2/4 instead.
- Loading branch information
Showing
3 changed files
with
158 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
diff --git a/libaegisub/common/charset.cpp b/libaegisub/common/charset.cpp | ||
index fa25eea83..da6f2801f 100644 | ||
--- a/libaegisub/common/charset.cpp | ||
+++ b/libaegisub/common/charset.cpp | ||
@@ -37,11 +37,11 @@ std::string Detect(agi::fs::path const& file) { | ||
if (!strncmp(header, "\x00\x00\xfe\xff", 4)) | ||
return "utf-32be"; | ||
if (!strncmp(header, "\xff\xfe\x00\x00", 4)) | ||
- return "utf-32le"; | ||
+ return "ucs-4le"; | ||
if (!strncmp(header, "\xfe\xff", 2)) | ||
- return "utf-16be"; | ||
+ return "ucs-2be"; | ||
if (!strncmp(header, "\xff\xfe", 2)) | ||
- return "utf-16le"; | ||
+ return "ucs-2le"; | ||
if (!strncmp(header, "\x1a\x45\xdf\xa3", 4)) | ||
return "binary"; // Actually EBML/Matroska | ||
} | ||
diff --git a/libaegisub/common/format.cpp b/libaegisub/common/format.cpp | ||
index 829baa782..2836d605b 100644 | ||
--- a/libaegisub/common/format.cpp | ||
+++ b/libaegisub/common/format.cpp | ||
@@ -22,9 +22,9 @@ | ||
#include <boost/filesystem/path.hpp> | ||
|
||
#ifdef _MSC_VER | ||
-#define WCHAR_T_ENC "utf-16le" | ||
+#define WCHAR_T_ENC "ucs-2le" | ||
#else | ||
-#define WCHAR_T_ENC "utf-32le" | ||
+#define WCHAR_T_ENC "ucs-4le" | ||
#endif | ||
|
||
template class boost::interprocess::basic_vectorstream<std::string>; | ||
diff --git a/libaegisub/include/libaegisub/charsets.def b/libaegisub/include/libaegisub/charsets.def | ||
index 72edba3ab..a2040cdcc 100644 | ||
--- a/libaegisub/include/libaegisub/charsets.def | ||
+++ b/libaegisub/include/libaegisub/charsets.def | ||
@@ -1,12 +1,12 @@ | ||
ADD("Local", "") | ||
|
||
ADD("Unicode (UTF-8)", "utf-8") | ||
-ADD("Unicode (UTF-16)", "utf-16") | ||
-ADD("Unicode (UTF-16BE)", "utf-16be") | ||
-ADD("Unicode (UTF-16LE)", "utf-16le") | ||
+ADD("Unicode (UCS-2)", "ucs-2") | ||
+ADD("Unicode (UCS-2BE)", "ucs-2be") | ||
+ADD("Unicode (UCS-2LE)", "ucs-2le") | ||
ADD("Unicode (UTF-32)", "utf-32") | ||
ADD("Unicode (UTF-32BE)", "utf-32be") | ||
-ADD("Unicode (UTF-32LE)", "utf-32le") | ||
+ADD("Unicode (UCS-4LE)", "ucs-4le") | ||
ADD("Unicode (UTF-7)", "utf-7") | ||
|
||
ADD("Arabic (IBM-864)", "ibm864") | ||
diff --git a/tests/tests/iconv.cpp b/tests/tests/iconv.cpp | ||
index 91d512b1b..351e9f0fd 100644 | ||
--- a/tests/tests/iconv.cpp | ||
+++ b/tests/tests/iconv.cpp | ||
@@ -22,12 +22,12 @@ | ||
using namespace agi::charset; | ||
|
||
TEST(lagi_iconv, BasicSetup) { | ||
- EXPECT_NO_THROW(IconvWrapper("UTF-8", "UTF-16LE")); | ||
+ EXPECT_NO_THROW(IconvWrapper("UTF-8", "UCS-2LE")); | ||
} | ||
|
||
TEST(lagi_iconv, InvalidConversions) { | ||
- EXPECT_THROW(IconvWrapper("nonexistent charset", "UTF-16LE"), UnsupportedConversion); | ||
- EXPECT_THROW(IconvWrapper("UTF-16LE", "nonexistent charset"), UnsupportedConversion); | ||
+ EXPECT_THROW(IconvWrapper("nonexistent charset", "UCS-2LE"), UnsupportedConversion); | ||
+ EXPECT_THROW(IconvWrapper("UCS-2LE", "nonexistent charset"), UnsupportedConversion); | ||
EXPECT_THROW(IconvWrapper("nonexistent charset", "nonexistent charset"), UnsupportedConversion); | ||
} | ||
|
||
@@ -40,7 +40,7 @@ TEST(lagi_iconv, StrLen1) { | ||
} | ||
} | ||
TEST(lagi_iconv, StrLen2) { | ||
- IconvWrapper conv("UTF-16LE", "UTF-16LE", false); | ||
+ IconvWrapper conv("UCS-2LE", "UCS-2LE", false); | ||
for (int i = 0; i < 10; i++) { | ||
std::basic_string<int16_t> str(i, ' '); | ||
ASSERT_EQ(2*i, conv.SrcStrLen((const char *)str.c_str())); | ||
@@ -48,7 +48,7 @@ TEST(lagi_iconv, StrLen2) { | ||
} | ||
} | ||
TEST(lagi_iconv, StrLen4) { | ||
- IconvWrapper conv("UTF-32LE", "UTF-32LE", false); | ||
+ IconvWrapper conv("UCS-4LE", "UCS-4LE", false); | ||
for (int i = 0; i < 10; i++) { | ||
std::basic_string<int32_t> str(i, ' '); | ||
ASSERT_EQ(4*i, conv.SrcStrLen((const char *)str.c_str())); | ||
@@ -60,7 +60,7 @@ TEST(lagi_iconv, StrLen4) { | ||
TEST(lagi_iconv, Fallbacks) { | ||
IconvWrapper nofallback("UTF-8", "Shift-JIS", false); | ||
IconvWrapper fallback("UTF-8", "Shift-JIS", true); | ||
- IconvWrapper noneneeded("UTF-8", "UTF-16LE", false); | ||
+ IconvWrapper noneneeded("UTF-8", "UCS-2LE", false); | ||
|
||
// Shift-JIS does not have a backslash | ||
EXPECT_THROW(nofallback.Convert("\\"), BadInput); | ||
@@ -84,17 +84,17 @@ TEST(lagi_iconv, Fallbacks) { | ||
} | ||
|
||
TEST(lagi_iconv, BadInput) { | ||
- IconvWrapper utf16("UTF-16LE", "UTF-8"); | ||
+ IconvWrapper utf16("UCS-2LE", "UTF-8"); | ||
EXPECT_THROW(utf16.Convert(" "), BadInput); | ||
- IconvWrapper utf8("UTF-8", "UTF-16LE"); | ||
+ IconvWrapper utf8("UTF-8", "UCS-2LE"); | ||
EXPECT_THROW(utf8.Convert("\xE2\xFF"), BadInput); | ||
} | ||
#endif | ||
|
||
TEST(lagi_iconv, Conversions) { | ||
- IconvWrapper utf16le("UTF-16LE", "UTF-8", false); | ||
- IconvWrapper utf16be("UTF-16BE", "UTF-8", false); | ||
- IconvWrapper utf8("UTF-8", "UTF-16LE", false); | ||
+ IconvWrapper utf16le("UCS-2LE", "UTF-8", false); | ||
+ IconvWrapper utf16be("UCS-2BE", "UTF-8", false); | ||
+ IconvWrapper utf8("UTF-8", "UCS-2LE", false); | ||
|
||
char space_utf8_[] = " "; | ||
char space_utf16be_[] = {0, 32, 0, 0}; | ||
@@ -110,7 +110,7 @@ TEST(lagi_iconv, Conversions) { | ||
|
||
// Basic overflow tests | ||
TEST(lagi_iconv, Buffer) { | ||
- IconvWrapper conv("UTF-8", "UTF-16LE", false); | ||
+ IconvWrapper conv("UTF-8", "UCS-2LE", false); | ||
char buff[32]; | ||
memset(buff, 0xFF, sizeof(buff)); | ||
|
||
diff --git a/tests/tests/line_iterator.cpp b/tests/tests/line_iterator.cpp | ||
index 41e6ae2f9..2424c170b 100644 | ||
--- a/tests/tests/line_iterator.cpp | ||
+++ b/tests/tests/line_iterator.cpp | ||
@@ -49,9 +49,9 @@ void expect_eq(const char *str, Values... values) { | ||
std::string utf8(str); | ||
test<T>(utf8, "utf-8", values...); | ||
|
||
- agi::charset::IconvWrapper conv("utf-8", "utf-16"); | ||
+ agi::charset::IconvWrapper conv("utf-8", "ucs-2"); | ||
auto utf16 = conv.Convert(utf8); | ||
- test<T>(utf16, "utf-16", values...); | ||
+ test<T>(utf16, "ucs-2", values...); | ||
} | ||
|
||
TEST(lagi_line, int) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters