diff --git a/source/DataFile.cpp b/source/DataFile.cpp index db076888d031..b70095893cbf 100644 --- a/source/DataFile.cpp +++ b/source/DataFile.cpp @@ -109,7 +109,13 @@ void DataFile::LoadData(const string &data) size_t lineNumber = 0; size_t end = data.length(); - for(size_t pos = 0; pos < end; ) + + size_t pos = 0; + // If the first code point is the UTF-8 byte order mark (BOM), skip it; otherwise rewind pos to 0 (the decode call presumably advanced it - confirm). + if(!Utf8::IsBOM(Utf8::DecodeCodePoint(data, pos))) + pos = 0; + + while(pos < end) { ++lineNumber; size_t tokenPos = pos; diff --git a/source/text/Utf8.cpp b/source/text/Utf8.cpp index 0e138bc2c919..def3001ea37d 100644 --- a/source/text/Utf8.cpp +++ b/source/text/Utf8.cpp @@ -23,6 +23,12 @@ this program. If not, see . using namespace std; +namespace { + constexpr char32_t BOM = 0x0000FEFF; +} + + + namespace Utf8 { #if defined(_WIN32) wstring ToUTF16(const string &input, bool isPath) @@ -60,6 +66,14 @@ namespace Utf8 { + // Check if this code point is the byte order mark (BOM), U+FEFF. + bool IsBOM(char32_t c) + { + return c == BOM; + } + + + size_t NextCodePoint(const string &str, size_t pos) { if(pos >= str.length()) diff --git a/source/text/Utf8.h b/source/text/Utf8.h index 2094d626669c..c639cd77bf99 100644 --- a/source/text/Utf8.h +++ b/source/text/Utf8.h @@ -25,6 +25,9 @@ namespace Utf8 { std::string ToUTF8(const wchar_t *str); #endif + // Check if this code point is the byte order mark (BOM), U+FEFF. + bool IsBOM(char32_t c); + // Skip to the next unicode code point after pos in utf8. // Return string::npos when there are no more code points. std::size_t NextCodePoint(const std::string &str, std::size_t pos);