Skip to content

Commit

Permalink
fix(parsing): Detect and move past the UTF8 byte order mark (BOM) at …
Browse files Browse the repository at this point in the history
…the beginning of a data file (endless-sky#10121)
  • Loading branch information
warp-core authored Jun 2, 2024
1 parent 314dc21 commit d1ddd0f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
8 changes: 7 additions & 1 deletion source/DataFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,13 @@ void DataFile::LoadData(const string &data)
size_t lineNumber = 0;

size_t end = data.length();
for(size_t pos = 0; pos < end; )

size_t pos = 0;
// If the first character is the UTF8 byte order mark (BOM), skip it.
if(!Utf8::IsBOM(Utf8::DecodeCodePoint(data, pos)))
pos = 0;

while(pos < end)
{
++lineNumber;
size_t tokenPos = pos;
Expand Down
14 changes: 14 additions & 0 deletions source/text/Utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ this program. If not, see <https://www.gnu.org/licenses/>.

using namespace std;

namespace {
	// The Unicode byte order mark (BOM) code point, U+FEFF.
	constexpr char32_t BOM = U'\uFEFF';
}



namespace Utf8 {
#if defined(_WIN32)
wstring ToUTF16(const string &input, bool isPath)
Expand Down Expand Up @@ -60,6 +66,14 @@ namespace Utf8 {



// Report whether the given code point equals the byte order mark (BOM).
bool IsBOM(char32_t c)
{
	const bool isMark = (BOM == c);
	return isMark;
}



size_t NextCodePoint(const string &str, size_t pos)
{
if(pos >= str.length())
Expand Down
3 changes: 3 additions & 0 deletions source/text/Utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ namespace Utf8 {
std::string ToUTF8(const wchar_t *str);
#endif

// Check whether this code point is the byte order mark (BOM).
// The argument is a single, already-decoded code point, not a byte sequence.
bool IsBOM(char32_t c);

// Skip to the next Unicode code point after pos in a UTF-8 string.
// Return string::npos when there are no more code points.
std::size_t NextCodePoint(const std::string &str, std::size_t pos);
Expand Down

0 comments on commit d1ddd0f

Please sign in to comment.