Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text format (e.g. IridasCube) parsing optimizations #2074

Merged
merged 10 commits into from
Dec 7, 2024
3 changes: 3 additions & 0 deletions share/cmake/utils/CompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ if(USE_MSVC)
)
endif()

# Make MSVC compiler report correct __cplusplus version (otherwise reports 199711L)
set(PLATFORM_COMPILE_OPTIONS "${PLATFORM_COMPILE_OPTIONS};/Zc:__cplusplus")

# Explicitely specify the default warning level i.e. /W3.
# Note: Do not use /Wall (i.e. /W4) which adds 'informational' warnings.
set(PLATFORM_COMPILE_OPTIONS "${PLATFORM_COMPILE_OPTIONS};/W3")
Expand Down
4 changes: 2 additions & 2 deletions src/OpenColorIO/LookParse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ void LookParseResult::Token::parse(const std::string & str)
{
// Assert no commas, colons, or | in str.

if(StringUtils::StartsWith(str, "+"))
if(StringUtils::StartsWith(str, '+'))
{
name = StringUtils::LeftTrim(str, '+');
dir = TRANSFORM_DIR_FORWARD;
}
// TODO: Handle --
else if(StringUtils::StartsWith(str, "-"))
else if(StringUtils::StartsWith(str, '-'))
{
name = StringUtils::LeftTrim(str, '-');
dir = TRANSFORM_DIR_INVERSE;
Expand Down
4 changes: 2 additions & 2 deletions src/OpenColorIO/ParseUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,13 +672,13 @@ bool nextline(std::istream &istream, std::string &line)
{
line.resize(line.size() - 1);
}
if(!StringUtils::Trim(line).empty())
if(!StringUtils::IsEmptyOrWhiteSpace(line))
{
return true;
}
}

line = "";
line.clear();
return false;
}

Expand Down
4 changes: 2 additions & 2 deletions src/OpenColorIO/fileformats/FileFormat3DL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,11 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
if(lineParts.empty()) continue;
if (lineParts.size() > 0)
{
if (StringUtils::StartsWith(lineParts[0], "#"))
if (StringUtils::StartsWith(lineParts[0], '#'))
{
continue;
}
if (StringUtils::StartsWith(lineParts[0], "<"))
if (StringUtils::StartsWith(lineParts[0], '<'))
{
// Format error: reject files that could be
// formatted as xml.
Expand Down
6 changes: 3 additions & 3 deletions src/OpenColorIO/fileformats/FileFormatIridasCube.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ LocalFileFormat::read(std::istream & istream,
{
++lineNumber;
// All lines starting with '#' are comments
if (StringUtils::StartsWith(line,"#")) continue;
if (StringUtils::StartsWith(line,'#')) continue;

line = StringUtils::Lower(StringUtils::Trim(line));

Expand Down Expand Up @@ -310,10 +310,10 @@ LocalFileFormat::read(std::istream & istream,

do
{
line = StringUtils::Trim(line);
line = StringUtils::LeftTrim(line);

// All lines starting with '#' are comments
if (StringUtils::StartsWith(line,"#")) continue;
if (StringUtils::StartsWith(line,'#')) continue;

if (line.empty()) continue;

Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatIridasItx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
{
++lineNumber;
// All lines starting with '#' are comments
if(StringUtils::StartsWith(line,"#")) continue;
if(StringUtils::StartsWith(line,'#')) continue;

// Strip, lowercase, and split the line
parts = StringUtils::SplitByWhiteSpaces(StringUtils::Lower(StringUtils::Trim(line)));
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatPandora.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
if(parts.empty()) continue;

// Skip all lines starting with '#'
if(StringUtils::StartsWith(parts[0],"#")) continue;
if(StringUtils::StartsWith(parts[0],'#')) continue;

if(parts[0] == "channel")
{
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatResolveCube.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
++lineNumber;

// All lines starting with '#' are comments
if(StringUtils::StartsWith(line,"#"))
if(StringUtils::StartsWith(line,'#'))
{
if(headerComplete)
{
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatSpi1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
}
}
}
while (istream.good() && !StringUtils::StartsWith(headerLine,"{"));
while (istream.good() && !StringUtils::StartsWith(headerLine,'{'));
}

if (version == -1)
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatTruelight.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,
if(parts.empty()) continue;

// Parse header metadata (which starts with #)
if(StringUtils::StartsWith(parts[0],"#"))
if(StringUtils::StartsWith(parts[0],'#'))
{
if(parts.size() < 2) continue;

Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/fileformats/FileFormatVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ CachedFileRcPtr LocalFileFormat::read(std::istream & istream,

if(parts.empty()) continue;

if(StringUtils::StartsWith(parts[0],"#")) continue;
if(StringUtils::StartsWith(parts[0],'#')) continue;

if(!in3d)
{
Expand Down
89 changes: 86 additions & 3 deletions src/utils/NumberUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@
#ifndef INCLUDED_NUMBERUTILS_H
#define INCLUDED_NUMBERUTILS_H

// With C++17, we can use <charconv> from_chars.
//
// That has advantage of not dealing with locale / errno,
// which might involve locks or thread local storage accesses.
//
// Note that it is not enough to check __cplusplus version,
// since even if compiler reports C++17 it might not implement
// from_chars for floats. Checking __cpp_lib_to_chars works
// correctly and the check starts passing with gcc 11, clang 12,
// msvc 2019 (16.4). It correctly does not pass on apple clang 15,
// since it does not implement from_chars for floats.
#if __cpp_lib_to_chars >= 201611L
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor but to make sure:
Seems like __cpp_lib_to_chars "feature-test macros" are standardized in C++20, If I understand correctly -in theory- a C++17 compiler may not have this macro defined even if from_chars are implemented, thus we may have a false-negative here. Is that correct?

The worst case it'll fall back to slow path, so that's totally fine but I'm just curious.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, yes. In practice, this was the "least bad" way I found that would work on all of MSVC/clang/gcc, while also working on Apple Clang (in a sense that Apple Clang does not pass the test; from_chars on floats is still not there). In theory there could be some exotic compiler that might have from_chars for floats but not expose __cpp_lib_to_chars.

#define USE_CHARCONV_FROM_CHARS
#include <charconv>
#endif

#if defined(_MSC_VER)
#define really_inline __forceinline
#else
Expand Down Expand Up @@ -54,14 +70,56 @@ struct from_chars_result

static const Locale loc;

#ifdef USE_CHARCONV_FROM_CHARS
really_inline bool from_chars_is_space(char c) noexcept
{
return c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f';
}

// skip prefix whitespace and "+"
really_inline const char* from_chars_skip_prefix(const char* first, const char* last) noexcept
{
while (first < last && from_chars_is_space(first[0]))
{
++first;
}
if (first < last && first[0] == '+')
{
++first;
}
return first;
}

really_inline bool from_chars_hex_prefix(const char*& first, const char* last) noexcept
{
if (first + 2 < last && first[0] == '0' && (first[1] == 'x' || first[1] == 'X'))
{
first += 2;
return true;
}
return false;
}
#endif

really_inline from_chars_result from_chars(const char *first, const char *last, double &value) noexcept
{
errno = 0;
if (!first || !last || first == last)
{
return {first, std::errc::invalid_argument};
}

#ifdef USE_CHARCONV_FROM_CHARS
first = from_chars_skip_prefix(first, last);
std::chars_format fmt = std::chars_format::general;
if (from_chars_hex_prefix(first, last))
{
fmt = std::chars_format::hex;
}
std::from_chars_result res = std::from_chars(first, last, value, fmt);
return from_chars_result{ res.ptr, res.ec };
#else

errno = 0;
char * endptr = nullptr;

double
Expand All @@ -88,16 +146,28 @@ really_inline from_chars_result from_chars(const char *first, const char *last,
{
return {first, std::errc::argument_out_of_domain};
}
#endif
}

really_inline from_chars_result from_chars(const char *first, const char *last, float &value) noexcept
{
errno = 0;
if (!first || !last || first == last)
{
return {first, std::errc::invalid_argument};
}

#ifdef USE_CHARCONV_FROM_CHARS
first = from_chars_skip_prefix(first, last);
std::chars_format fmt = std::chars_format::general;
if (from_chars_hex_prefix(first, last))
{
fmt = std::chars_format::hex;
}
std::from_chars_result res = std::from_chars(first, last, value, fmt);
return from_chars_result{ res.ptr, res.ec };
#else

errno = 0;
char *endptr = nullptr;

float
Expand Down Expand Up @@ -132,16 +202,28 @@ really_inline from_chars_result from_chars(const char *first, const char *last,
{
return {first, std::errc::argument_out_of_domain};
}
#endif
}

really_inline from_chars_result from_chars(const char *first, const char *last, long int &value) noexcept
{
errno = 0;
if (!first || !last || first == last)
{
return {first, std::errc::invalid_argument};
}

#ifdef USE_CHARCONV_FROM_CHARS
first = from_chars_skip_prefix(first, last);
int base = 10;
if (from_chars_hex_prefix(first, last))
{
base = 16;
}
std::from_chars_result res = std::from_chars(first, last, value, base);
return from_chars_result{ res.ptr, res.ec };
#else

errno = 0;
char *endptr = nullptr;

long int
Expand Down Expand Up @@ -170,6 +252,7 @@ really_inline from_chars_result from_chars(const char *first, const char *last,
{
return {first, std::errc::argument_out_of_domain};
}
#endif
}
} // namespace NumberUtils
} // namespace OCIO_NAMESPACE
Expand Down
24 changes: 22 additions & 2 deletions src/utils/StringUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ inline unsigned char Upper(unsigned char c)
}
}

// Checks if character is whitespace (space, tab, newline or other
// non-printable char). Does not take locale into account.
inline bool IsSpace(unsigned char c)
{
return c <= ' ';
}

// Return the lower case string.
inline std::string Lower(std::string str)
{
Expand Down Expand Up @@ -95,6 +102,13 @@ inline bool StartsWith(const std::string & str, const std::string & prefix)
return str.size() >= prefix.size() && 0 == str.compare(0, prefix.size(), prefix);
}

// Return true if the string starts with the prefix character.
// Note: The comparison is case sensitive.
inline bool StartsWith(const std::string& str, char prefix)
{
return !str.empty() && str.front() == prefix;
}

// Starting from the left, trim the character.
inline std::string LeftTrim(std::string str, char c)
{
Expand All @@ -106,7 +120,7 @@ inline std::string LeftTrim(std::string str, char c)
// Starting from the left, trim all the space characters i.e. space, tabulation, etc.
inline std::string LeftTrim(std::string str)
{
const auto it = std::find_if(str.begin(), str.end(), [](char ch) { return !std::isspace(static_cast<unsigned char>(ch)); });
const auto it = std::find_if(str.begin(), str.end(), [](char ch) { return !IsSpace(ch); });
str.erase(str.begin(), it);
return str;
}
Expand All @@ -123,7 +137,7 @@ inline std::string RightTrim(std::string str, char c)
inline std::string RightTrim(std::string str)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for delayed response.
I wanted to double check that those changes won't break the other parts of the library where we need to deal with unicode strings but then pulled into different directions.

Are we sure that this "unsigned char" based IsSpace() function can operate reliably with UTF-8 encoded, potentially multi-byte characters? Especially the RightTrim() feels dangerous which is used in places like GetAbsoluteSearchPaths() etc.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm with a closer look, that doesn't seem to be using this overload. So the current use-case wise we're safe I guess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes this would work correctly with UTF-8 too, as in it would detect ASCII spaces just fine. UTF-8 is a full superset of 7-bit ASCII; any "longer than one byte" character bytes in there all have the highest bit set.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, you are right. MSB can be 0 only for the first byte of the multi-byte character.
All clear!

{
const auto it =
std::find_if(str.rbegin(), str.rend(), [](char ch) { return !std::isspace(static_cast<unsigned char>(ch)); });
std::find_if(str.rbegin(), str.rend(), [](char ch) { return !IsSpace(ch); });
str.erase(it.base(), str.end());
return str;
}
Expand All @@ -148,6 +162,12 @@ inline void Trim(StringVec & list)
}
}

// Checks if string is empty or white-space only.
inline bool IsEmptyOrWhiteSpace(const std::string& str)
{
return std::find_if(str.begin(), str.end(), [](char ch) { return !IsSpace(ch); }) == str.end();
}

// Split a string content using an arbitrary separator.
inline StringVec Split(const std::string & str, char separator)
{
Expand Down
37 changes: 37 additions & 0 deletions tests/cpu/fileformats/FileFormatIridasCube_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,43 @@ OCIO_ADD_TEST(FileFormatIridasCube, read_failure)
}
}

OCIO_ADD_TEST(FileFormatIridasCube, whitespace_handling)
{
const std::string SAMPLE =
"# comment\n"
"# comment with trailing space \n"
"# next up various forms of empty lines\n"
"\n"
" \n"
" \t \n"
"# whitespace before keywords or after data should be supported\n"
" LUT_3D_SIZE \t 2 \t\n"
"\t \tDOMAIN_MIN 0.25 0.5 0.75\n"
"\n"
"DOMAIN_MAX\t1.5\t2.5\t3.5\n"

"0.0 0.0 0.0\n"
"# comments in between data should be ignored\n"
" 1.0 0.0 \t 0.0\n"
"0.0 1.0 0.0 \n"
" 1.0 1.0 0.0\n"
" \n"
"0.0 0.0 1.0\n"
"1.0 0.0 1.0\n"
"0.0 1.0 1.0\n"
"1.0 1.0 1.0\n"
" \n"
"\n";

OCIO::LocalCachedFileRcPtr file;
OCIO_CHECK_NO_THROW(file = ReadIridasCube(SAMPLE));
OCIO_CHECK_EQUAL(file->domain_min[0], 0.25f);
OCIO_CHECK_EQUAL(file->domain_min[2], 0.75f);
OCIO_CHECK_EQUAL(file->domain_max[0], 1.5f);
OCIO_CHECK_EQUAL(file->domain_max[2], 3.5f);
OCIO_CHECK_EQUAL(file->lut3D->getArray().getLength(), 2);
}

OCIO_ADD_TEST(FileFormatIridasCube, no_shaper)
{
// check baker output
Expand Down
Loading
Loading