Skip to content

Commit

Permalink
Metadata length is measured in characters
Browse files Browse the repository at this point in the history
  • Loading branch information
veloman-yunkan committed Mar 28, 2023
1 parent a975cd0 commit 5a584f9
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
13 changes: 9 additions & 4 deletions src/metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <sstream>
#include <regex>
#include <unicode/unistr.h>

namespace zim
{
Expand All @@ -41,6 +42,10 @@ bool matchRegex(const std::string& regexStr, const std::string& text)
return std::regex_match(text.begin(), text.end(), regex);
}

size_t getTextLength(const std::string& utf8EncodedString)
{
return icu::UnicodeString::fromUTF8(utf8EncodedString).length();
}

#include "metadata_constraints.cpp"

Expand Down Expand Up @@ -99,14 +104,14 @@ Metadata::Errors Metadata::checkSimpleConstraints() const
const auto& value = nv.second;
try {
const auto& rmr = getReservedMetadataRecord(name);
if ( value.size() < rmr.minLength ) {
if ( rmr.minLength != 0 && getTextLength(value) < rmr.minLength ) {
std::ostringstream oss;
oss << name << " must be at least " << rmr.minLength << " bytes";
oss << name << " must contain at least " << rmr.minLength << " characters";
errors.push_back(oss.str());
}
if ( rmr.maxLength != 0 && value.size() > rmr.maxLength ) {
if ( rmr.maxLength != 0 && getTextLength(value) > rmr.maxLength ) {
std::ostringstream oss;
oss << name << " must be at most " << rmr.maxLength << " bytes";
oss << name << " must contain at most " << rmr.maxLength << " characters";
errors.push_back(oss.str());
}
if ( !rmr.regex.empty() && !matchRegex(rmr.regex, value) ) {
Expand Down
7 changes: 2 additions & 5 deletions src/metadata_constraints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ const Metadata::ReservedMetadataTable reservedMetadataInfoTable = {
{
"Illustration_48x48@1",
MANDATORY,
67, // this is the lower limit on a syntactically valid PNG file
// (according to https://github.com/mathiasbynens/small)
10000, // this is roughly the size of the raw (i.e. without any compression)
// RGBA pixel data of a 48x48 image
// Question: how much PNG metadata shall we allow?
0, // There are no constraints on the illustration metadata size
0, // in order to avoid decoding it as UTF-8 encoded text
PNG_REGEXP
},
};
4 changes: 2 additions & 2 deletions test/metadata-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ TEST(Metadata, minSizeConstraints)
ASSERT_FALSE(m.valid());
ASSERT_EQ(m.check(),
zim::Metadata::Errors({
"Title must be at least 1 bytes"
"Title must contain at least 1 characters"
})
);
m.set("Title", "t");
Expand All @@ -96,7 +96,7 @@ TEST(Metadata, maxSizeConstraints)
ASSERT_FALSE(m.valid());
ASSERT_EQ(m.check(),
zim::Metadata::Errors({
"Title must be at most 30 bytes"
"Title must contain at most 30 characters"
})
);
m.set("Title", std::string(30, 'a'));
Expand Down

0 comments on commit 5a584f9

Please sign in to comment.