Skip to content

Commit

Permalink
Explicitly check for UTF-8 in REGEX* calls
Browse files Browse the repository at this point in the history
  • Loading branch information
dcamper committed Apr 18, 2024
1 parent 99be78d commit 04a3258
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
1 change: 1 addition & 0 deletions ecl/hql/hqlgram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,7 @@ class HqlGram : implements IErrorReceiver, public CInterface
void ensureString(attribute &a);
void ensureTypeCanBeIndexed(attribute &a);
void ensureUnicode(attribute &a);
void ensureUTF8(attribute &a);
void ensureData(attribute &a);
void ensureTransformTypeMatch(attribute & tattr, IHqlExpression * ds);
bool checkTransformTypeMatch(const attribute & errpos, IHqlExpression * ds, IHqlExpression * transform);
Expand Down
26 changes: 22 additions & 4 deletions ecl/hql/hqlgram.y
Original file line number Diff line number Diff line change
Expand Up @@ -6442,7 +6442,9 @@ primexpr1
{
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
if(isUnicodeType($3.queryExprType()))
if(isUTF8Type($3.queryExprType()))
parser->normalizeExpression($5, type_utf8, false);
else if(isUnicodeType($3.queryExprType()))
parser->normalizeExpression($5, type_unicode, false);
else
parser->normalizeExpression($5, type_string, false);
Expand All @@ -6453,7 +6455,12 @@ primexpr1
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
Owned<ITypeInfo> subType;
if(isUnicodeType($3.queryExprType()))
if(isUTF8Type($3.queryExprType()))
{
parser->normalizeExpression($5, type_utf8, false);
subType.setown(makeUtf8Type(UNKNOWN_LENGTH, 0));
}
else if(isUnicodeType($3.queryExprType()))
{
parser->normalizeExpression($5, type_unicode, false);
subType.setown(makeUnicodeType(UNKNOWN_LENGTH, 0));
Expand All @@ -6471,7 +6478,12 @@ primexpr1
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
Owned<ITypeInfo> retType;
if(isUnicodeType($3.queryExprType()))
if(isUTF8Type($3.queryExprType()))
{
parser->normalizeExpression($5, type_utf8, false);
retType.setown(makeUtf8Type(UNKNOWN_LENGTH, $3.queryExprType()->queryLocale()));
}
else if(isUnicodeType($3.queryExprType()))
{
parser->normalizeExpression($5, type_unicode, false);
retType.setown(makeUnicodeType(UNKNOWN_LENGTH, $3.queryExprType()->queryLocale()));
Expand All @@ -6488,7 +6500,13 @@ primexpr1
{
parser->normalizeExpression($3, type_stringorunicode, false);
Owned<ITypeInfo> retType;
if(isUnicodeType($3.queryExprType()))
if(isUTF8Type($3.queryExprType()))
{
parser->normalizeExpression($5, type_utf8, false);
parser->normalizeExpression($7, type_utf8, false);
retType.setown(makeUtf8Type(UNKNOWN_LENGTH, 0));
}
else if(isUnicodeType($3.queryExprType()))
{
parser->normalizeExpression($5, type_unicode, false);
parser->normalizeExpression($7, type_unicode, false);
Expand Down
22 changes: 22 additions & 0 deletions ecl/hql/hqlgram2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4703,6 +4703,9 @@ void HqlGram::normalizeExpression(attribute & exprAttr, type_t expectedType, boo
case type_unicode:
ensureUnicode(exprAttr);
break;
case type_utf8:
ensureUTF8(exprAttr);
break;
default:
throwUnexpected();
}
Expand Down Expand Up @@ -4974,6 +4977,25 @@ void HqlGram::ensureUnicode(attribute &a)
}
}

/*
 * Make sure the expression carried by `a` has a UTF8 type.
 *
 *  - No type available, or the type is already UTF8: `a` is left untouched.
 *  - The type satisfies isStringType(): the expression is replaced by the
 *    result of ensureExprType() against an unknown-length UTF8 type.
 *  - Anything else: ERR_TYPE_INCOMPATIBLE is reported against `a` and the
 *    expression is left as it was.
 */
void HqlGram::ensureUTF8(attribute &a)
{
    ITypeInfo *exprType = a.queryExprType();

    // Nothing to do when there is no type to inspect or it is UTF8 already.
    if (!exprType || isUTF8Type(exprType))
        return;

    // Only string-typed expressions are converted; everything else is an error.
    if (!isStringType(exprType))
    {
        StringBuffer typeName;
        reportError(ERR_TYPE_INCOMPATIBLE, a, "Incompatible types: expected UTF8, given %s", getFriendlyTypeStr(exprType, typeName).str());
        return;
    }

    Owned<ITypeInfo> targetType = makeUtf8Type(UNKNOWN_LENGTH, NULL);
    OwnedHqlExpr original = a.getExpr();
    a.setExpr(ensureExprType(original, targetType));
}

void HqlGram::ensureData(attribute &a)
{
ITypeInfo *t1 = a.queryExprType();
Expand Down

0 comments on commit 04a3258

Please sign in to comment.