From 19f5dc94d06fe59146ce3ae9b1b9f00706803588 Mon Sep 17 00:00:00 2001 From: Seonghyun Kim Date: Wed, 17 Jul 2024 19:08:44 +0900 Subject: [PATCH 1/2] Fix Bug in RegExpObject::createRegExpMatchedArray Signed-off-by: Seonghyun Kim --- src/runtime/RegExpObject.cpp | 35 ++++++- tools/test/test262/excludelist.orig.xml | 118 ------------------------ 2 files changed, 33 insertions(+), 120 deletions(-) diff --git a/src/runtime/RegExpObject.cpp b/src/runtime/RegExpObject.cpp index d474a2935..b8fc3fa9b 100644 --- a/src/runtime/RegExpObject.cpp +++ b/src/runtime/RegExpObject.cpp @@ -503,8 +503,17 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const for (auto it = m_yarrPattern->m_captureGroupNames.begin(); it != m_yarrPattern->m_captureGroupNames.end(); ++it) { auto foundMapElement = m_yarrPattern->m_namedGroupToParenIndices.find(*it); if (foundMapElement != m_yarrPattern->m_namedGroupToParenIndices.end()) { + Value value; + for (size_t i = 0; i < foundMapElement->second.size(); i++) { + Value indexValue = indices->getOwnProperty(state, + ObjectPropertyName(state, foundMapElement->second[i])) + .value(state, indices); + if (!indexValue.isUndefinedOrNull()) { + value = indexValue; + } + } groups->directDefineOwnProperty(state, ObjectPropertyName(state, it->impl()), - ObjectPropertyDescriptor(indices->getOwnProperty(state, ObjectPropertyName(state, foundMapElement->second[0])).value(state, this), ObjectPropertyDescriptor::AllPresent)); + ObjectPropertyDescriptor(value, ObjectPropertyDescriptor::AllPresent)); } } indices->directDefineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent)); @@ -518,8 +527,30 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const for (auto it = m_yarrPattern->m_captureGroupNames.begin(); it != m_yarrPattern->m_captureGroupNames.end(); ++it) { auto foundMapElement = m_yarrPattern->m_namedGroupToParenIndices.find(*it); if (foundMapElement != m_yarrPattern->m_namedGroupToParenIndices.end()) { + Value value; + for (size_t i = 0; i < foundMapElement->second.size(); i++) { + Value indexValue; + size_t index = foundMapElement->second[i]; + size_t indicesIndex = 0; + for (unsigned i = 0; i < result.m_matchResults.size(); i++) { + for (unsigned j = 0; j < result.m_matchResults[i].size(); j++) { + if (indicesIndex == index) { + if (result.m_matchResults[i][j].m_start != std::numeric_limits::max()) { + indexValue = new StringView(input, result.m_matchResults[i][j].m_start, result.m_matchResults[i][j].m_end); + } + break; + } + indicesIndex++; + } + } + + if (!indexValue.isUndefinedOrNull()) { + value = indexValue; + } + } + groups->directDefineOwnProperty(state, ObjectPropertyName(state, it->impl()), - ObjectPropertyDescriptor(arr->getOwnProperty(state, ObjectPropertyName(state, foundMapElement->second[0])).value(state, this), ObjectPropertyDescriptor::AllPresent)); + ObjectPropertyDescriptor(value, ObjectPropertyDescriptor::AllPresent)); } } arr->directDefineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent)); diff --git a/tools/test/test262/excludelist.orig.xml b/tools/test/test262/excludelist.orig.xml index e68130990..32392e6f7 100644 --- a/tools/test/test262/excludelist.orig.xml +++ b/tools/test/test262/excludelist.orig.xml @@ -579,13 +579,6 @@ TODO TODO TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO TODO TODO TODO @@ -595,8 +588,6 @@ TODO TODO TODO - TODO - TODO TODO TODO TODO @@ -6870,33 +6861,8 @@ TODO TODO TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO TODO TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO TODO TODO TODO @@ -6912,91 +6878,7 @@ TODO TODO TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO - TODO TODO TODO TODO From ff85debd91d6a34c8e58335a0483be5c67234667 Mon Sep 17 00:00:00 2001 From: Seonghyun Kim Date: Wed, 17 Jul 2024 19:09:38 +0900 Subject: [PATCH 2/2] Fix Unicode Identifier paring bug Signed-off-by: Seonghyun Kim --- src/parser/Lexer.cpp | 19 ++++++++++++------- src/parser/Lexer.h | 2 +- tools/test/test262/excludelist.orig.xml | 2 -- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/parser/Lexer.cpp b/src/parser/Lexer.cpp index 89fa530c3..1ad8df5e2 100644 --- a/src/parser/Lexer.cpp +++ b/src/parser/Lexer.cpp @@ -866,7 +866,7 @@ Scanner::ScanIDResult Scanner::getIdentifier() // Blackslash (U+005C) marks Unicode escape sequence. this->index = start; return this->getComplexIdentifier(); - } else if (UNLIKELY(ch >= 0xD800 && ch < 0xDFFF)) { + } else if (UNLIKELY(ch >= 0xD800 && ch <= 0xDFFF)) { // Need to handle surrogate pairs. this->index = start; return this->getComplexIdentifier(); @@ -887,7 +887,7 @@ Scanner::ScanIDResult Scanner::getIdentifier() Scanner::ScanIDResult Scanner::getComplexIdentifier() { - char16_t cp = this->codePointAt(this->index); + char32_t cp = this->codePointAt(this->index); ParserCharPiece piece = ParserCharPiece(cp); UTF16StringDataNonGCStd id(piece.data, piece.length); this->index += id.length(); @@ -902,14 +902,17 @@ Scanner::ScanIDResult Scanner::getComplexIdentifier() if (this->peekChar() == '{') { ++this->index; ch = this->scanUnicodeCodePointEscape(); + id.erase(id.length() - 1); } else { ch = this->scanHexEscape('u'); + id.erase(id.length() - 1); cp = ch; if (ch == EMPTY_CODE_POINT || ch == '\\' || !isIdentifierStart(cp)) { this->throwUnexpectedToken(); } } - id = ch; + piece = ParserCharPiece(ch); + id += UTF16StringDataNonGCStd(piece.data, piece.length); } while (!this->eof()) { @@ -2282,7 +2285,7 @@ static ALWAYS_INLINE KeywordKind getKeyword(const StringBufferAccessData& data) return NotKeyword; } -ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char16_t ch0) +ALWAYS_INLINE void Scanner::scanIdentifier(Scanner::ScannerResult* token, char32_t ch0) { ASSERT(token != nullptr); Token type = Token::IdentifierToken; @@ -2341,13 +2344,15 @@ void Scanner::lex(Scanner::ScannerResult* token) return; } - char16_t cp = this->peekCharWithoutEOF(); + char32_t cp = this->peekCharWithoutEOF(); - if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF)) { + if (UNLIKELY(cp >= 0xD800 && cp <= 0xDFFF)) { ++this->index; char32_t ch2 = this->peekChar(); if (U16_IS_TRAIL(ch2)) { - cp = U16_GET_SUPPLEMENTARY(cp, ch2); + cp = (cp - 0xd800) << 10; + cp += (ch2 - 0xdc00) + 0x10000UL; + this->index--; } else { this->throwUnexpectedToken(); } diff --git a/src/parser/Lexer.h b/src/parser/Lexer.h index e0a0809fe..59a63ee6c 100644 --- a/src/parser/Lexer.h +++ b/src/parser/Lexer.h @@ -726,7 +726,7 @@ class Scanner { void scanStringLiteral(Scanner::ScannerResult* token); // ECMA-262 11.6 Names and Keywords - ALWAYS_INLINE void scanIdentifier(Scanner::ScannerResult* token, char16_t ch0); + ALWAYS_INLINE void scanIdentifier(Scanner::ScannerResult* token, char32_t ch0); String* scanRegExpBody(); String* scanRegExpFlags(); diff --git a/tools/test/test262/excludelist.orig.xml b/tools/test/test262/excludelist.orig.xml index 32392e6f7..0aab0926b 100644 --- a/tools/test/test262/excludelist.orig.xml +++ b/tools/test/test262/excludelist.orig.xml @@ -689,8 +689,6 @@ TODO TODO TODO - TODO - TODO TODO TODO TODO