Skip to content

Commit

Permalink
HPCC-32795 Additional optimizations to replaceString
Browse files Browse the repository at this point in the history
- Add new parameter to replaceString function that skips copying to the temporary buffer if no match was made
- Waits to allocate memory until just before the first copy

Signed-off-by: Jack Del Vecchio <[email protected]>
  • Loading branch information
jackdelv committed Nov 5, 2024
1 parent ea449dc commit 7ddf62f
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 14 deletions.
2 changes: 1 addition & 1 deletion rtl/eclrtl/eclrtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6066,7 +6066,7 @@ void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char *
void rtlSubstituteEmbeddedScript(size32_t &__lenResult, char * &__result, size32_t scriptChars, const char *script, size32_t outFieldsChars, const char *outFields, size32_t searchChars, const char *search)
{
StringBuffer result;
::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields);
::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields, false);
__lenResult = result.lengthUtf8();
__result = result.detach();
}
Expand Down
31 changes: 19 additions & 12 deletions system/jlib/jstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,13 +935,10 @@ StringBuffer & StringBuffer::replace(char oldChar, char newChar)
}

// Copy source to result, replacing all occurrences of "oldStr" with "newStr"
StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr)
bool replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr, bool avoidCopyIfUnmatched)
{
if (lenOldStr && lenSource >= lenOldStr)
{
// Avoid allocating an unnecessarly large buffer and match the source string
result.ensureCapacity(lenSource);

size_t offset = 0;
size_t lastCopied = 0;
size_t maxOffset = lenSource - lenOldStr + 1;
Expand All @@ -951,6 +948,10 @@ StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char
if (unlikely(source[offset] == firstChar)
&& unlikely((lenOldStr == 1) || memcmp(source + offset, oldStr, lenOldStr)==0))
{
// Wait to allocate memory until a match is found
if (!lastCopied)
result.ensureCapacity(lenSource); // Avoid allocating an unnecessarly large buffer and match the source string

// If lastCopied matches the offset nothing is appended, but we can avoid a test for offset == lastCopied
result.append(offset - lastCopied, source + lastCopied);
result.append(lenNewStr, newStr);
Expand All @@ -960,13 +961,16 @@ StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char
else
offset++;
}
// Append the remaining characters
result.append(lenSource - lastCopied, source + lastCopied);

if (lastCopied || !avoidCopyIfUnmatched)
result.append(lenSource - lastCopied, source + lastCopied); // Append the remaining characters

return lastCopied != 0;
}
else
else if (!avoidCopyIfUnmatched)
result.append(lenSource, source); // Search string does not fit in source or is empty

return result;
return false;
}

StringBuffer &replaceVariables(StringBuffer & result, const char *source, bool exceptions, IVariableSubstitutionHelper *helper, const char* delim, const char* term)
Expand Down Expand Up @@ -1056,13 +1060,16 @@ StringBuffer &replaceStringNoCase(StringBuffer & result, size_t lenSource, const
// this method will replace all occurrences of "oldStr" with "newStr"
StringBuffer & StringBuffer::replaceString(const char* oldStr, const char* newStr)
{
if (curLen)
if (curLen && oldStr)
{
size_t oldlen = strlen(oldStr);
if (oldlen > curLen)
return *this;

StringBuffer temp;
size_t oldlen = oldStr ? strlen(oldStr) : 0;
size_t newlen = newStr ? strlen(newStr) : 0;
::replaceString(temp, curLen, buffer, oldlen, oldStr, newlen, newStr);
swapWith(temp);
if (::replaceString(temp, curLen, buffer, oldlen, oldStr, newlen, newStr, true))
swapWith(temp);
}
return *this;
}
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jstring.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsign
extern jlib_decl int utf8CharLen(unsigned char ch);
extern jlib_decl int utf8CharLen(const unsigned char *ch, unsigned maxsize = (unsigned)-1);

extern jlib_decl StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr);
extern jlib_decl bool replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr, bool avoidCopyIfUnmatched);

interface IVariableSubstitutionHelper
{
Expand Down
20 changes: 20 additions & 0 deletions testing/unittests/jlibtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,26 @@ void testEncodeCSVColumn()
source.set("abbabab");
source.replaceString("ab", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxbxxxxxx", source.str());

// Search string has same length as source string and matches
source.set("ababab");
source.replaceString("ababab", "xxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxxxx", source.str());

// Search string has same length as source string and replace is smaller than source
source.set("ababab");
source.replaceString("ababab", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxx", source.str());

// Search string has same length as source string and replace is larger than source
source.set("ababab");
source.replaceString("ababab", "xxxxxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxxxxxxx", source.str());

// Search string has same length as source string and does not match
source.set("ababab");
source.replaceString("ababac", "xxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("ababab", source.str());
}
};

Expand Down

0 comments on commit 7ddf62f

Please sign in to comment.