From 5dfa8a192c328b5dc9749b47203742c8e39657b6 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Thu, 3 Oct 2024 11:33:00 +0100 Subject: [PATCH] HPCC-32765 Report the compressed offset when reporting an error about an invalid compressed file Signed-off-by: Gavin Halliday --- system/jlib/jfcmp.hpp | 2 + system/jlib/jlz4.cpp | 12 +- system/jlib/jlzw.cpp | 65 ++++++----- system/jlib/jlzw.hpp | 2 +- testing/unittests/CMakeLists.txt | 1 + testing/unittests/filetests.cpp | 185 +++++++++++++++++++++++++++++++ 6 files changed, 238 insertions(+), 29 deletions(-) create mode 100644 testing/unittests/filetests.cpp diff --git a/system/jlib/jfcmp.hpp b/system/jlib/jfcmp.hpp index 24e6029df94..f9749a067f4 100644 --- a/system/jlib/jfcmp.hpp +++ b/system/jlib/jfcmp.hpp @@ -230,6 +230,7 @@ class jlib_decl CFcmpExpander : public CExpanderBase size32_t outlen; size32_t bufalloc; const size32_t *in = nullptr; + const void * original = nullptr; public: CFcmpExpander() @@ -247,6 +248,7 @@ class jlib_decl CFcmpExpander : public CExpanderBase virtual size32_t init(const void *blk) { + original = blk; const size32_t *expsz = (const size32_t *)blk; outlen = *expsz; in = (expsz+1); diff --git a/system/jlib/jlz4.cpp b/system/jlib/jlz4.cpp index 6a854c39fbf..930b38fa02d 100644 --- a/system/jlib/jlz4.cpp +++ b/system/jlib/jlz4.cpp @@ -264,6 +264,13 @@ class jlib_decl CLZ4Expander : public CFcmpExpander size32_t written; if (szchunk+totalExpanded outlen) - throwUnexpected(); + { + VStringBuffer msg("Decompression expected max %u bytes, but now %u at block offset %u", outlen, maxOut, (size32_t)((const byte *)in - (const byte *)original)); + throwUnexpectedX(msg.str()); + } maxOut += szchunk; // Likely to quickly approach the actual expanded size target.clear(); diff --git a/system/jlib/jlzw.cpp b/system/jlib/jlzw.cpp index 6ff771eade6..6401657a7a9 100644 --- a/system/jlib/jlzw.cpp +++ b/system/jlib/jlzw.cpp @@ -2118,7 +2118,7 @@ class CCompressedFile : implements ICompressedFileIO, public CInterface { unsigned code = e->errorCode(); StringBuffer msg; - e->errorMessage(msg).appendf(" at position %llu of %llu", nextExpansionPos, trailer.indexPos); + e->errorMessage(msg).appendf(" at uncompressed position %llu block %u of %llu", nextExpansionPos, curblocknum, trailer.indexPos); e->Release(); throw makeStringException(code, msg.str()); } @@ -2157,37 +2157,48 @@ class CCompressedFile : implements ICompressedFileIO, public CInterface void expand(const void *compbuf,MemoryBuffer &expbuf,size32_t expsize, offset_t compressedPos) { - size32_t rs = trailer.recordSize; - if (rs) { // diff expand - const byte *src = (const byte *)compbuf; - byte *dst = (byte *)expbuf.reserve(expsize); - if (expsize) { - assertex(expsize>=rs); - memcpy(dst,src,rs); - dst += rs; - src += rs; - expsize -= rs; - while (expsize) { + try + { + size32_t rs = trailer.recordSize; + if (rs) { // diff expand + const byte *src = (const byte *)compbuf; + byte *dst = (byte *)expbuf.reserve(expsize); + if (expsize) { assertex(expsize>=rs); - src += DiffExpand(src, dst, dst-rs, rs); - expsize -= rs; + memcpy(dst,src,rs); dst += rs; + src += rs; + expsize -= rs; + while (expsize) { + assertex(expsize>=rs); + src += DiffExpand(src, dst, dst-rs, rs); + expsize -= rs; + dst += rs; + } } } - } - else { // lzw or fastlz or lz4 - assertex(expander.get()); - size32_t exp = expander->expandFirst(expbuf, compbuf); - if (exp == 0) - { - unsigned numZeros = countZeros(trailer.blockSize, (const byte *)compbuf); - if (numZeros >= 16) - throw makeStringExceptionV(-1, "Unexpected zero fill in compressed file at position %llu length %u", compressedPos, numZeros); - } + else { // lzw or fastlz or lz4 + assertex(expander.get()); + size32_t exp = expander->expandFirst(expbuf, compbuf); + if (exp == 0) + { + unsigned numZeros = countZeros(trailer.blockSize, (const byte *)compbuf); + if (numZeros >= 16) + throw makeStringExceptionV(-1, "Unexpected zero fill in compressed file at position %llu length %u", compressedPos, numZeros); + } - startBlockPos = curblockpos; - nextExpansionPos = startBlockPos + exp; - fullBlockSize = expsize; + startBlockPos = curblockpos; + nextExpansionPos = startBlockPos + exp; + fullBlockSize = expsize; + } + } + catch (IException * e) + { + unsigned code = e->errorCode(); + StringBuffer msg; + e->errorMessage(msg).appendf(" at compressed position %llu of %llu", compressedPos, trailer.indexPos); + e->Release(); + throw makeStringException(code, msg.str()); } } diff --git a/system/jlib/jlzw.hpp b/system/jlib/jlzw.hpp index 1c3402d3318..da160d112e4 100644 --- a/system/jlib/jlzw.hpp +++ b/system/jlib/jlzw.hpp @@ -145,8 +145,8 @@ extern jlib_decl bool isCompressedFile(const char *filename); extern jlib_decl bool isCompressedFile(IFile *file); extern jlib_decl ICompressedFileIO *createCompressedFileReader(IFile *file,IExpander *expander=NULL, bool memorymapped=false, IFEflags extraFlags=IFEnone); extern jlib_decl ICompressedFileIO *createCompressedFileReader(IFileIO *fileio,IExpander *expander=NULL); -extern jlib_decl ICompressedFileIO *createCompressedFileWriter(IFile *file,size32_t recordsize,bool append=false,bool setcrc=true,ICompressor *compressor=NULL, unsigned compMethod=COMPRESS_METHOD_LZ4, IFEflags extraFlags=IFEnone); extern jlib_decl ICompressedFileIO *createCompressedFileWriter(IFileIO *fileio, bool append, size32_t recordsize,bool setcrc=true,ICompressor *compressor=NULL, unsigned compMethod=COMPRESS_METHOD_LZ4); +extern jlib_decl ICompressedFileIO *createCompressedFileWriter(IFile *file,size32_t recordsize,bool append=false,bool setcrc=true,ICompressor *compressor=NULL, unsigned compMethod=COMPRESS_METHOD_LZ4, IFEflags extraFlags=IFEnone); #define COMPRESSEDFILECRC (~0U) diff --git a/testing/unittests/CMakeLists.txt b/testing/unittests/CMakeLists.txt index 69a7b5e7a1c..e9ecc8ac297 100644 --- a/testing/unittests/CMakeLists.txt +++ b/testing/unittests/CMakeLists.txt @@ -45,6 +45,7 @@ set ( SRCS txSummarytests.cpp accessmaptests.cpp fxpptests.cpp + filetests.cpp ${HPCC_SOURCE_DIR}/esp/bindings/SOAP/xpp/fxpp/FragmentedXmlPullParser.cpp ${HPCC_SOURCE_DIR}/esp/bindings/SOAP/xpp/fxpp/FragmentedXmlAssistant.cpp ${CMAKE_BINARY_DIR}/generated/ws_loggingservice_esp.cpp diff --git a/testing/unittests/filetests.cpp b/testing/unittests/filetests.cpp new file mode 100644 index 00000000000..c2c29ea718e --- /dev/null +++ b/testing/unittests/filetests.cpp @@ -0,0 +1,185 @@ +/*############################################################################## + + HPCC SYSTEMS software Copyright (C) 2024 HPCC Systems®. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +############################################################################## */ + +/* + * File regression tests + * + */ + +#ifdef _USE_CPPUNIT +#include +#include +#include +#include "jsem.hpp" +#include "jfile.hpp" +#include "jdebug.hpp" +#include "jset.hpp" +#include "rmtfile.hpp" +#include "jlzw.hpp" +#include "jqueue.hpp" +#include "jregexp.hpp" +#include "jsecrets.hpp" +#include "jutil.hpp" +#include "junicode.hpp" + +#include "opentelemetry/sdk/common/attribute_utils.h" +#include "opentelemetry/sdk/resource/resource.h" + +#include "unittests.hpp" + +#define CPPUNIT_ASSERT_EQUAL_STR(x, y) CPPUNIT_ASSERT_EQUAL(std::string(x ? x : ""),std::string(y ? y : "")) + +class JlibFileTest : public CppUnit::TestFixture +{ +public: + CPPUNIT_TEST_SUITE(JlibFileTest); + CPPUNIT_TEST(testCompressed); + CPPUNIT_TEST(cleanup); + CPPUNIT_TEST_SUITE_END(); + + static constexpr const char * testFilename = "unittests_compressfile"; + void createCompressed() + { + Owned file(createIFile(testFilename)); + Owned io(createCompressedFileWriter(file, 0, false, false, nullptr)); + + constexpr size_t cnt = 10000; + constexpr size_t size = 1000; + offset_t pos = 0; + for (unsigned i = 0; i < cnt; i++) + { + byte temp[size]; + + for (unsigned j = 0; j < size; j += 4) + { + temp[j] = (byte)j; + temp[j+1] = (byte)j+1; + temp[j+2] = (byte)j+2; + temp[j+3] = (byte)random(); + } + + io->write(pos, size, temp); + pos += size; + } + } + void readCompressed(bool errorExpected) + { + bool success = false; + try + { + Owned file(createIFile(testFilename)); + Owned io(createCompressedFileReader(file)); + + constexpr size_t cnt = 10000; + constexpr size_t size = 1000; + offset_t pos = 0; + for (unsigned i = 0; i < cnt; i++) + { + byte temp[size]; + + io->read(pos, size, temp); + + for (unsigned j = 0; j < size; j += 4) + { + CPPUNIT_ASSERT_EQUAL(temp[j], (byte)j); + CPPUNIT_ASSERT_EQUAL(temp[j+1], (byte)(j+1)); + } + + pos += size; + } + + success = true; + } + catch (IException *e) + { + if (errorExpected) + { + DBGLOG(e, "Expected error reading compressed file:"); + } + else + { + StringBuffer msg("Unexpected error reading compressed file:"); + e->errorMessage(msg); + CPPUNIT_FAIL(msg.str()); + } + e->Release(); + } + if (success && errorExpected) + CPPUNIT_FAIL("Expected error reading compressed file"); + } + void read(offset_t offset, size32_t size, void * data) + { + Owned file(createIFile(testFilename)); + Owned io(file->open(IFOread)); + io->read(offset, size, data); + } + void write(offset_t offset, size32_t size, void * data) + { + Owned file(createIFile(testFilename)); + Owned io(file->open(IFOwrite)); + io->write(offset, size, data); + } + void testCompressed() + { + //patch the first block with zeros + constexpr byte zeros[0x100000] = { 0 }; + + createCompressed(); + readCompressed(false); + + write(0, sizeof(zeros), (void *)zeros); + readCompressed(true); + + createCompressed(); + write(0x10000, sizeof(zeros), (void *)zeros); + readCompressed(true); + + createCompressed(); + write(0x9000, sizeof(zeros), (void *)zeros); + readCompressed(true); + + //Test the second block being corrupted with zeros + size32_t firstBlockSize = 0; + createCompressed(); + read(4, sizeof(firstBlockSize), &firstBlockSize); + write(8+firstBlockSize, sizeof(zeros), (void *)zeros); + readCompressed(true); + + //Test the data after the second block being corrupted with zeros + createCompressed(); + read(4, sizeof(firstBlockSize), &firstBlockSize); + write(8+4+firstBlockSize, sizeof(zeros), (void *)zeros); + readCompressed(true); + + //Test the second block being corrupted to an invalid size + size32_t newSize = 1; + createCompressed(); + read(4, sizeof(firstBlockSize), &firstBlockSize); + write(8+firstBlockSize, sizeof(newSize), &newSize); + readCompressed(true); + } + void cleanup() + { + Owned file(createIFile(testFilename)); + file->remove(); + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION( JlibFileTest ); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( JlibFileTest, "JlibFileTest" ); + +#endif