Skip to content

Commit

Permalink
Implement TextReplacer further
Browse files Browse the repository at this point in the history
- It works, but I'm unsure whether I want to use it like this; I'm unsure whether the ctor is useful
  • Loading branch information
visuve committed May 11, 2024
1 parent 3c7c995 commit ded6872
Show file tree
Hide file tree
Showing 11 changed files with 334 additions and 88 deletions.
106 changes: 72 additions & 34 deletions PystykorvaLib/MemoryMappedFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ IOException::IOException(const std::string& message) :
class MemoryMappedFileImpl
{
public:
MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize) :
MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize, bool readOnly) :
_file(CreateFileW(
path.c_str(),
GENERIC_READ | GENERIC_WRITE,
readOnly ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE,
0,
nullptr,
OPEN_EXISTING,
readOnly ? OPEN_EXISTING : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NULL))
nullptr))
{
if (!_file || _file == INVALID_HANDLE_VALUE)
{
Expand All @@ -35,7 +35,7 @@ class MemoryMappedFileImpl
_mapping = CreateFileMappingW(
_file,
nullptr,
PAGE_READWRITE,
readOnly ? PAGE_READONLY : PAGE_READWRITE,
mappingSize.HighPart,
mappingSize.LowPart,
nullptr);
Expand All @@ -45,7 +45,12 @@ class MemoryMappedFileImpl
throw IOException("CreateFileMappingW");
}

_view = MapViewOfFile(_mapping, FILE_MAP_ALL_ACCESS, 0, 0, fileSize);
_view = MapViewOfFile(
_mapping,
readOnly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS,
0,
0,
fileSize);

if (!_view)
{
Expand Down Expand Up @@ -74,16 +79,7 @@ class MemoryMappedFileImpl
}

NonCopyable(MemoryMappedFileImpl);

std::string_view Data() const
{
return { reinterpret_cast<char*>(_view), _size };
}

std::string_view Sample(size_t size) const
{
return { reinterpret_cast<char*>(_view), std::min(size, _size) };
}
friend class MemoryMappedFile;

private:
HANDLE _file = nullptr;
Expand All @@ -106,15 +102,21 @@ IOException::IOException(const std::string& message) :
class MemoryMappedFileImpl
{
public:
MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize) :
_descriptor(open(path.c_str(), O_RDONLY))
MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize, bool readOnly) :
_descriptor(open(path.c_str(), readOnly ? O_RDONLY : O_RDWR))
{
if (_descriptor == -1)
{
throw IOException("open");
}

_view = mmap(nullptr, _size, PROT_READ, MAP_PRIVATE, _descriptor, 0);
_view = mmap(
nullptr,
_size,
readOnly ? PROT_READ : PROT_READ | PROT_WRITE,
MAP_PRIVATE,
_descriptor,
0);

if (_view == MAP_FAILED)
{
Expand All @@ -138,26 +140,16 @@ class MemoryMappedFileImpl
}

NonCopyable(MemoryMappedFileImpl);

std::string_view Sample(size_t size)
{
return { reinterpret_cast<char*>(_view), std::min(_size, size) };
}

std::string_view Data() const
{
return { reinterpret_cast<char*>(_view), _size };
}

friend class MemoryMappedFile;
private:
int _descriptor = 0;
void* _view = nullptr;
uint64_t _size = 0;
};
#endif

MemoryMappedFile::MemoryMappedFile(const std::filesystem::path& path, uint64_t fileSize) :
_impl(new MemoryMappedFileImpl(path, fileSize))
MemoryMappedFile::MemoryMappedFile(const std::filesystem::path& path, uint64_t fileSize, bool readOnly) :
_impl(new MemoryMappedFileImpl(path, fileSize, readOnly))
{
}

Expand All @@ -166,12 +158,58 @@ MemoryMappedFile::~MemoryMappedFile()
delete _impl;
}

uint64_t MemoryMappedFile::Size() const
{
return _impl->_size;
}

std::string_view MemoryMappedFile::Sample(size_t size) const
{
return _impl->Sample(size);
size = std::min(_impl->_size, size);
return { reinterpret_cast<char*>(_impl->_view), size };
}

// Returns a view over `size` bytes of the mapping starting at `offset`.
//
// @param offset  Byte offset from the start of the mapped memory.
// @param size    Number of bytes in the returned view.
// @throws std::out_of_range when [offset, offset + size) is not fully inside
//         the mapped size.
std::string_view MemoryMappedFile::Chunk(uint64_t offset, uint64_t size) const
{
	const uint64_t total = _impl->_size;

	// Compare against the remaining length rather than computing
	// offset + size: that sum wraps around for large arguments and would
	// let an out-of-range request slip through the check.
	if (offset > total || size > total - offset)
	{
		throw std::out_of_range("chunk out of bounds");
	}

	return { reinterpret_cast<const char*>(_impl->_view) + offset, static_cast<size_t>(size) };
}

std::string_view MemoryMappedFile::Data() const
{
return _impl->Data();
return { reinterpret_cast<char*>(_impl->_view), _impl->_size };
}

// Copies `size` bytes from the mapping, starting at the current internal
// offset, into `data`, then advances the internal offset by `size`.
//
// @param data  Destination buffer; must have room for at least `size` bytes.
// @param size  Number of bytes to copy.
// @throws std::out_of_range when fewer than `size` bytes remain after the
//         current offset. The offset is left unchanged in that case.
void MemoryMappedFile::Read(void* data, uint64_t size)
{
	const uint64_t total = _impl->_size;

	// Overflow-safe bounds check: _offset + size could wrap around in uint64_t.
	if (_offset > total || size > total - _offset)
	{
		throw std::out_of_range("read out of bounds");
	}

	const auto* source = reinterpret_cast<const uint8_t*>(_impl->_view) + _offset;
	auto* target = reinterpret_cast<uint8_t*>(data);

	for (uint64_t index = 0; index < size; ++index)
	{
		target[index] = source[index];
	}

	_offset += size;
}

// Copies `size` bytes from `data` into the mapping at the current internal
// offset, then advances the internal offset by `size`.
//
// @param data  Source buffer holding at least `size` readable bytes.
// @param size  Number of bytes to copy.
// @throws std::out_of_range when fewer than `size` bytes remain after the
//         current offset. The offset is left unchanged in that case.
void MemoryMappedFile::Write(const void* data, uint64_t size)
{
	const uint64_t total = _impl->_size;

	// Overflow-safe bounds check: _offset + size could wrap around in uint64_t.
	if (_offset > total || size > total - _offset)
	{
		throw std::out_of_range("write out of bounds");
	}

	const auto* source = reinterpret_cast<const uint8_t*>(data);
	auto* target = reinterpret_cast<uint8_t*>(_impl->_view) + _offset;

	for (uint64_t index = 0; index < size; ++index)
	{
		target[index] = source[index];
	}

	_offset += size;
}
9 changes: 7 additions & 2 deletions PystykorvaLib/MemoryMappedFile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,19 @@ struct IOException : std::system_error
class MemoryMappedFile : public Pystykorva::IFile
{
public:
MemoryMappedFile(const std::filesystem::path&, uint64_t);
MemoryMappedFile(const std::filesystem::path& path, uint64_t size, bool readOnly);
~MemoryMappedFile();
NonCopyable(MemoryMappedFile);

std::string_view Sample(size_t size = 0x400) const override;
virtual uint64_t Size() const override;
std::string_view Sample(uint64_t size) const override;
std::string_view Chunk(uint64_t from, uint64_t size) const override;
std::string_view Data() const override;
void Read(void* data, uint64_t size) override;
void Write(const void* data, uint64_t size) override;

private:
MemoryMappedFileImpl* _impl;
uint64_t _offset = 0;
};

6 changes: 5 additions & 1 deletion PystykorvaLib/Pystykorva.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ class Pystykorva

struct IFile
{
virtual std::string_view Sample(size_t size = 0x400) const = 0;
virtual uint64_t Size() const = 0;
virtual std::string_view Sample(uint64_t size = 0x400) const = 0;
virtual std::string_view Chunk(uint64_t offset, uint64_t size) const = 0;
virtual std::string_view Data() const = 0;
virtual void Read(void* data, uint64_t size) = 0;
virtual void Write(const void* data, uint64_t size) = 0;
};

enum Status : uint32_t
Expand Down
4 changes: 2 additions & 2 deletions PystykorvaLib/TextProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ Pystykorva::Result TextProcessor::ProcessFile(const std::filesystem::path& path)
return result;
}

MemoryMappedFile file(path, fileSize);
MemoryMappedFile input(path, fileSize, true);

FindAll(file, result.Matches, result.Encoding);
FindAll(input, result.Matches, result.Encoding);
}
catch (const IOException&)
{
Expand Down
91 changes: 84 additions & 7 deletions PystykorvaLib/TextReplacer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,108 @@
class TextReplacerImpl
{
public:
TextReplacerImpl()
// Stores references to the input file and the search result, and opens an
// ICU converter for the encoding named by result.Encoding.Name.
// Throws ReplaceException when ucnv_open reports a failure status.
TextReplacerImpl(const Pystykorva::IFile& input, Pystykorva::Result& result) :
_input(input),
_result(result),
_converter(ucnv_open(result.Encoding.Name.data(), &_status))
{
// NOTE(review): ucnv_open takes a NUL-terminated name; confirm that
// Encoding.Name.data() is always terminated.
if (U_FAILURE(_status))
{
throw ReplaceException("ucnv_open failed");
}
}

// Closes the ICU converter, if one was opened.
~TextReplacerImpl()
{
	if (_converter == nullptr)
	{
		return;
	}

	ucnv_close(_converter);
}

NonCopyable(TextReplacerImpl);

// Writes the input to `output` with every recorded match replaced.
//
// The input is streamed in order: the untouched bytes before each match are
// copied verbatim, the source-encoded replacement is written in place of the
// match, and whatever follows the last match is copied at the end. Each
// match's LineContent is updated as well so the result can be rendered.
//
// @param output       Destination file; receives data through Write().
// @param replacement  Replacement text as UTF-16.
// @throws ReplaceException when the replacement cannot be encoded, or when a
//         match begins before the end of the previously processed match
//         (overlapping or unordered positions), since continuing would write
//         wrong bytes to the output.
void ReplaceAll(
	Pystykorva::IFile& output,
	std::u16string_view replacement)
{
	// Number of input bytes consumed so far
	uint64_t offset = 0;

	const std::string sourceEncodedReplacement = SourceEncode(replacement);

	for (Pystykorva::Match& match : _result.Matches)
	{
		// Net length change of LineContent caused by replacements already
		// applied to this line. Relative positions refer to the original
		// line, so later matches on the same line must be shifted by it.
		// Assumes positions within a match are in ascending order, as the
		// forward-moving byte cursor already requires.
		int64_t lineShift = 0;

		for (const auto& [relative, absolute] : match.Positions)
		{
			// TODO: progress reporting?

			if (absolute.Begin < offset)
			{
				throw ReplaceException("match positions overlap or are out of order");
			}

			const uint64_t chunkSize = absolute.Begin - offset;

			if (chunkSize)
			{
				auto chunk = _input.Chunk(offset, chunkSize);
				output.Write(chunk.data(), chunkSize);
				offset += chunkSize;
			}

			output.Write(sourceEncodedReplacement.data(), sourceEncodedReplacement.size());
			offset += absolute.Size();

			// In case someone wants to render the end result
			const auto shiftedBegin = static_cast<size_t>(
				static_cast<int64_t>(relative.Begin) + lineShift);

			match.LineContent.replace(shiftedBegin, relative.Size(), replacement);

			lineShift +=
				static_cast<int64_t>(replacement.size()) -
				static_cast<int64_t>(relative.Size());
		}
	}

	const uint64_t inputSize = _input.Size();

	// Guarded comparison instead of a bare subtraction, which would wrap
	// around if the matches ran past the end of the input.
	if (offset < inputSize)
	{
		const uint64_t bytesLeft = inputSize - offset;
		auto chunk = _input.Chunk(offset, bytesLeft);
		output.Write(chunk.data(), bytesLeft);
	}
}

private:
std::string SourceEncode(std::u16string_view replacement)
{
std::string buffer(replacement.size() * 2, '\0');
char* target = buffer.data();
char* targetLimit = buffer.data() + buffer.size();

const char16_t* source = replacement.data();
const char16_t* sourceLimit = replacement.data() + replacement.size();

ucnv_fromUnicode(_converter, &target, targetLimit, &source, sourceLimit, nullptr, true, &_status);

if (U_FAILURE(_status))
{
throw ReplaceException("ucnv_toUnicode failed");
}

buffer.resize(targetLimit - target);

return buffer;
}

const Pystykorva::IFile& _input;
Pystykorva::Result& _result;
UErrorCode _status = U_ZERO_ERROR;
UConverter* _converter = nullptr;
};

TextReplacer::TextReplacer() :
_impl(new TextReplacerImpl())
TextReplacer::TextReplacer(const Pystykorva::IFile& file, Pystykorva::Result& result) :
_impl(new TextReplacerImpl(file, result))
{
}

// Destroys the implementation object that the constructor allocated with new.
TextReplacer::~TextReplacer()
{
delete _impl;
}


void TextReplacer::ReplaceAll(Pystykorva::IFile&, Pystykorva::Match&, std::string_view)
void TextReplacer::ReplaceAll(
Pystykorva::IFile& output,
std::u16string_view replacement)
{
// TODO!
_impl->ReplaceAll(output, replacement);
}
4 changes: 2 additions & 2 deletions PystykorvaLib/TextReplacer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ class TextReplacerImpl;
class TextReplacer
{
public:
TextReplacer();
TextReplacer(const Pystykorva::IFile&, Pystykorva::Result&);
~TextReplacer();
NonCopyable(TextReplacer);

void ReplaceAll(Pystykorva::IFile&, Pystykorva::Match&, std::string_view);
void ReplaceAll(Pystykorva::IFile& output, std::u16string_view);

private:
TextReplacerImpl* _impl;
Expand Down
5 changes: 4 additions & 1 deletion PystykorvaLib/UnicodeConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ class UnicodeConverterImpl
UnicodeConverterImpl(std::string_view encoding) :
_converter(ucnv_open(encoding.data(), &_status))
{
assert(U_SUCCESS(_status));
if (U_FAILURE(_status))
{
throw ConversionException("ucnv_open failed");
}
}

~UnicodeConverterImpl()
Expand Down
Loading

0 comments on commit ded6872

Please sign in to comment.