Skip to content

Commit

Permalink
MAJOR CHANGE: USE MEMORY MAPPED FILE
Browse files Browse the repository at this point in the history
- This just makes everything so much simpler
- The performance seems to have improved a bit
  - However, it's a file searcher hence pretty much everything is IO bound...
  • Loading branch information
visuve committed Apr 12, 2024
1 parent e66b429 commit 7a90b25
Show file tree
Hide file tree
Showing 14 changed files with 267 additions and 213 deletions.
8 changes: 3 additions & 5 deletions PystykorvaCLI/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ Pystykorva::Options Deserialize(const CmdArgs& args)
options.MinimumTime = args.Value<std::chrono::file_clock::time_point>("mintime");
options.MaximumTime = args.Value<std::chrono::file_clock::time_point>("maxtime");

// 64 kib should be decent for most text files
options.BufferSize = args.Value<uint32_t>("buffersize");

// On my 16 core CPU, hardware_concurrency returns 32, which is fine as I have SMT
options.MaximumThreads = args.Value<uint32_t>("maxthreads");

Expand Down Expand Up @@ -131,8 +128,7 @@ Console& operator << (Console& stream, const Pystykorva::Match& result)

std::mutex _mutex;

void ReportProcessing(
const std::filesystem::path& path)
void ReportProcessing(const std::filesystem::path& path)
{
_CRT_UNUSED(path);
#if _DEBUG
Expand Down Expand Up @@ -174,6 +170,8 @@ void ReportResults(

void ReportFinished(std::chrono::milliseconds ms)
{
std::lock_guard<std::mutex> guard(_mutex);

Cout << "\nPystykorva finished!\n\n";
Cout << "Statistics:\n";
Cout << "\tTook: " << std::format("{:%T}\n", ms);
Expand Down
65 changes: 0 additions & 65 deletions PystykorvaLib/BufferedStream.cpp

This file was deleted.

28 changes: 0 additions & 28 deletions PystykorvaLib/BufferedStream.hpp

This file was deleted.

158 changes: 158 additions & 0 deletions PystykorvaLib/MemoryMappedFile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#include "PCH.hpp"
#include "MemoryMappedFile.hpp"

#ifdef _WIN32

#define NOMINMAX
#include <Windows.h>

// Windows implementation: maps an existing file into memory for reading via
// CreateFileW / CreateFileMappingW / MapViewOfFile and exposes the bytes as
// std::string_view.
class MemoryMappedFileImpl
{
public:
    // Opens `path` and maps its first `fileSize` bytes read-only.
    //
    // Throws std::system_error carrying GetLastError() when any step fails.
    // Everything acquired before the failure is released first, because the
    // destructor does not run for an object whose constructor threw.
    //
    // NOTE(review): per the CreateFileMappingW documentation a zero-length
    // file cannot be mapped, so an empty file ends up in the throw path.
    MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize) :
        _file(CreateFileW(
            path.c_str(),
            // Only std::string_view (read-only) access is ever handed out,
            // so read access is sufficient. Asking for write access with an
            // exclusive share mode made read-only files and files opened by
            // other readers fail to open.
            GENERIC_READ,
            FILE_SHARE_READ,
            nullptr,
            OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL,
            nullptr))
    {
        if (!_file || _file == INVALID_HANDLE_VALUE)
        {
            const int error = static_cast<int>(GetLastError());
            _file = nullptr;
            throw std::system_error(error, std::system_category(), "CreateFileW");
        }

        LARGE_INTEGER mappingSize;
        mappingSize.QuadPart = fileSize;

        // A read-only mapping can never grow the file on disk, which a
        // writable mapping would do if fileSize exceeded the real size.
        _mapping = CreateFileMappingW(
            _file,
            nullptr,
            PAGE_READONLY,
            mappingSize.HighPart,
            mappingSize.LowPart,
            nullptr);

        if (!_mapping)
        {
            // Capture the error before cleanup; CloseHandle may overwrite it.
            const int error = static_cast<int>(GetLastError());
            Release();
            throw std::system_error(error, std::system_category(), "CreateFileMappingW");
        }

        _view = MapViewOfFile(_mapping, FILE_MAP_READ, 0, 0, static_cast<size_t>(fileSize));

        if (!_view)
        {
            const int error = static_cast<int>(GetLastError());
            Release();
            throw std::system_error(error, std::system_category(), "MapViewOfFile");
        }

        _size = fileSize;
    }

    // The object owns raw OS handles; copying would release them twice.
    MemoryMappedFileImpl(const MemoryMappedFileImpl&) = delete;
    MemoryMappedFileImpl& operator=(const MemoryMappedFileImpl&) = delete;

    // Unmaps the view and closes the mapping and file handles.
    ~MemoryMappedFileImpl()
    {
        Release();
    }

    // Returns a view over all mapped bytes. The view is only valid while
    // this object is alive.
    std::string_view Data() const
    {
        return { static_cast<const char*>(_view), static_cast<size_t>(_size) };
    }

    // Returns a view over the first `size` mapped bytes, or over all of them
    // when fewer than `size` bytes are mapped.
    std::string_view Sample(size_t size) const
    {
        // Explicit template argument: size_t and uint64_t are distinct types
        // on 32-bit targets and std::min cannot deduce a common one.
        const uint64_t length = std::min<uint64_t>(size, _size);
        return { static_cast<const char*>(_view), static_cast<size_t>(length) };
    }

private:
    // Releases whatever has been acquired so far, in reverse order of
    // acquisition. Safe to call on a partially constructed object.
    void Release() noexcept
    {
        if (_view)
        {
            UnmapViewOfFile(_view);
            _view = nullptr;
        }

        if (_mapping)
        {
            CloseHandle(_mapping);
            _mapping = nullptr;
        }

        if (_file && _file != INVALID_HANDLE_VALUE)
        {
            CloseHandle(_file);
            _file = nullptr;
        }
    }

    HANDLE _file = nullptr;
    HANDLE _mapping = nullptr;
    void* _view = nullptr;
    uint64_t _size = 0;
};
#else

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>

// POSIX implementation: maps an existing file into memory for reading via
// open / mmap and exposes the bytes as std::string_view.
class MemoryMappedFileImpl
{
public:
    // Opens `path` read-only and maps its first `fileSize` bytes.
    //
    // Throws std::system_error carrying errno when open or mmap fails. The
    // descriptor is closed before throwing from the mmap path, because the
    // destructor does not run for an object whose constructor threw.
    //
    // NOTE(review): POSIX mmap rejects a length of zero, so an empty file
    // ends up in the throw path.
    MemoryMappedFileImpl(const std::filesystem::path& path, uint64_t fileSize) :
        _descriptor(open(path.c_str(), O_RDONLY))
    {
        if (_descriptor == -1)
        {
            throw std::system_error(errno, std::system_category(), "open");
        }

        // The length must come from the argument: _size is still zero here.
        void* const view = mmap(
            nullptr,
            static_cast<size_t>(fileSize),
            PROT_READ,
            MAP_PRIVATE,
            _descriptor,
            0);

        if (view == MAP_FAILED)
        {
            // Capture errno before close() gets a chance to overwrite it.
            const int error = errno;
            ::close(_descriptor);
            _descriptor = -1;
            throw std::system_error(error, std::system_category(), "mmap");
        }

        _view = view;
        _size = fileSize;
    }

    // The object owns a descriptor and a mapping; copying would release
    // them twice.
    MemoryMappedFileImpl(const MemoryMappedFileImpl&) = delete;
    MemoryMappedFileImpl& operator=(const MemoryMappedFileImpl&) = delete;

    // Unmaps the view and closes the file descriptor.
    ~MemoryMappedFileImpl()
    {
        if (_view)
        {
            munmap(_view, static_cast<size_t>(_size));
        }

        // -1 is the "no descriptor" sentinel; 0 is a valid descriptor.
        if (_descriptor != -1)
        {
            ::close(_descriptor);
        }
    }

    // Returns a view over the first `size` mapped bytes, or over all of them
    // when fewer than `size` bytes are mapped.
    std::string_view Sample(size_t size) const
    {
        // Explicit template argument: size_t and uint64_t are distinct types
        // on some platforms and std::min cannot deduce a common one.
        const uint64_t length = std::min<uint64_t>(_size, size);
        return { static_cast<const char*>(_view), static_cast<size_t>(length) };
    }

    // Returns a view over all mapped bytes. The view is only valid while
    // this object is alive.
    std::string_view Data() const
    {
        return { static_cast<const char*>(_view), static_cast<size_t>(_size) };
    }

private:
    int _descriptor = -1;
    void* _view = nullptr;
    uint64_t _size = 0;
};
#endif

// Builds the platform-specific implementation object for `path`, forwarding
// `fileSize` to it unchanged. Any exception thrown by the implementation's
// constructor propagates to the caller.
MemoryMappedFile::MemoryMappedFile(const std::filesystem::path& path, uint64_t fileSize) :
    _impl(nullptr)
{
    _impl = new MemoryMappedFileImpl(path, fileSize);
}

// Destroys the implementation object created by the constructor.
MemoryMappedFile::~MemoryMappedFile()
{
    MemoryMappedFileImpl* const owned = _impl;
    delete owned;
}

// Forwards to the implementation's Sample() and returns its view unchanged.
std::string_view MemoryMappedFile::Sample(size_t size) const
{
    const std::string_view head = _impl->Sample(size);
    return head;
}

// Forwards to the implementation's Data() and returns its view unchanged.
std::string_view MemoryMappedFile::Data() const
{
    const std::string_view contents = _impl->Data();
    return contents;
}
21 changes: 21 additions & 0 deletions PystykorvaLib/MemoryMappedFile.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include "Pystykorva.hpp"

class MemoryMappedFileImpl;

// This interface has no other meaning than to allow testing

class MemoryMappedFile : public Pystykorva::IFile
{
public:
MemoryMappedFile(const std::filesystem::path&, uint64_t);
~MemoryMappedFile();

std::string_view Sample(size_t size = 0x400) const override;
std::string_view Data() const override;

private:
MemoryMappedFileImpl* _impl;
};

1 change: 0 additions & 1 deletion PystykorvaLib/Pystykorva.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ Pystykorva::Pystykorva(const Options& options, const Callbacks& callbacks) :
_callbacks(callbacks),
_rdi(options.Directory)
{
assert(_options.BufferSize > _options.SearchExpression.size());
}

Pystykorva::~Pystykorva()
Expand Down
8 changes: 6 additions & 2 deletions PystykorvaLib/Pystykorva.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ class Pystykorva
std::chrono::time_point<std::chrono::file_clock> MinimumTime;
std::chrono::time_point<std::chrono::file_clock> MaximumTime;

uint32_t BufferSize = 0;

// Zero will default to std::thread::hardware_concurrency or 1
uint32_t MaximumThreads = 0;
};

// Abstract read-only access to a file's contents.
struct IFile
{
    // Virtual so that an implementation deleted through an IFile pointer is
    // destroyed completely; without it that deletion is undefined behaviour.
    virtual ~IFile() = default;

    // Returns a view of the leading part of the contents; `size` is the
    // requested number of bytes (0x400 by default).
    // NOTE(review): the memory-mapped implementation returns fewer bytes
    // when the file is shorter; confirm other implementations agree.
    virtual std::string_view Sample(size_t size = 0x400) const = 0;

    // Returns a view of the entire contents.
    virtual std::string_view Data() const = 0;
};

enum Status : uint32_t
{
Ok = 0x00,
Expand Down
4 changes: 2 additions & 2 deletions PystykorvaLib/PystykorvaLib.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<ItemGroup>
<ClCompile Include="EncodingDetector.cpp" />
<ClCompile Include="LineAnalyzer.cpp" />
<ClCompile Include="BufferedStream.cpp" />
<ClCompile Include="MemoryMappedFile.cpp" />
<ClCompile Include="TextProcessor.cpp" />
<ClCompile Include="Pystykorva.cpp" />
<ClCompile Include="PCH.cpp">
Expand All @@ -27,7 +27,7 @@
<ItemGroup>
<ClInclude Include="EncodingDetector.hpp" />
<ClInclude Include="LineAnalyzer.hpp" />
<ClInclude Include="BufferedStream.hpp" />
<ClInclude Include="MemoryMappedFile.hpp" />
<ClInclude Include="TextProcessor.hpp" />
<ClInclude Include="Pystykorva.hpp" />
<ClInclude Include="PCH.hpp" />
Expand Down
Loading

0 comments on commit 7a90b25

Please sign in to comment.