Skip to content

Commit

Permalink
feat: embed and check githash in .olean
Browse files Browse the repository at this point in the history
  • Loading branch information
Kha committed Nov 4, 2023
1 parent 0285878 commit 0130134
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ jobs:
mkdir build
cd build
ulimit -c unlimited # coredumps
OPTIONS=()
OPTIONS=(-DUSE_GITHASH=ON) # this also enables githash embedding into .olean headers
if [[ -n '${{ matrix.prepare-llvm }}' ]]; then
wget -q ${{ matrix.llvm-url }}
PREPARE="$(${{ matrix.prepare-llvm }})"
Expand Down
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ foreach(var ${vars})
list(APPEND STAGE0_ARGS "-D${CMAKE_MATCH_1}=${${var}}")
elseif("${currentHelpString}" MATCHES "No help, variable specified on the command line." OR "${currentHelpString}" STREQUAL "")
list(APPEND CL_ARGS "-D${var}=${${var}}")
if("${var}" STREQUAL "USE_GMP")
if("${var}" MATCHES "USE_GMP|USE_GITHASH")
# must forward options that generate incompatible .olean format
list(APPEND STAGE0_ARGS "-D${var}=${${var}}")
endif()
Expand All @@ -34,8 +34,7 @@ ExternalProject_add(stage0
SOURCE_DIR "${LEAN_SOURCE_DIR}/stage0"
SOURCE_SUBDIR src
BINARY_DIR stage0
# do not rebuild stage0 when git hash changes; it's not from this commit anyway
CMAKE_ARGS -DSTAGE=0 -DUSE_GITHASH=OFF ${PLATFORM_ARGS} ${STAGE0_ARGS}
CMAKE_ARGS -DSTAGE=0 ${PLATFORM_ARGS} ${STAGE0_ARGS}
BUILD_ALWAYS ON # cmake doesn't auto-detect changes without a download method
INSTALL_COMMAND "" # skip install
DEPENDS ${EXTRA_DEPENDS}
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ option(SPLIT_STACK "SPLIT_STACK" OFF)
option(LLVM "LLVM" OFF)

# When ON we include githash in the version string
option(USE_GITHASH "GIT_HASH" ON)
option(USE_GITHASH "GIT_HASH" OFF)
# When ON we install LICENSE files to CMAKE_INSTALL_PREFIX
option(INSTALL_LICENSE "INSTALL_LICENSE" ON)
# When ON thread storage is automatically finalized, it assumes platform support pthreads.
Expand Down
55 changes: 41 additions & 14 deletions src/library/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@ Copyright (c) 2014-2015 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
.olean serialization and deserialization.
File format:
5 bytes: the string "olean"
1 byte: a file format version number, currently always 0x1
42 bytes: `LEAN_GITHASH` (empty for non-releases), padded with null bytes to the right if shorter
1 word: the base address
remainder: binary data
*/
#include <unordered_map>
#include <vector>
Expand All @@ -25,6 +35,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#include "library/constants.h"
#include "library/time_task.h"
#include "library/util.h"
#include "githash.h"

#ifdef LEAN_WINDOWS
#include <windows.h>
Expand All @@ -41,8 +52,14 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#endif

namespace lean {
// manually padded to multiple of word size, see `initialize_module`
static char const * g_olean_header = "oleanfile!!!!!!!";
static char const g_olean_marker[5] = {'o', 'l', 'e', 'a', 'n'};
static size_t const g_githash_header_size = 42;
static uint8_t const g_olean_version = 1;
static size_t const g_olean_header_size =
sizeof(g_olean_marker) + sizeof(g_olean_version) + g_githash_header_size + sizeof(size_t);
// remainder of file must be aligned to word size for `mmap`
static_assert(g_olean_header_size % sizeof(size_t) == 0, "olean header size must be multiple of word size");


extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg mod, b_obj_arg mdata, object *) {
std::string olean_fn(string_cstr(fname));
Expand Down Expand Up @@ -73,9 +90,16 @@ extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg
// `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB.
base_addr = base_addr & ~((1LL<<16) - 1);

object_compactor compactor(reinterpret_cast<void *>(base_addr + strlen(g_olean_header) + sizeof(base_addr)));
object_compactor compactor(reinterpret_cast<void *>(base_addr + g_olean_header_size));
compactor(mdata);
out.write(g_olean_header, strlen(g_olean_header));

// see/sync with file format description above
out.write(g_olean_marker, sizeof(g_olean_marker));
out.write(reinterpret_cast<const char *>(&g_olean_version), sizeof(g_olean_version));
out.write(LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH)));
if (g_githash_header_size > strlen(LEAN_GITHASH)) {
out << std::string(g_githash_header_size - strlen(LEAN_GITHASH), '\0');
}
out.write(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));
out.write(static_cast<char const *>(compactor.data()), compactor.size());
out.close();
Expand Down Expand Up @@ -116,19 +140,21 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
in.seekg(0, in.end);
size_t size = in.tellg();
in.seekg(0);
size_t header_size = strlen(g_olean_header);
if (size < header_size) {

// see/sync with file format description above
if (size < g_olean_header_size) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
char * header = new char[header_size];
in.read(header, header_size);
if (strncmp(header, g_olean_header, header_size) != 0) {
char * header = new char[g_olean_header_size];
in.read(header, g_olean_header_size);
if (memcmp(header, g_olean_marker, sizeof(g_olean_marker)) != 0 ||
memcmp(header + sizeof(g_olean_marker), &g_olean_version, sizeof(g_olean_version)) != 0 ||
memcmp(header + sizeof(g_olean_marker) + sizeof(g_olean_version), LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH))) != 0) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
delete[] header;
char * base_addr;
in.read(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));
header_size += sizeof(base_addr);
char * buffer = nullptr;
bool is_mmap = false;
std::function<void()> free_data;
Expand Down Expand Up @@ -166,24 +192,25 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
};
#endif
if (buffer && buffer == base_addr) {
buffer += header_size;
buffer += g_olean_header_size;
is_mmap = true;
} else {
#ifdef LEAN_MMAP
free_data();
#endif
buffer = static_cast<char *>(malloc(size - header_size));
buffer = static_cast<char *>(malloc(size - g_olean_header_size));
free_data = [=]() {
free(buffer);
};
in.read(buffer, size - header_size);
in.read(buffer, size - g_olean_header_size);
if (!in) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
}
}
in.close();

compacted_region * region = new compacted_region(size - header_size, buffer, base_addr + header_size, is_mmap, free_data);
compacted_region * region =
new compacted_region(size - g_olean_header_size, buffer, base_addr + g_olean_header_size, is_mmap, free_data);
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
// do not report as leak
Expand Down
31 changes: 26 additions & 5 deletions stage0/src/library/module.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0130134

Please sign in to comment.