Skip to content

Commit

Permalink
feat: embed and check githash in .olean
Browse files Browse the repository at this point in the history
  • Loading branch information
Kha committed Nov 14, 2023
1 parent a0d7e52 commit f7f2244
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 53 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ jobs:
mkdir build
cd build
ulimit -c unlimited # coredumps
OPTIONS=()
# this also enables githash embedding into stage 1 library
OPTIONS=(-DCHECK_OLEAN_VERSION=ON)
if [[ -n '${{ matrix.prepare-llvm }}' ]]; then
wget -q ${{ matrix.llvm-url }}
PREPARE="$(${{ matrix.prepare-llvm }})"
Expand Down
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ foreach(var ${vars})
list(APPEND STAGE0_ARGS "-D${CMAKE_MATCH_1}=${${var}}")
elseif("${currentHelpString}" MATCHES "No help, variable specified on the command line." OR "${currentHelpString}" STREQUAL "")
list(APPEND CL_ARGS "-D${var}=${${var}}")
if("${var}" STREQUAL "USE_GMP")
if("${var}" MATCHES "USE_GMP|CHECK_OLEAN_VERSION")
# must forward options that generate incompatible .olean format
list(APPEND STAGE0_ARGS "-D${var}=${${var}}")
endif()
Expand All @@ -35,6 +35,8 @@ ExternalProject_add(stage0
SOURCE_SUBDIR src
BINARY_DIR stage0
# do not rebuild stage0 when git hash changes; it's not from this commit anyway
# (however, `CHECK_OLEAN_VERSION=ON` in CI will override this as we need to
# embed the githash into the stage 1 library built by stage 0)
CMAKE_ARGS -DSTAGE=0 -DUSE_GITHASH=OFF ${PLATFORM_ARGS} ${STAGE0_ARGS}
BUILD_ALWAYS ON # cmake doesn't auto-detect changes without a download method
INSTALL_COMMAND "" # skip install
Expand Down
7 changes: 4 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ option(BSYMBOLIC "Link with -Bsymbolic to reduce call overhead in shared librari
option(USE_GMP "USE_GMP" ON)

# development-specific options
option(CHECK_OLEAN_VERSION "Only load .olean files compiled with the current version of Lean" ON)
option(CHECK_OLEAN_VERSION "Only load .olean files compiled with the current version of Lean" OFF)

set(LEAN_EXTRA_MAKE_OPTS "" CACHE STRING "extra options to lean --make")
set(LEANC_CC ${CMAKE_C_COMPILER} CACHE STRING "C compiler to use in `leanc`")
Expand Down Expand Up @@ -93,8 +93,9 @@ if ("${RUNTIME_STATS}" MATCHES "ON")
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_RUNTIME_STATS")
endif()

if (NOT("${CHECK_OLEAN_VERSION}" MATCHES "ON"))
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_IGNORE_OLEAN_VERSION")
if ("${CHECK_OLEAN_VERSION}" MATCHES "ON")
set(USE_GITHASH ON)
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_CHECK_OLEAN_VERSION")
endif()

if(${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
Expand Down
60 changes: 42 additions & 18 deletions src/library/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Copyright (c) 2014-2015 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
.olean serialization and deserialization.
*/
#include <unordered_map>
#include <vector>
Expand All @@ -25,6 +27,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#include "library/constants.h"
#include "library/time_task.h"
#include "library/util.h"
#include "githash.h"

#ifdef LEAN_WINDOWS
#include <windows.h>
Expand All @@ -41,8 +44,22 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#endif

namespace lean {
// manually padded to multiple of word size, see `initialize_module`
static char const * g_olean_header = "oleanfile!!!!!!!";

/** On-disk format of a .olean file. */
struct olean_header {
// 5 bytes: magic number
char marker[5] = {'o', 'l', 'e', 'a', 'n'};
// 1 byte: version, currently always `1`
uint8_t version = 1;
// 42 bytes: build githash, padded with `\0` to the right
char githash[42];
// address at which the beginning of the file (including header) is attempted to be mmapped
size_t base_addr;
// payload, a serialize Lean object graph; `size_t` has same alignment requirements as Lean objects
size_t data[];
};
// make sure we don't have any padding bytes, which also ensures `data` is properly aligned
static_assert(sizeof(olean_header) == 5 + 1 + 42 + sizeof(size_t), "olean_header must be packed");

extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg mod, b_obj_arg mdata, object *) {
std::string olean_fn(string_cstr(fname));
Expand Down Expand Up @@ -73,10 +90,14 @@ extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg
// `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB.
base_addr = base_addr & ~((1LL<<16) - 1);

object_compactor compactor(reinterpret_cast<void *>(base_addr + strlen(g_olean_header) + sizeof(base_addr)));
object_compactor compactor(reinterpret_cast<void *>(base_addr + offsetof(olean_header, data)));
compactor(mdata);
out.write(g_olean_header, strlen(g_olean_header));
out.write(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));

// see/sync with file format description above
olean_header header = {};
header.base_addr = base_addr;
strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash));
out.write(reinterpret_cast<char *>(&header), sizeof(header));
out.write(static_cast<char const *>(compactor.data()), compactor.size());
out.close();
while (std::rename(olean_tmp_fn.c_str(), olean_fn.c_str()) != 0) {
Expand Down Expand Up @@ -116,19 +137,21 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
in.seekg(0, in.end);
size_t size = in.tellg();
in.seekg(0);
size_t header_size = strlen(g_olean_header);
if (size < header_size) {

olean_header default_header = {};
olean_header header;
if (!in.read(reinterpret_cast<char *>(&header), sizeof(header))) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
char * header = new char[header_size];
in.read(header, header_size);
if (strncmp(header, g_olean_header, header_size) != 0) {
if (memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0
|| header.version != default_header.version
#ifdef LEAN_CHECK_OLEAN_VERSION
|| strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0
#endif
) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
delete[] header;
char * base_addr;
in.read(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));
header_size += sizeof(base_addr);
char * base_addr = reinterpret_cast<char *>(header.base_addr);
char * buffer = nullptr;
bool is_mmap = false;
std::function<void()> free_data;
Expand Down Expand Up @@ -166,24 +189,25 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
};
#endif
if (buffer && buffer == base_addr) {
buffer += header_size;
buffer += sizeof(olean_header);
is_mmap = true;
} else {
#ifdef LEAN_MMAP
free_data();
#endif
buffer = static_cast<char *>(malloc(size - header_size));
buffer = static_cast<char *>(malloc(size - sizeof(olean_header)));
free_data = [=]() {
free(buffer);
};
in.read(buffer, size - header_size);
in.read(buffer, size - sizeof(olean_header));
if (!in) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
}
}
in.close();

compacted_region * region = new compacted_region(size - header_size, buffer, base_addr + header_size, is_mmap, free_data);
compacted_region * region =
new compacted_region(size - sizeof(olean_header), buffer, base_addr + sizeof(olean_header), is_mmap, free_data);
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
// do not report as leak
Expand Down
16 changes: 4 additions & 12 deletions stage0/src/CMakeLists.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 42 additions & 18 deletions stage0/src/library/module.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f7f2244

Please sign in to comment.