From 0130134ce27281c70c9d8c2b11349bc4ac8f31c0 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 25 Oct 2023 16:44:05 +0200 Subject: [PATCH] feat: embed and check githash in .olean --- .github/workflows/ci.yml | 2 +- CMakeLists.txt | 5 ++-- src/CMakeLists.txt | 2 +- src/library/module.cpp | 55 ++++++++++++++++++++++++++--------- stage0/src/library/module.cpp | 31 ++++++++++++++++---- 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f686629527d5..2a7dfdec2036 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -248,7 +248,7 @@ jobs: mkdir build cd build ulimit -c unlimited # coredumps - OPTIONS=() + OPTIONS=(-DUSE_GITHASH=ON) # this also enables githash embedding into .olean headers if [[ -n '${{ matrix.prepare-llvm }}' ]]; then wget -q ${{ matrix.llvm-url }} PREPARE="$(${{ matrix.prepare-llvm }})" diff --git a/CMakeLists.txt b/CMakeLists.txt index 271ef9c4cdd3..646773fcd89c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ foreach(var ${vars}) list(APPEND STAGE0_ARGS "-D${CMAKE_MATCH_1}=${${var}}") elseif("${currentHelpString}" MATCHES "No help, variable specified on the command line." OR "${currentHelpString}" STREQUAL "") list(APPEND CL_ARGS "-D${var}=${${var}}") - if("${var}" STREQUAL "USE_GMP") + if("${var}" MATCHES "USE_GMP|USE_GITHASH") # must forward options that generate incompatible .olean format list(APPEND STAGE0_ARGS "-D${var}=${${var}}") endif() @@ -34,8 +34,7 @@ ExternalProject_add(stage0 SOURCE_DIR "${LEAN_SOURCE_DIR}/stage0" SOURCE_SUBDIR src BINARY_DIR stage0 - # do not rebuild stage0 when git hash changes; it's not from this commit anyway - CMAKE_ARGS -DSTAGE=0 -DUSE_GITHASH=OFF ${PLATFORM_ARGS} ${STAGE0_ARGS} + CMAKE_ARGS -DSTAGE=0 ${PLATFORM_ARGS} ${STAGE0_ARGS} BUILD_ALWAYS ON # cmake doesn't auto-detect changes without a download method INSTALL_COMMAND "" # skip install DEPENDS ${EXTRA_DEPENDS} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 59efb1afe648..1f6940f064e0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,7 +37,7 @@ option(SPLIT_STACK "SPLIT_STACK" OFF) option(LLVM "LLVM" OFF) # When ON we include githash in the version string -option(USE_GITHASH "GIT_HASH" ON) +option(USE_GITHASH "GIT_HASH" OFF) # When ON we install LICENSE files to CMAKE_INSTALL_PREFIX option(INSTALL_LICENSE "INSTALL_LICENSE" ON) # When ON thread storage is automatically finalized, it assumes platform support pthreads. diff --git a/src/library/module.cpp b/src/library/module.cpp index 7d93ca857c4f..3c4010dcf559 100644 --- a/src/library/module.cpp +++ b/src/library/module.cpp @@ -3,6 +3,16 @@ Copyright (c) 2014-2015 Microsoft Corporation. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich + +.olean serialization and deserialization. + +File format: + +5 bytes: the string "olean" +1 byte: a file format version number, currently always 0x1 +42 bytes: `LEAN_GITHASH` (empty for non-releases), padded with null bytes to the right if shorter +1 word: the base address +remainder: binary data */ #include #include @@ -25,6 +35,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #include "library/constants.h" #include "library/time_task.h" #include "library/util.h" +#include "githash.h" #ifdef LEAN_WINDOWS #include @@ -41,8 +52,14 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #endif namespace lean { -// manually padded to multiple of word size, see `initialize_module` -static char const * g_olean_header = "oleanfile!!!!!!!"; +static char const g_olean_marker[5] = {'o', 'l', 'e', 'a', 'n'}; +static size_t const g_githash_header_size = 42; +static uint8_t const g_olean_version = 1; +static size_t const g_olean_header_size = + sizeof(g_olean_marker) + sizeof(g_olean_version) + g_githash_header_size + sizeof(size_t); +// remainder of file must be aligned to word size for `mmap` +static_assert(g_olean_header_size % sizeof(size_t) == 0, "olean header size must be multiple of word size"); + extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg mod, b_obj_arg mdata, object *) { std::string olean_fn(string_cstr(fname)); @@ -73,9 +90,16 @@ extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg // `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB. base_addr = base_addr & ~((1LL<<16) - 1); - object_compactor compactor(reinterpret_cast(base_addr + strlen(g_olean_header) + sizeof(base_addr))); + object_compactor compactor(reinterpret_cast(base_addr + g_olean_header_size)); compactor(mdata); - out.write(g_olean_header, strlen(g_olean_header)); + + // see/sync with file format description above + out.write(g_olean_marker, sizeof(g_olean_marker)); + out.write(reinterpret_cast(&g_olean_version), sizeof(g_olean_version)); + out.write(LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH))); + if (g_githash_header_size > strlen(LEAN_GITHASH)) { + out << std::string(g_githash_header_size - strlen(LEAN_GITHASH), '\0'); + } out.write(reinterpret_cast(&base_addr), sizeof(base_addr)); out.write(static_cast(compactor.data()), compactor.size()); out.close(); @@ -116,19 +140,21 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *) in.seekg(0, in.end); size_t size = in.tellg(); in.seekg(0); - size_t header_size = strlen(g_olean_header); - if (size < header_size) { + + // see/sync with file format description above + if (size < g_olean_header_size) { return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); } - char * header = new char[header_size]; - in.read(header, header_size); - if (strncmp(header, g_olean_header, header_size) != 0) { + char * header = new char[g_olean_header_size]; + in.read(header, g_olean_header_size); + if (memcmp(header, g_olean_marker, sizeof(g_olean_marker)) != 0 || + memcmp(header + sizeof(g_olean_marker), &g_olean_version, sizeof(g_olean_version)) != 0 || + memcmp(header + sizeof(g_olean_marker) + sizeof(g_olean_version), LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH))) != 0) { return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); } delete[] header; char * base_addr; in.read(reinterpret_cast(&base_addr), sizeof(base_addr)); - header_size += sizeof(base_addr); char * buffer = nullptr; bool is_mmap = false; std::function free_data; @@ -166,24 +192,25 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *) }; #endif if (buffer && buffer == base_addr) { - buffer += header_size; + buffer += g_olean_header_size; is_mmap = true; } else { #ifdef LEAN_MMAP free_data(); #endif - buffer = static_cast(malloc(size - header_size)); + buffer = static_cast(malloc(size - g_olean_header_size)); free_data = [=]() { free(buffer); }; - in.read(buffer, size - header_size); + in.read(buffer, size - g_olean_header_size); if (!in) { return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str()); } } in.close(); - compacted_region * region = new compacted_region(size - header_size, buffer, base_addr + header_size, is_mmap, free_data); + compacted_region * region = + new compacted_region(size - g_olean_header_size, buffer, base_addr + g_olean_header_size, is_mmap, free_data); #if defined(__has_feature) #if __has_feature(address_sanitizer) // do not report as leak diff --git a/stage0/src/library/module.cpp b/stage0/src/library/module.cpp index 7d93ca857c4f..2f2d28192bf3 100644 --- a/stage0/src/library/module.cpp +++ b/stage0/src/library/module.cpp @@ -3,6 +3,16 @@ Copyright (c) 2014-2015 Microsoft Corporation. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich + +.olean serialization and deserialization. + +File format: + +9 bytes: the string "oleanfile" +39 bytes: `LEAN_GITHASH` (empty for non-releases), padded with null bytes to the right if shorter +1 word: the base address +remainder: binary data. MUST be aligned to word size, which for the above numbers (48 bytes + 1 word) + is true for platforms <= 128 bit */ #include #include @@ -25,6 +35,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #include "library/constants.h" #include "library/time_task.h" #include "library/util.h" +#include "githash.h" #ifdef LEAN_WINDOWS #include @@ -41,8 +52,9 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich #endif namespace lean { -// manually padded to multiple of word size, see `initialize_module` -static char const * g_olean_header = "oleanfile!!!!!!!"; +static char const * g_olean_header = "oleanfile"; +static size_t const g_githash_header_size = 39; + extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg mod, b_obj_arg mdata, object *) { std::string olean_fn(string_cstr(fname)); @@ -73,9 +85,15 @@ extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg // `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB. base_addr = base_addr & ~((1LL<<16) - 1); - object_compactor compactor(reinterpret_cast(base_addr + strlen(g_olean_header) + sizeof(base_addr))); + object_compactor compactor(reinterpret_cast(base_addr + strlen(g_olean_header) + g_githash_header_size + sizeof(base_addr))); compactor(mdata); + + // see/sync with file format description above out.write(g_olean_header, strlen(g_olean_header)); + out.write(LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH))); + if (g_githash_header_size > strlen(LEAN_GITHASH)) { + out << std::string(g_githash_header_size - strlen(LEAN_GITHASH), '\0'); + } out.write(reinterpret_cast(&base_addr), sizeof(base_addr)); out.write(static_cast(compactor.data()), compactor.size()); out.close(); @@ -116,13 +134,16 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *) in.seekg(0, in.end); size_t size = in.tellg(); in.seekg(0); - size_t header_size = strlen(g_olean_header); + + // see/sync with file format description above + size_t header_size = strlen(g_olean_header) + g_githash_header_size; if (size < header_size) { return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); } char * header = new char[header_size]; in.read(header, header_size); - if (strncmp(header, g_olean_header, header_size) != 0) { + if (memcmp(header, g_olean_header, strlen(g_olean_header)) != 0 || + memcmp(header + strlen(g_olean_header), LEAN_GITHASH, std::min(g_githash_header_size, strlen(LEAN_GITHASH))) != 0) { return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str()); } delete[] header;