Skip to content

Commit

Permalink
feat: embed and check githash in .olean (#2766)
Browse files Browse the repository at this point in the history
This is an additional safety net on top of #2749: it protects users that
circumvent the build system (e.g. with `lake env`) as well as obviates
the need for TOCTOU-like race condition checks in the build system.

The check is activated by `CHECK_OLEAN_VERSION=ON`, which now defaults
to `OFF` as the sensible default for local development. When activated,
`USE_GITHASH=ON` is also force-enabled for stage 0 in order to make sure
that stage 1 can load its own core library.
  • Loading branch information
Kha authored Nov 27, 2023
1 parent f142d9f commit 79251f5
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 71 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ jobs:
mkdir build
cd build
ulimit -c unlimited # coredumps
OPTIONS=()
# this also enables githash embedding into stage 1 library
OPTIONS=(-DCHECK_OLEAN_VERSION=ON)
if [[ -n '${{ matrix.prepare-llvm }}' ]]; then
wget -q ${{ matrix.llvm-url }}
PREPARE="$(${{ matrix.prepare-llvm }})"
Expand Down
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ foreach(var ${vars})
list(APPEND STAGE0_ARGS "-D${CMAKE_MATCH_1}=${${var}}")
elseif("${currentHelpString}" MATCHES "No help, variable specified on the command line." OR "${currentHelpString}" STREQUAL "")
list(APPEND CL_ARGS "-D${var}=${${var}}")
if("${var}" STREQUAL "USE_GMP")
if("${var}" MATCHES "USE_GMP|CHECK_OLEAN_VERSION")
# must forward options that generate incompatible .olean format
list(APPEND STAGE0_ARGS "-D${var}=${${var}}")
endif()
Expand All @@ -35,6 +35,8 @@ ExternalProject_add(stage0
SOURCE_SUBDIR src
BINARY_DIR stage0
# do not rebuild stage0 when git hash changes; it's not from this commit anyway
# (however, `CHECK_OLEAN_VERSION=ON` in CI will override this as we need to
# embed the githash into the stage 1 library built by stage 0)
CMAKE_ARGS -DSTAGE=0 -DUSE_GITHASH=OFF ${PLATFORM_ARGS} ${STAGE0_ARGS}
BUILD_ALWAYS ON # cmake doesn't auto-detect changes without a download method
INSTALL_COMMAND "" # skip install
Expand Down
16 changes: 4 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ option(BSYMBOLIC "Link with -Bsymbolic to reduce call overhead in shared librari
option(USE_GMP "USE_GMP" ON)

# development-specific options
option(CHECK_OLEAN_VERSION "Only load .olean files compiled with the current version of Lean" ON)
option(CHECK_OLEAN_VERSION "Only load .olean files compiled with the current version of Lean" OFF)

set(LEAN_EXTRA_MAKE_OPTS "" CACHE STRING "extra options to lean --make")
set(LEANC_CC ${CMAKE_C_COMPILER} CACHE STRING "C compiler to use in `leanc`")
Expand Down Expand Up @@ -93,8 +93,9 @@ if ("${RUNTIME_STATS}" MATCHES "ON")
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_RUNTIME_STATS")
endif()

if (NOT("${CHECK_OLEAN_VERSION}" MATCHES "ON"))
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_IGNORE_OLEAN_VERSION")
if ("${CHECK_OLEAN_VERSION}" MATCHES "ON")
set(USE_GITHASH ON)
string(APPEND LEAN_EXTRA_CXX_FLAGS " -D LEAN_CHECK_OLEAN_VERSION")
endif()

if(${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
Expand Down Expand Up @@ -401,26 +402,17 @@ if(MULTI_THREAD AND NOT MSVC AND (NOT ("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin"))
endif()

# Git HASH
set(LEAN_PACKAGE_VERSION "NOT-FOUND")
if(USE_GITHASH)
include(GetGitRevisionDescription)
get_git_head_revision(GIT_REFSPEC GIT_SHA1)
if(${GIT_SHA1} MATCHES "GITDIR-NOTFOUND")
message(STATUS "Failed to read git_sha1")
set(GIT_SHA1 "")
if(EXISTS "${LEAN_SOURCE_DIR}/bin/package_version")
file(STRINGS "${LEAN_SOURCE_DIR}/bin/package_version" LEAN_PACKAGE_VERSION)
message(STATUS "Package version detected: ${LEAN_PACKAGE_VERSION}")
endif()
else()
message(STATUS "git commit sha1: ${GIT_SHA1}")
endif()
else()
set(GIT_SHA1 "")
if(EXISTS "${LEAN_SOURCE_DIR}/bin/package_version")
file(STRINGS "${LEAN_SOURCE_DIR}/bin/package_version" LEAN_PACKAGE_VERSION)
message(STATUS "Package version detected: ${LEAN_PACKAGE_VERSION}")
endif()
endif()
configure_file("${LEAN_SOURCE_DIR}/githash.h.in" "${LEAN_BINARY_DIR}/githash.h")

Expand Down
60 changes: 42 additions & 18 deletions src/library/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Copyright (c) 2014-2015 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
.olean serialization and deserialization.
*/
#include <unordered_map>
#include <vector>
Expand All @@ -25,6 +27,7 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#include "library/constants.h"
#include "library/time_task.h"
#include "library/util.h"
#include "githash.h"

#ifdef LEAN_WINDOWS
#include <windows.h>
Expand All @@ -41,8 +44,22 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
#endif

namespace lean {
// manually padded to multiple of word size, see `initialize_module`
static char const * g_olean_header = "oleanfile!!!!!!!";

/** On-disk format of a .olean file. */
struct olean_header {
// 5 bytes: magic number
char marker[5] = {'o', 'l', 'e', 'a', 'n'};
// 1 byte: version, currently always `1`
uint8_t version = 1;
// 42 bytes: build githash, padded with `\0` to the right
char githash[42];
// address at which the beginning of the file (including header) is attempted to be mmapped
size_t base_addr;
// payload, a serialize Lean object graph; `size_t` has same alignment requirements as Lean objects
size_t data[];
};
// make sure we don't have any padding bytes, which also ensures `data` is properly aligned
static_assert(sizeof(olean_header) == 5 + 1 + 42 + sizeof(size_t), "olean_header must be packed");

extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg mod, b_obj_arg mdata, object *) {
std::string olean_fn(string_cstr(fname));
Expand Down Expand Up @@ -73,10 +90,14 @@ extern "C" LEAN_EXPORT object * lean_save_module_data(b_obj_arg fname, b_obj_arg
// `MapViewOfFileEx` addresses must be aligned to the "memory allocation granularity", which is 64KB.
base_addr = base_addr & ~((1LL<<16) - 1);

object_compactor compactor(reinterpret_cast<void *>(base_addr + strlen(g_olean_header) + sizeof(base_addr)));
object_compactor compactor(reinterpret_cast<void *>(base_addr + offsetof(olean_header, data)));
compactor(mdata);
out.write(g_olean_header, strlen(g_olean_header));
out.write(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));

// see/sync with file format description above
olean_header header = {};
header.base_addr = base_addr;
strncpy(header.githash, LEAN_GITHASH, sizeof(header.githash));
out.write(reinterpret_cast<char *>(&header), sizeof(header));
out.write(static_cast<char const *>(compactor.data()), compactor.size());
out.close();
while (std::rename(olean_tmp_fn.c_str(), olean_fn.c_str()) != 0) {
Expand Down Expand Up @@ -116,19 +137,21 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
in.seekg(0, in.end);
size_t size = in.tellg();
in.seekg(0);
size_t header_size = strlen(g_olean_header);
if (size < header_size) {

olean_header default_header = {};
olean_header header;
if (!in.read(reinterpret_cast<char *>(&header), sizeof(header))) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
char * header = new char[header_size];
in.read(header, header_size);
if (strncmp(header, g_olean_header, header_size) != 0) {
if (memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0
|| header.version != default_header.version
#ifdef LEAN_CHECK_OLEAN_VERSION
|| strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0
#endif
) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
}
delete[] header;
char * base_addr;
in.read(reinterpret_cast<char *>(&base_addr), sizeof(base_addr));
header_size += sizeof(base_addr);
char * base_addr = reinterpret_cast<char *>(header.base_addr);
char * buffer = nullptr;
bool is_mmap = false;
std::function<void()> free_data;
Expand Down Expand Up @@ -166,24 +189,25 @@ extern "C" LEAN_EXPORT object * lean_read_module_data(object * fname, object *)
};
#endif
if (buffer && buffer == base_addr) {
buffer += header_size;
buffer += sizeof(olean_header);
is_mmap = true;
} else {
#ifdef LEAN_MMAP
free_data();
#endif
buffer = static_cast<char *>(malloc(size - header_size));
buffer = static_cast<char *>(malloc(size - sizeof(olean_header)));
free_data = [=]() {
free(buffer);
};
in.read(buffer, size - header_size);
in.read(buffer, size - sizeof(olean_header));
if (!in) {
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
}
}
in.close();

compacted_region * region = new compacted_region(size - header_size, buffer, base_addr + header_size, is_mmap, free_data);
compacted_region * region =
new compacted_region(size - sizeof(olean_header), buffer, base_addr + sizeof(olean_header), is_mmap, free_data);
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
// do not report as leak
Expand Down
6 changes: 1 addition & 5 deletions src/library/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,11 +873,7 @@ void initialize_library_util() {
if (std::strlen(LEAN_SPECIAL_VERSION_DESC) > 0) {
out << "-" << LEAN_SPECIAL_VERSION_DESC;
}
if (std::strlen(LEAN_GITHASH) == 0) {
if (std::strcmp(LEAN_PACKAGE_VERSION, "NOT-FOUND") != 0) {
out << ", package " << LEAN_PACKAGE_VERSION;
}
} else {
if (std::strlen(LEAN_GITHASH) > 0) {
out << ", commit " << std::string(LEAN_GITHASH).substr(0, 12);
}
g_version_string = new std::string(out.str());
Expand Down
4 changes: 0 additions & 4 deletions src/version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,3 @@

// Additional version description like "nightly-2018-03-11"
#define LEAN_SPECIAL_VERSION_DESC "@LEAN_SPECIAL_VERSION_DESC@"

// When git_sha1 is not available, lean reads bin/version file and
// assign its contents to LEAN_PACKAGE_VERSION
#define LEAN_PACKAGE_VERSION "@LEAN_PACKAGE_VERSION@"
16 changes: 4 additions & 12 deletions stage0/src/CMakeLists.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 42 additions & 18 deletions stage0/src/library/module.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 79251f5

Please sign in to comment.