Skip to content

Commit

Permalink
Remove built cache of previous git commits. (#15344)
Browse files Browse the repository at this point in the history
### Ticket
[Link to GitHub Issue](#14619)

### Problem description
Previously built files might still be reused after tt_metal is
updated.

### What's changed
1. During the CMake build, add a compile definition for GIT_COMMIT_HASH,
using the `VERSION_HASH` set by the parent CMakeLists.txt.
2. build.cpp checks for the git hash define (GIT_COMMIT_HASH). If the
hash is not found, it proceeds without changing any previously built
folders, and the root folder is 'built/'.
3. If git hash is found, the root folder will be
'built/GIT_COMMIT_HASH'.
4. If the git hash is found and TT_METAL_SKIP_DELETING_BUILT_CACHE is NOT
set, everything in 'built' is deleted except for the
'built/GIT_COMMIT_HASH' folder.
5. If git hash is found, and TT_METAL_SKIP_DELETING_BUILT_CACHE is set,
no directory is deleted. Root for current build will be
'built/GIT_COMMIT_HASH'


### Checklist
- [x] Post commit CI passes -
https://github.com/tenstorrent/tt-metal/actions/runs/11966119330
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
spoojaryTT authored Dec 13, 2024
1 parent 3594cff commit 3d78f0c
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 38 deletions.
2 changes: 1 addition & 1 deletion tests/tt_metal/tt_metal/test_compile_args.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ int main(int argc, char** argv) {
tt_metal::Device* device = tt_metal::CreateDevice(device_id);
// Remove old compiled kernels
static const std::string kernel_name = "test_compile_args";
auto binary_path_str = jit_build_get_kernel_compile_outpath(device->build_key()) + kernel_name;
auto binary_path_str = device->build_env().get_out_kernel_root_path() + kernel_name;
std::filesystem::remove_all(binary_path_str);

pass &= test_compile_args({0, 68, 0, 124}, device);
Expand Down
36 changes: 17 additions & 19 deletions tests/tt_metal/tt_metal/test_compile_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,21 @@ struct KernelCacheStatus {
std::unordered_map<std::string, bool> kernel_name_to_cache_hit;
};

void ClearKernelCache(uint32_t build_key) {
std::filesystem::remove_all(jit_build_get_kernel_compile_outpath(build_key));
void ClearKernelCache (const string& kernel_root_path) {
std::filesystem::remove_all(kernel_root_path);
detail::HashLookup::inst().clear();
}

// This assumes binaries are written to specific location: kernel_compile_outpath / kernel_name / hash
std::unordered_map<std::string, std::string> get_last_program_binary_path(const Program& program, int build_key) {
std::unordered_map<std::string, std::string> get_last_program_binary_path(const Program& program, const string& kernel_root_path) {
std::unordered_map<std::string, std::string> kernel_name_to_last_compiled_dir;
auto root_dir = jit_build_get_kernel_compile_outpath(build_key);
for (size_t kernel_id = 0; kernel_id < program.num_kernels(); kernel_id++) {
auto kernel = detail::GetKernel(program, kernel_id);
if (not std::filesystem::exists(root_dir + kernel->name())) {
if (not std::filesystem::exists(kernel_root_path + kernel->name())) {
continue;
}

std::filesystem::path kernel_path{root_dir + kernel->name()};
std::filesystem::path kernel_path{kernel_root_path + kernel->name()};
std::filesystem::file_time_type ftime = std::filesystem::last_write_time(*kernel_path.begin());
std::string latest_hash;
for (auto const& dir_entry : std::filesystem::directory_iterator{kernel_path}) {
Expand All @@ -60,14 +59,13 @@ std::unordered_map<std::string, std::string> get_last_program_binary_path(const
// TODO: Replace this when we have debug/test hooks (GH: #964) to inspect inside CompileProgram
KernelCacheStatus CompileProgramTestWrapper(Device* device, Program& program, bool profile_kernel = false) {
// Check
auto root_dir = jit_build_get_kernel_compile_outpath(device->build_key());
std::unordered_map<std::string, std::string> pre_compile_kernel_to_hash_str =
get_last_program_binary_path(program, device->build_key());
get_last_program_binary_path(program, device->build_env().get_out_kernel_root_path());

detail::CompileProgram(device, program);

std::unordered_map<std::string, std::string> post_compile_kernel_to_hash_str =
get_last_program_binary_path(program, device->build_key());
get_last_program_binary_path(program, device->build_env().get_out_kernel_root_path());

KernelCacheStatus kernel_cache_status;
for (const auto& [kernel_name, hash_str] : post_compile_kernel_to_hash_str) {
Expand Down Expand Up @@ -152,12 +150,12 @@ Program create_program(Device* device, const ProgramAttributes& program_attribut
}

void assert_kernel_binary_path_exists(
const Program& program, int build_key, const KernelCacheStatus& kernel_cache_status) {
const Program& program, const string& kernel_root_path, const KernelCacheStatus &kernel_cache_status) {
auto kernel_name_to_hash = kernel_cache_status.kernel_name_to_hash_str;
for (size_t kernel_id = 0; kernel_id < program.num_kernels(); kernel_id++) {
auto kernel = detail::GetKernel(program, kernel_id);
auto hash = kernel_name_to_hash.at(kernel->name());
auto kernel_binary_path = jit_build_get_kernel_compile_outpath(build_key) + kernel->name() + "/" + hash;
auto kernel_binary_path = kernel_root_path + kernel->name() + "/" + hash;
TT_FATAL(std::filesystem::exists(kernel_binary_path), "Expected {} folder to exist!", kernel_binary_path);
}
}
Expand Down Expand Up @@ -188,7 +186,7 @@ void assert_kernel_hash_matches(
bool test_compile_program_in_loop(Device* device) {
bool pass = true;

ClearKernelCache(device->build_key());
ClearKernelCache(device->build_env().get_out_kernel_root_path());
ProgramAttributes default_attributes;
auto program = create_program(device, default_attributes);

Expand All @@ -197,7 +195,7 @@ bool test_compile_program_in_loop(Device* device) {
for (int compile_idx = 0; compile_idx < num_compiles; compile_idx++) {
auto kernel_cache_status = CompileProgramTestWrapper(device, program);
if (compile_idx == 0) {
assert_kernel_binary_path_exists(program, device->build_key(), kernel_cache_status);
assert_kernel_binary_path_exists(program, device->build_env().get_out_kernel_root_path(), kernel_cache_status);
assert_program_cache_hit_status(program, /*hit_expected=*/false, kernel_cache_status);
kernel_name_to_hash = kernel_cache_status.kernel_name_to_hash_str;
} else {
Expand All @@ -212,18 +210,18 @@ bool test_compile_program_in_loop(Device* device) {
bool test_compile_program_after_clean_kernel_binary_directory(Device* device) {
bool pass = true;

ClearKernelCache(device->build_key());
ClearKernelCache(device->build_env().get_out_kernel_root_path());

ProgramAttributes default_attributes;
auto program = create_program(device, default_attributes);

auto kernel_cache_status = CompileProgramTestWrapper(device, program);

assert_kernel_binary_path_exists(program, device->build_key(), kernel_cache_status);
assert_kernel_binary_path_exists(program, device->build_env().get_out_kernel_root_path(), kernel_cache_status);
assert_program_cache_hit_status(program, /*hit_expected=*/false, kernel_cache_status);
std::unordered_map<std::string, std::string> kernel_name_to_hash = kernel_cache_status.kernel_name_to_hash_str;

ClearKernelCache(device->build_key());
ClearKernelCache(device->build_env().get_out_kernel_root_path());
auto second_program = create_program(device, default_attributes);
auto second_kernel_cache_status = CompileProgramTestWrapper(device, second_program);
assert_program_cache_hit_status(second_program, /*hit_expected=*/false, second_kernel_cache_status);
Expand Down Expand Up @@ -275,7 +273,7 @@ std::unordered_map<std::string, std::string> compile_program_with_modified_kerne
const std::unordered_map<tt::RISCV, bool>& kernel_type_to_cache_hit_status) {
auto program = create_program(device, attributes);
auto kernel_cache_status = CompileProgramTestWrapper(device, program);
assert_kernel_binary_path_exists(program, device->build_key(), kernel_cache_status);
assert_kernel_binary_path_exists(program, device->build_env().get_out_kernel_root_path(), kernel_cache_status);
assert_cache_hit_status_for_kernel_type(program, kernel_type_to_cache_hit_status, kernel_cache_status);
assert_hash_comparison_for_kernel_type(
program, prev_kernel_name_to_hash, kernel_type_to_cache_hit_status, kernel_cache_status);
Expand All @@ -298,12 +296,12 @@ bool test_compile_program_with_modified_program(Device* device) {
const static std::unordered_map<tt::RISCV, bool> compute_miss_data_movement_miss = {
{tt::RISCV::COMPUTE, false}, {tt::RISCV::BRISC, false}, {tt::RISCV::NCRISC, false}};

ClearKernelCache(device->build_key());
ClearKernelCache(device->build_env().get_out_kernel_root_path());

ProgramAttributes attributes;
auto program = create_program(device, attributes);
auto kernel_cache_status = CompileProgramTestWrapper(device, program);
assert_kernel_binary_path_exists(program, device->build_key(), kernel_cache_status);
assert_kernel_binary_path_exists(program, device->build_env().get_out_kernel_root_path(), kernel_cache_status);
assert_program_cache_hit_status(program, /*hit_expected=*/false, kernel_cache_status);
std::unordered_map<std::string, std::string> kernel_name_to_hash = kernel_cache_status.kernel_name_to_hash_str;

Expand Down
19 changes: 8 additions & 11 deletions tests/tt_metal/tt_metal/test_compile_sets_kernel_binaries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@
using std::vector;
using namespace tt;

std::string get_latest_kernel_binary_path(uint32_t mask, const std::shared_ptr<Kernel>& kernel) {
auto root_dir = jit_build_get_kernel_compile_outpath(mask);
std::string get_latest_kernel_binary_path(const string& kernel_root_path, const std::shared_ptr<Kernel>& kernel) {
TT_FATAL(kernel != nullptr, "Error");
TT_FATAL(std::filesystem::exists(root_dir + kernel->name()), "Error");
TT_FATAL(std::filesystem::exists(kernel_root_path + kernel->name()), "Error");

std::filesystem::path kernel_path{root_dir + kernel->name()};
std::filesystem::path kernel_path{kernel_root_path + kernel->name()};
std::filesystem::file_time_type ftime = std::filesystem::last_write_time(*kernel_path.begin());
std::string latest_hash;
for (auto const& dir_entry : std::filesystem::directory_iterator{kernel_path}) {
Expand Down Expand Up @@ -116,15 +115,13 @@ int main(int argc, char** argv) {
tt::DevicePool::initialize(ids, 1, DEFAULT_L1_SMALL_SIZE, DEFAULT_TRACE_REGION_SIZE, DispatchCoreConfig{});
auto devices = tt::DevicePool::instance().get_all_active_devices();
std::vector<Program> programs;
std::set<uint32_t> build_keys;
// kernel->binaries() returns 32B aligned binaries
std::map<uint32_t, std::vector<ll_api::memory const*>> compute_binaries;
std::map<uint32_t, std::vector<ll_api::memory const*>> brisc_binaries;
std::map<uint32_t, std::vector<ll_api::memory const*>> ncrisc_binaries;

for (int i = 0; i < num_devices; i++) {
auto device = devices[i];
build_keys.insert(device->build_key());

////////////////////////////////////////////////////////////////////////////
// Application Setup
Expand Down Expand Up @@ -166,9 +163,9 @@ int main(int argc, char** argv) {
int num_compiles = 3;
for (int i = 0; i < 3; i++) {
std::vector<string> kernel_names = {"reader_unary_push_4", "writer_unary", "eltwise_copy_3m"};
for (auto build_key : build_keys) {
for (int i = 0; i < num_devices; i++) {
for (const auto& kernel_name : kernel_names) {
std::filesystem::remove_all(jit_build_get_kernel_compile_outpath(build_key) + kernel_name);
std::filesystem::remove_all(devices[i]->build_env().get_out_kernel_root_path() + kernel_name);
}
}
tt_metal::detail::ClearKernelCache();
Expand Down Expand Up @@ -208,7 +205,7 @@ int main(int argc, char** argv) {
programmable_core_index,
dm_class_idx,
0,
get_latest_kernel_binary_path(mask, riscv0_kernel));
get_latest_kernel_binary_path(device->build_env().get_out_kernel_root_path(), riscv0_kernel));
ll_api::memory const& brisc_binary =
llrt::get_risc_binary(brisc_hex_path, ll_api::memory::Loading::CONTIGUOUS_XIP);
TT_FATAL(
Expand All @@ -218,7 +215,7 @@ int main(int argc, char** argv) {
programmable_core_index,
dm_class_idx,
1,
get_latest_kernel_binary_path(mask, riscv1_kernel));
get_latest_kernel_binary_path(device->build_env().get_out_kernel_root_path(), riscv1_kernel));
auto load_type =
(device->arch() == tt::ARCH::GRAYSKULL || device->arch() == tt::ARCH::WORMHOLE_B0)
? ll_api::memory::Loading::CONTIGUOUS
Expand All @@ -233,7 +230,7 @@ int main(int argc, char** argv) {
programmable_core_index,
compute_class_idx,
trisc_id,
get_latest_kernel_binary_path(mask, compute_kernel));
get_latest_kernel_binary_path(device->build_env().get_out_kernel_root_path(), compute_kernel));
ll_api::memory const& trisc_binary =
llrt::get_risc_binary(trisc_hex_path, ll_api::memory::Loading::CONTIGUOUS_XIP);
TT_FATAL(
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/jit_build/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ set(JIT_BUILD_SRCS

add_library(jit_build OBJECT ${JIT_BUILD_SRCS})
target_link_libraries(jit_build PUBLIC common)

if(DEFINED VERSION_HASH)
target_compile_definitions(jit_build PRIVATE "-DGIT_COMMIT_HASH=\"${VERSION_HASH}\"")
endif()
26 changes: 26 additions & 0 deletions tt_metal/jit_build/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ static std::string get_string_aliased_arch_lowercase(tt::ARCH arch) {

JitBuildEnv::JitBuildEnv() {}

// Removes one entry of the built-cache root unless it is the folder kept for the current git hash.
//   dir_path:      entry to examine; deleted recursively when it differs from git_hash_path.
//   git_hash_path: path of the folder that must survive.
// The two paths are compared with std::filesystem::path equality (no canonicalization is done here).
void check_built_dir(const std::filesystem::path& dir_path, const std::filesystem::path& git_hash_path) {
    const bool is_current_hash_dir = (dir_path == git_hash_path);
    if (is_current_hash_dir) {
        // Folder of the running build: leave it untouched.
        return;
    }
    std::filesystem::remove_all(dir_path);
}

void JitBuildEnv::init(
uint32_t build_key, tt::ARCH arch, const std::map<std::string, std::string>& device_kernel_defines) {
// Paths
Expand All @@ -50,6 +57,25 @@ void JitBuildEnv::init(
this->arch_name_ = get_string_lowercase(arch);
this->aliased_arch_name_ = get_string_aliased_arch_lowercase(arch);

#ifndef GIT_COMMIT_HASH
log_info(tt::LogBuildKernels, "GIT_COMMIT_HASH not found");
#else
std::string git_hash(GIT_COMMIT_HASH);

std::filesystem::path git_hash_path(this->out_root_ + git_hash);
std::filesystem::path root_path(this->out_root_);
if ((not llrt::RunTimeOptions::get_instance().get_skip_deleting_built_cache()) &&
std::filesystem::exists(root_path)) {
std::ranges::for_each(
std::filesystem::directory_iterator{root_path},
[&git_hash_path](const auto& dir_entry) { check_built_dir(dir_entry.path(), git_hash_path); });
} else {
log_info(tt::LogBuildKernels, "Skipping deleting built cache");
}

this->out_root_ = this->out_root_ + git_hash + "/";
#endif

this->out_firmware_root_ = this->out_root_ + to_string(build_key) + "/firmware/";
this->out_kernel_root_ = this->out_root_ + to_string(build_key) + "/kernels/";

Expand Down
7 changes: 0 additions & 7 deletions tt_metal/jit_build/build.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,6 @@ void jit_build(const JitBuildState& build, const JitBuildSettings* settings);
void jit_build_set(const JitBuildStateSet& builds, const JitBuildSettings* settings);
void jit_build_subset(const JitBuildStateSubset& builds, const JitBuildSettings* settings);

// TODO(pgk), get rid of this
// Helper for the test infra, which needs the kernel output directory for a build key:
//   <root_dir>/built/<build_key>/kernels/
// It could live on the device, but the device dependence is planned to be removed,
// so it is kept here for now.
inline const string jit_build_get_kernel_compile_outpath(int build_key) {
    std::string kernel_outpath = llrt::RunTimeOptions::get_instance().get_root_dir();
    kernel_outpath += "/built/";
    kernel_outpath += std::to_string(build_key);
    kernel_outpath += "/kernels/";
    return kernel_outpath;
}

inline void launch_build_step(const std::function<void()> build_func, std::vector<std::shared_future<void>>& events) {
events.emplace_back(detail::async(build_func));
}
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/llrt/rtoptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ RunTimeOptions::RunTimeOptions() {
if (getenv("TT_METAL_SKIP_LOADING_FW")) {
this->skip_loading_fw = true;
}

if (getenv("TT_METAL_SKIP_DELETING_BUILT_CACHE")) {
this->skip_deleting_built_cache = true;
}
}

const std::string& RunTimeOptions::get_root_dir() {
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/llrt/rtoptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ class RunTimeOptions {

tt_metal::DispatchCoreConfig dispatch_core_config = tt_metal::DispatchCoreConfig{};

bool skip_deleting_built_cache = false;

RunTimeOptions();

public:
Expand Down Expand Up @@ -297,6 +299,8 @@ class RunTimeOptions {

inline tt_metal::DispatchCoreConfig get_dispatch_core_config() { return dispatch_core_config; }

// Returns the skip_deleting_built_cache flag (false by default; the constructor sets it to true
// when the TT_METAL_SKIP_DELETING_BUILT_CACHE environment variable is present).
inline bool get_skip_deleting_built_cache() {
    return skip_deleting_built_cache;
}

private:
// Helper functions to parse feature-specific environment variables.
void ParseFeatureEnv(RunTimeDebugFeatures feature);
Expand Down

0 comments on commit 3d78f0c

Please sign in to comment.