Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for IR2Vec #615

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ cc_library(
name = "fmt",
srcs = glob(["src/*.cc"]),
hdrs = glob(["include/fmt/*.h"]),
copts = ["-Iexternal/fmt/include"],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
)
Expand Down Expand Up @@ -358,3 +357,50 @@ http_archive(
load("@programl//tools:bzl/deps.bzl", "programl_deps")

programl_deps()

# === IR2Vec ===
# https://github.com/IITH-Compilers/IR2Vec

# Fetches the IR2Vec v1.1.0 source archive and builds it with the inline BUILD
# file given in build_file_content, which:
#   - generates a version.h header that defines IR2VEC_VERSION as "1";
#   - compiles src/*.cpp (plus the generated header) into the publicly visible
#     "ir2vec" cc_library, exposing the headers found under src/include and
#     depending on @eigen//:eigen and @llvm//10.0.0.
http_archive(
name = "ir2vec",
build_file_content = """
genrule(
name = "version",
outs = ["version.h"],
cmd = "echo '#define IR2VEC_VERSION \\"1\\"' > $@",
)

cc_library(
name = "ir2vec",
srcs = glob(["src/*.cpp"]) + [":version.h"],
hdrs = glob(["src/include/*.h"]),
copts = ["-Iexternal/ir2vec/src/include"],
strip_include_prefix = "src/include",
visibility = ["//visibility:public"],
deps = [
"@eigen//:eigen",
"@llvm//10.0.0",
],
)
""",
# Checksum of the release tarball referenced by `urls`.
sha256 = "92cbe1d023593c2d45588caf2b1530795f376045e8bc3d2868ba349fb8d61ea5",
# The archive unpacks into a single top-level IR2Vec-1.1.0/ directory.
strip_prefix = "IR2Vec-1.1.0",
urls = ["https://github.com/IITH-Compilers/IR2Vec/archive/refs/tags/v1.1.0.tar.gz"],
)

# === Eigen ===
# https://eigen.tuxfamily.org/index.php?title=Main_Page

# Fetches the Eigen 3.3.7 source archive and wraps everything under Eigen/ in a
# publicly visible "eigen" cc_library that has headers only (no srcs).
# The IR2Vec library declared in this file depends on it.
http_archive(
name = "eigen",
build_file_content = """
cc_library(
name = "eigen",
hdrs = glob(["Eigen/**/*"]),
visibility = ["//visibility:public"],
)
""",
# Checksum of the release tarball referenced by `urls`.
sha256 = "d56fbad95abf993f8af608484729e3d87ef611dd85b3380a8bad1d5cbc373a57",
# The archive unpacks into a single top-level eigen-3.3.7/ directory.
strip_prefix = "eigen-3.3.7",
urls = ["https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz"],
)
3 changes: 3 additions & 0 deletions compiler_gym/envs/llvm/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ filegroup(
name = "service",
srcs = [
":compiler_gym-llvm-service",
# Runtime data dependencies:
"//compiler_gym/third_party/ir2vec:embeddings",
] + select({
"@llvm//:darwin": [],
"//conditions:default": [
Expand Down Expand Up @@ -245,6 +247,7 @@ cc_library(
"//compiler_gym/util:GrpcStatusMacros",
"@boost//:filesystem",
"@glog",
"@ir2vec",
"@llvm//10.0.0",
"@magic_enum",
"@nlohmann_json//:json",
Expand Down
4 changes: 3 additions & 1 deletion compiler_gym/envs/llvm/service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ if(DARWIN)
endif()
# The "service" filegroup: whatever ${_DEPS} was set to above, plus the IR2Vec
# embeddings target.
# NOTE(review): the embeddings target is presumably needed as a runtime data
# file by the IR2Vec observation spaces — confirm against Observation.cc.
cg_filegroup(
NAME "service"
DEPENDS
${_DEPS}
compiler_gym::third_party::ir2vec::embeddings
)

cg_genrule(
Expand Down
60 changes: 60 additions & 0 deletions compiler_gym/envs/llvm/service/Observation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
#include "compiler_gym/third_party/autophase/InstCount.h"
#include "compiler_gym/third_party/llvm/InstCount.h"
#include "compiler_gym/util/GrpcStatusMacros.h"
#include "compiler_gym/util/RunfilesPath.h"
#include "llvm/Bitcode/BitcodeWriter.h"
// #include "llvm/IR/Metadata.h"
#include "IR2Vec.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "nlohmann/json.hpp"
Expand Down Expand Up @@ -83,6 +85,64 @@ Status setObservation(LlvmObservationSpace space, const fs::path& workingDirecto
*reply.mutable_int64_list()->mutable_value() = {features.begin(), features.end()};
break;
}
case LlvmObservationSpace::IR2VEC_FLOW_AWARE: {
  // Program-level IR2Vec embedding computed in flow-aware mode. The seed
  // embedding vocabulary is a data file resolved through getRunfilesPath().
  const auto vocabularyPath = util::getRunfilesPath(
      "compiler_gym/third_party/ir2vec/seedEmbeddingVocab-300-llvm10.txt");
  IR2Vec::Embeddings ir2vec(benchmark.module(), IR2Vec::IR2VecMode::FlowAware,
                            vocabularyPath.string());

  // Copy the program vector into the reply's list of doubles.
  const auto programVector = ir2vec.getProgramVector();
  auto* replyValues = reply.mutable_double_list()->mutable_value();
  *replyValues = {programVector.begin(), programVector.end()};
  break;
}
case LlvmObservationSpace::IR2VEC_SYMBOLIC: {
  // Program-level IR2Vec embedding computed in symbolic mode. The seed
  // embedding vocabulary is a data file resolved through getRunfilesPath().
  const auto vocabularyPath = util::getRunfilesPath(
      "compiler_gym/third_party/ir2vec/seedEmbeddingVocab-300-llvm10.txt");
  IR2Vec::Embeddings ir2vec(benchmark.module(), IR2Vec::IR2VecMode::Symbolic,
                            vocabularyPath.string());

  // Copy the program vector into the reply's list of doubles.
  const auto programVector = ir2vec.getProgramVector();
  auto* replyValues = reply.mutable_double_list()->mutable_value();
  *replyValues = {programVector.begin(), programVector.end()};
  break;
}
case LlvmObservationSpace::IR2VEC_FUNCTION_LEVEL_FLOW_AWARE: {
  // Function-level IR2Vec embeddings (flow-aware mode), serialized as a JSON
  // array with one {"<function name>": [<embedding values>]} object per entry
  // of the function vector map.
  const auto ir2vecEmbeddingsPath = util::getRunfilesPath(
      "compiler_gym/third_party/ir2vec/seedEmbeddingVocab-300-llvm10.txt");
  IR2Vec::Embeddings embeddings(benchmark.module(), IR2Vec::IR2VecMode::FlowAware,
                                ir2vecEmbeddingsPath.string());
  // Bind by reference so the map is not copied when the accessor returns a
  // reference; a by-value return is kept alive by lifetime extension.
  const auto& funcMap = embeddings.getFunctionVecMap();
  json functionEmbeddings = json::array({});

  // Iterate by const reference: each entry carries a full embedding vector, so
  // a by-value loop variable would copy it on every iteration.
  for (const auto& func : funcMap) {
    // str() makes the StringRef -> std::string conversion explicit.
    const std::string funcName = func.first->getName().str();
    functionEmbeddings.push_back(json::object());
    functionEmbeddings.back()[funcName] =
        std::vector<double>(func.second.begin(), func.second.end());
  }
  *reply.mutable_string_value() = functionEmbeddings.dump();
  break;
}
case LlvmObservationSpace::IR2VEC_FUNCTION_LEVEL_SYMBOLIC: {
  // Function-level IR2Vec embeddings (symbolic mode), serialized as a JSON
  // array with one {"<function name>": [<embedding values>]} object per entry
  // of the function vector map.
  const auto ir2vecEmbeddingsPath = util::getRunfilesPath(
      "compiler_gym/third_party/ir2vec/seedEmbeddingVocab-300-llvm10.txt");
  IR2Vec::Embeddings embeddings(benchmark.module(), IR2Vec::IR2VecMode::Symbolic,
                                ir2vecEmbeddingsPath.string());
  // Bind by reference so the map is not copied when the accessor returns a
  // reference; a by-value return is kept alive by lifetime extension.
  const auto& funcMap = embeddings.getFunctionVecMap();
  json functionEmbeddings = json::array({});

  // Iterate by const reference: each entry carries a full embedding vector, so
  // a by-value loop variable would copy it on every iteration.
  for (const auto& func : funcMap) {
    // str() makes the StringRef -> std::string conversion explicit.
    const std::string funcName = func.first->getName().str();
    functionEmbeddings.push_back(json::object());
    functionEmbeddings.back()[funcName] =
        std::vector<double>(func.second.begin(), func.second.end());
  }
  *reply.mutable_string_value() = functionEmbeddings.dump();
  break;
}
case LlvmObservationSpace::PROGRAML:
case LlvmObservationSpace::PROGRAML_JSON: {
// Build the ProGraML graph.
Expand Down
70 changes: 70 additions & 0 deletions compiler_gym/envs/llvm/service/ObservationSpaces.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ namespace compiler_gym::llvm_service {

// The number of features in the Autophase feature vector.
static constexpr size_t kAutophaseFeatureDim = 56;

// The number of features in the IR2Vec feature vector.
static constexpr size_t kIR2VecFeatureDim = 300;

// 4096 is the maximum path length for most filesystems.
static constexpr size_t kMaximumPathLength = 4096;

Expand Down Expand Up @@ -90,6 +94,72 @@ std::vector<ObservationSpace> getLlvmObservationSpaceList() {
defaultValue.begin(), defaultValue.end()};
break;
}
case LlvmObservationSpace::IR2VEC_FLOW_AWARE: {
  // Program-level flow-aware embedding: kIR2VecFeatureDim doubles, each
  // described by a default-constructed ScalarRange (no min or max is set).
  std::vector<ScalarRange> dimensionRanges(kIR2VecFeatureDim);
  auto* ranges = space.mutable_double_range_list()->mutable_range();
  *ranges = {dimensionRanges.begin(), dimensionRanges.end()};

  space.set_deterministic(true);
  space.set_platform_dependent(false);

  // The default observation is the all-zeros vector.
  std::vector<double> zeros(kIR2VecFeatureDim, 0.0);
  auto* defaultValues = space.mutable_default_value()->mutable_double_list()->mutable_value();
  *defaultValues = {zeros.begin(), zeros.end()};
  break;
}
case LlvmObservationSpace::IR2VEC_SYMBOLIC: {
  // Program-level symbolic embedding: kIR2VecFeatureDim doubles, each
  // described by a default-constructed ScalarRange (no min or max is set).
  std::vector<ScalarRange> dimensionRanges(kIR2VecFeatureDim);
  auto* ranges = space.mutable_double_range_list()->mutable_range();
  *ranges = {dimensionRanges.begin(), dimensionRanges.end()};

  space.set_deterministic(true);
  space.set_platform_dependent(false);

  // The default observation is the all-zeros vector.
  std::vector<double> zeros(kIR2VecFeatureDim, 0.0);
  auto* defaultValues = space.mutable_default_value()->mutable_double_list()->mutable_value();
  *defaultValues = {zeros.begin(), zeros.end()};
  break;
}
case LlvmObservationSpace::IR2VEC_FUNCTION_LEVEL_FLOW_AWARE: {
  // Function-level flow-aware embeddings are exchanged as an opaque JSON string.
  space.set_opaque_data_format("json://");
  space.mutable_string_size_range()->mutable_min()->set_value(0);
  space.set_deterministic(true);
  space.set_platform_dependent(false);

  // Placeholder embedding [0, 1, ..., kIR2VecFeatureDim - 1]. An integral
  // counter is used (rather than a floating-point one) so that the loop bound
  // comparison is exact; the values emitted are unchanged.
  std::vector<double> defaultEmbs;
  defaultEmbs.reserve(kIR2VecFeatureDim);
  for (size_t i = 0; i < kIR2VecFeatureDim; ++i) {
    defaultEmbs.push_back(static_cast<double>(i));
  }

  // Default value: {"embeddings": {"default": [...]}}.
  // NOTE(review): setObservation() emits a JSON *array* of
  // {"<function name>": [...]} objects for this space, which is a different
  // shape from this default — confirm which one consumers expect.
  json defaultValue;
  defaultValue["embeddings"]["default"] = defaultEmbs;
  *space.mutable_default_value()->mutable_string_value() = defaultValue.dump();
  break;
}
case LlvmObservationSpace::IR2VEC_FUNCTION_LEVEL_SYMBOLIC: {
  // Function-level symbolic embeddings are exchanged as an opaque JSON string.
  space.set_opaque_data_format("json://");
  space.mutable_string_size_range()->mutable_min()->set_value(0);
  space.set_deterministic(true);
  space.set_platform_dependent(false);

  // Placeholder embedding [0, 1, ..., kIR2VecFeatureDim - 1]. An integral
  // counter is used (rather than a floating-point one) so that the loop bound
  // comparison is exact; the values emitted are unchanged.
  std::vector<double> defaultEmbs;
  defaultEmbs.reserve(kIR2VecFeatureDim);
  for (size_t i = 0; i < kIR2VecFeatureDim; ++i) {
    defaultEmbs.push_back(static_cast<double>(i));
  }

  // Default value: {"embeddings": {"default": [...]}}.
  // NOTE(review): setObservation() emits a JSON *array* of
  // {"<function name>": [...]} objects for this space, which is a different
  // shape from this default — confirm which one consumers expect.
  json defaultValue;
  defaultValue["embeddings"]["default"] = defaultEmbs;
  *space.mutable_default_value()->mutable_string_value() = defaultValue.dump();
  break;
}
case LlvmObservationSpace::PROGRAML: {
// ProGraML serializes the graph to JSON.
space.set_opaque_data_format("json://networkx/MultiDiGraph");
Expand Down
58 changes: 55 additions & 3 deletions compiler_gym/envs/llvm/service/ObservationSpaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ namespace compiler_gym::llvm_service {
* 1. Add a new entry to this LlvmObservationSpace enum.
* 2. Add a new switch case to getLlvmObservationSpaceList() to return the
* ObservationSpace.
* 3. Add a new switch case to LlvmSession::getObservation() to compute
* the actual observation.
* 4. Run `bazel test //compiler_gym/...` and update the newly failing tests.
* 3. Add a new switch case to setObservation() to compute the actual
* observation.
* 4. Run `make test` and update the newly failing tests.
*/
enum class LlvmObservationSpace {
/**
Expand All @@ -46,6 +46,58 @@ enum class LlvmObservationSpace {
* deep reinforcement learning. FCCM.
*/
AUTOPHASE,
/**
* The IR2Vec program-level flow-aware embeddings.
*
* From:
*
*     S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar,
*     Ramakrishna Upadrasta, and Y. N. Srikant. (2020).
*     IR2VEC: LLVM IR Based Scalable Program Embeddings.
*     ACM Trans. Archit. Code Optim. 17, 4, Article 32 (December 2020), 27 pages.
*     DOI: https://doi.org/10.1145/3418463
*/
IR2VEC_FLOW_AWARE,
/**
* The IR2Vec program-level symbolic embeddings.
*
* From:
*
*     S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar,
*     Ramakrishna Upadrasta, and Y. N. Srikant. (2020).
*     IR2VEC: LLVM IR Based Scalable Program Embeddings.
*     ACM Trans. Archit. Code Optim. 17, 4, Article 32 (December 2020), 27 pages.
*     DOI: https://doi.org/10.1145/3418463
*/
IR2VEC_SYMBOLIC,
/**
* The IR2Vec function-level flow-aware embeddings.
*
* From:
*
*     S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar,
*     Ramakrishna Upadrasta, and Y. N. Srikant. (2020).
*     IR2VEC: LLVM IR Based Scalable Program Embeddings.
*     ACM Trans. Archit. Code Optim. 17, 4, Article 32 (December 2020), 27 pages.
*     DOI: https://doi.org/10.1145/3418463
*/
IR2VEC_FUNCTION_LEVEL_FLOW_AWARE,
/**
* The IR2Vec function-level symbolic embeddings.
*
* From:
*
*     S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar,
*     Ramakrishna Upadrasta, and Y. N. Srikant. (2020).
*     IR2VEC: LLVM IR Based Scalable Program Embeddings.
*     ACM Trans. Archit. Code Optim. 17, 4, Article 32 (December 2020), 27 pages.
*     DOI: https://doi.org/10.1145/3418463
*/
IR2VEC_FUNCTION_LEVEL_SYMBOLIC,
/**
* Returns the graph representation of a program as a networkx Graph.
*
Expand Down
12 changes: 12 additions & 0 deletions compiler_gym/third_party/ir2vec/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# IR2Vec. https://github.com/IITH-Compilers/IR2Vec

# Exposes the IR2Vec seed embedding vocabulary file as a publicly visible
# filegroup so that other packages can depend on it.
filegroup(
name = "embeddings",
srcs = ["seedEmbeddingVocab-300-llvm10.txt"],
visibility = ["//visibility:public"],
)
12 changes: 12 additions & 0 deletions compiler_gym/third_party/ir2vec/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# NOTE(review): presumably adds any subdirectories of this directory to the
# build — confirm against the definition of cg_add_all_subdirs().
cg_add_all_subdirs()

# Exposes the IR2Vec seed embedding vocabulary file, located next to this
# CMakeLists.txt, as the "embeddings" filegroup.
cg_filegroup(
NAME "embeddings"
FILES
"${CMAKE_CURRENT_LIST_DIR}/seedEmbeddingVocab-300-llvm10.txt"
)
Loading