Skip to content

Commit

Permalink
reduce_max: rename and clean up (#1253) (#1264)
Browse files (browse the repository at this point in the history)
Co-authored-by: Kristof Denolf <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Andra Bisca <[email protected]>
Co-authored-by: AndraBisca <[email protected]>
  • Loading branch information
5 people authored Apr 16, 2024
1 parent 3e37056 commit d088353
Show file tree
Hide file tree
Showing 21 changed files with 41 additions and 767 deletions.
2 changes: 1 addition & 1 deletion aie_kernels/aie2/reduce_add.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ static void _reduce_add_scalar(int32_t *restrict in, int32_t *restrict out,
static void _reduce_add_vector(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {
v16int32 zero = broadcast_to_v16int32((int32_t)0);
int32_t vector_size = 16;
const int32_t vector_size = 16;
v16int32 after_vector;
v16int32 running_total = zero;
for (int32_t i = 0; i < input_size; i += vector_size)
Expand Down
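Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be aie_kernels/aie2/reduce_max.cc, renamed from aie_kernels/generic/vector_max.cc]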
Expand Up @@ -5,11 +5,11 @@

#include <aie_api/aie.hpp>

void vector(int32_t *restrict in, int32_t *restrict out) {
void _reduce_max_vector(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {

v16int32 tiny = broadcast_to_v16int32((int32_t)-2147483648);
int32_t input_size = 1024;
int32_t vector_size = 16;
const int32_t vector_size = 16;
v16int32 after_vector;
v16int32 running_max = tiny;
for (int32_t i = 0; i < input_size; i += vector_size)
Expand All @@ -32,8 +32,8 @@ void vector(int32_t *restrict in, int32_t *restrict out) {
return;
}

void scalar(int32_t *restrict in, int32_t *restrict out) {
size_t input_size = 1024;
void _reduce_max_scalar(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {
int32_t running_max = (int32_t)-2147483648;
for (int32_t i = 0; i < input_size; i++) {
if (in[i] > running_max)
Expand All @@ -46,8 +46,12 @@ void scalar(int32_t *restrict in, int32_t *restrict out) {

extern "C" {

void vector_max(int32_t *a_in, int32_t *c_out) { vector(a_in, c_out); }
void reduce_max_vector(int32_t *a_in, int32_t *c_out, int32_t input_size) {
_reduce_max_vector(a_in, c_out, input_size);
}

void scalar_max(int32_t *a_in, int32_t *c_out) { scalar(a_in, c_out); }
void reduce_max_scalar(int32_t *a_in, int32_t *c_out, int32_t input_size) {
_reduce_max_scalar(a_in, c_out, input_size);
}

} // extern "C"
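Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be a CMake build file (CMakeLists.txt) for the host test executable]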
Expand Up @@ -40,15 +40,17 @@ project(${ProjectName})
find_package(Boost REQUIRED)

add_executable(${currentTarget}
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp
test.cpp
)

target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1)

target_include_directories (${currentTarget} PUBLIC
../../utils
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib
${XRT_INC_DIR}
${Boost_INCLUDE_DIRS}
../../../programming_examples/utils
)

target_link_directories(${currentTarget} PUBLIC
Expand All @@ -66,4 +68,4 @@ else()
target_link_libraries(${currentTarget} PUBLIC
xrt_coreutil
)
endif()
endif()
Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be the example's Makefile]
Expand Up @@ -10,23 +10,23 @@ include ../../makefile-common

ACDC_AIE = $(dir $(shell which aie-opt))/..

targetname = vector_max
targetname = reduce_max
devicename = ipu
col = 0
CHESS_FLAGS=${CHESSCCWRAP2_FLAGS}
KERNEL_LIB=${ACDC_AIE}/../../aie_kernels/generic/
KERNEL_LIB=../../../aie_kernels/aie2

all: build/final.xclbin build/insts.txt

build/vector_max.o: ${KERNEL_LIB}/vector_max.cc
build/reduce_max.cc.o: ${KERNEL_LIB}/reduce_max.cc
mkdir -p ${@D}
cd ${@D} && xchesscc_wrapper ${CHESS_FLAGS} -DBIT_WIDTH=8 -c $< -o ${@F}
cd ${@D} && xchesscc_wrapper ${CHESS_FLAGS} -c $(<:%=../%) -o ${@F}

build/aie.mlir: aie2.py
mkdir -p ${@D}
python3 $< ${devicename} ${col} > $@

build/final.xclbin: build/aie.mlir build/vector_max.o
build/final.xclbin: build/aie.mlir build/reduce_max.cc.o
mkdir -p ${@D}
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
Expand Down Expand Up @@ -73,4 +73,4 @@ vck5000: build/aie.mlir build/scale.o
-Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o test.elf

clean: clean_trace
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe vector_max.o* vector_max.cc
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe
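Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be the example's aie2.py design script]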
Expand Up @@ -16,7 +16,7 @@
import sys


def my_vector_max():
def my_reduce_max():
N = 1024

buffer_depth = 2
Expand All @@ -39,10 +39,9 @@ def device_body():
memRef_O_ty = T.memref(1, T.i32())

# AIE Core Function declarations

vector_max = external_func("vector_max", inputs=[memRef_I_ty, memRef_O_ty])

scalar_max = external_func("scalar_max", inputs=[memRef_I_ty, memRef_O_ty])
reduce_max_vector = external_func(
"reduce_max_vector", inputs=[memRef_I_ty, memRef_O_ty, T.i32()]
)

# Tile declarations
ShimTile = tile(int(sys.argv[2]), 0)
Expand All @@ -57,16 +56,12 @@ def device_body():
# Set up compute tiles

# Compute tile 2
@core(ComputeTile2, "vector_max.o")
@core(ComputeTile2, "reduce_max.cc.o")
def core_body():
for _ in for_(0xFFFFFFFF):
elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)

call(
vector_max,
[elem_in, elem_out],
)
call(reduce_max_vector, [elem_in, elem_out, N])
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)
yield_([])
Expand All @@ -83,4 +78,4 @@ def sequence(A, C):
print(ctx.module)


my_vector_max()
my_reduce_max()
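Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be the example's lit test file (RUN lines)]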
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, chess
//
// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/generic/vector_max.cc -o vector_max.o
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_max.cc -o reduce_max.cc.o
// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
Expand Down
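Original file line number Diff line number Diff line change [file header missing from this capture; the hunks below appear to be the example's host program, test.cpp]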
Expand Up @@ -52,11 +52,8 @@ int main(int argc, const char *argv[]) {
int n_warmup_iterations = vm["warmup"].as<int>();
int trace_size = vm["trace_sz"].as<int>();

// ------------------------------------------------------
// Configure this to match your design's buffer size
// ------------------------------------------------------
int INOUT0_VOLUME = 1024; // Input only, 64x uint32_t in this example
int INOUT1_VOLUME = 1; // Not used in this example
int INOUT0_VOLUME = 1024;
int INOUT1_VOLUME = 1;

size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE);
size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE);
Expand All @@ -76,46 +73,12 @@ int main(int argc, const char *argv[]) {
// Get device, load the xclbin & kernel and register them
// ------------------------------------------------------
// Get a device handle
unsigned int device_index = 0;
auto device = xrt::device(device_index);

// Load the xclbin
if (verbosity >= 1)
std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() << "\n";
auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>());

// Load the kernel
if (verbosity >= 1)
std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>() << "\n";
std::string Node = vm["kernel"].as<std::string>();

// Get the kernel from the xclbin
auto xkernels = xclbin.get_kernels();
auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(),
[Node, verbosity](xrt::xclbin::kernel &k) {
auto name = k.get_name();
if (verbosity >= 1) {
std::cout << "Name: " << name << std::endl;
}
return name.rfind(Node, 0) == 0;
});
auto kernelName = xkernel.get_name();

// Register xclbin
if (verbosity >= 1)
std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>()
<< "\n";
device.register_xclbin(xclbin);
xrt::device device;
xrt::kernel kernel;

// Get a hardware context
if (verbosity >= 1)
std::cout << "Getting hardware context.\n";
xrt::hw_context context(device, xclbin.get_uuid());

// Get a kernel handle
if (verbosity >= 1)
std::cout << "Getting handle to kernel:" << kernelName << "\n";
auto kernel = xrt::kernel(context, kernelName);
test_utils::init_xrt_load_kernel(device, kernel, verbosity,
vm["xclbin"].as<std::string>(),
vm["kernel"].as<std::string>());

// ------------------------------------------------------
// Initialize input/ output buffer sizes and sync them
Expand Down
1 change: 0 additions & 1 deletion programming_examples/basic/vector_add_reduce/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def device_body():
memRef_O_ty = T.memref(1, T.i32())

# AIE Core Function declarations

reduce_add_vector = external_func(
"reduce_add_vector", inputs=[memRef_I_ty, memRef_O_ty, T.i32()]
)
Expand Down
69 changes: 0 additions & 69 deletions programming_examples/basic/vector_max/CMakeLists.txt

This file was deleted.

76 changes: 0 additions & 76 deletions programming_examples/basic/vector_max/Makefile

This file was deleted.

Loading

0 comments on commit d088353

Please sign in to comment.