Merge branch 'main' into fifield-patch-version

Xilinx · Apr 4, 2024 · 8732b20 · 8732b20
2 parents d74cf07 + aea2709
commit 8732b20
Show file tree

Hide file tree

Showing 44 changed files with 320 additions and 2,012 deletions.
diff --git a/...signs/ipu-xrt/matrix_multiplication/mm.cc → aie_kernels/iron/mm.cc b/...signs/ipu-xrt/matrix_multiplication/mm.cc → aie_kernels/iron/mm.cc
diff --git a/...pu-xrt/matrix_vector_multiplication/mv.cc → aie_kernels/iron/mv.cc b/...pu-xrt/matrix_vector_multiplication/mv.cc → aie_kernels/iron/mv.cc
@@ -25,7 +25,7 @@
 #include "zero.cc"
 
 template <typename T_in, typename T_out, int M, int K>
-void matvecScalar(T_in *a, T_in *b, T_out *c) {
+void matvec_scalar(T_in *a, T_in *b, T_out *c) {
   event0();
   for (int row = 0; row < M; row++) {
     T_out runningSum = 0;
@@ -39,8 +39,8 @@ void matvecScalar(T_in *a, T_in *b, T_out *c) {
 
 template <typename T_in, typename T_out, typename T_acc, unsigned m, unsigned k,
           unsigned r, unsigned s>
-void matvecVectorized(T_in *__restrict a, T_in *__restrict b,
-                      T_out *__restrict c) {
+void matvec_vectorized(T_in *__restrict a, T_in *__restrict b,
+                       T_out *__restrict c) {
   static_assert(m % r == 0 && k % 2 == 0);
   static_assert(s == 8); // s is fixed to 8 because that is the number of
                          // column vectors (a_vec_0_0..a_vec_3_1) we create
@@ -141,27 +141,27 @@ extern "C" {
                              ctype_acc)                                        \
   void matvec_scalar_##mlir_type_in##_##mlir_type_out(                         \
       ctype_in *a_in, ctype_in *b_in, ctype_out *c_out) {                      \
-    matvecScalar<ctype_in, ctype_out, 32, 32>(a_in, b_in, c_out);              \
+    matvec_scalar<ctype_in, ctype_out, 32, 32>(a_in, b_in, c_out);             \
   }
 
 #define matvec_vectorized_c_func(ctype_in, mlir_type_in, ctype_out,            \
                                  mlir_type_out, ctype_acc)                     \
   void matvec_vectorized_##mlir_type_in##_##mlir_type_out(                     \
       ctype_in *a_in, ctype_in *b_in, ctype_out *c_out) {                      \
-    matvecVectorized<ctype_in, ctype_out, ctype_acc, 32, 32, 16, 8>(           \
+    matvec_vectorized<ctype_in, ctype_out, ctype_acc, 32, 32, 16, 8>(          \
         a_in, b_in, c_out);                                                    \
   }
 
 #define zero_vectorized_c_func(ctype_in, mlir_type_in, ctype_out,              \
                                mlir_type_out, ctype_acc)                       \
   void zero_vectorized_##mlir_type_out(ctype_out *c_out) {                     \
-    zeroVectorized<ctype_out, 32, 1, 32>(c_out);                               \
+    zero_vectorized<ctype_out, 32, 1, 32>(c_out);                              \
   }
 
 #define zero_scalar_c_func(ctype_in, mlir_type_in, ctype_out, mlir_type_out,   \
                            ctype_acc)                                          \
   void zero_scalar_##mlir_type_out(ctype_out *c_out) {                         \
-    zeroScalar<ctype_out, 32, 1>(c_out);                                       \
+    zero_scalar<ctype_out, 32, 1>(c_out);                                      \
   }
 
 combos(matvec_scalar_c_func) combos(matvec_vectorized_c_func)

diff --git a/...gns/ipu-xrt/matrix_multiplication/zero.cc → aie_kernels/iron/zero.cc b/...gns/ipu-xrt/matrix_multiplication/zero.cc → aie_kernels/iron/zero.cc
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/CMakeLists.txt b/reference_designs/ipu-xrt/matrix_multiplication/CMakeLists.txt
@@ -30,6 +30,8 @@ set(TARGET_NAME test CACHE STRING "Target to be built")
 SET (ProjectName ${TARGET_NAME})
 SET (currentTarget ${TARGET_NAME})
 
+set(CMAKE_CXX_STANDARD 23)
+
 if ( WSL )
 	set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR})
 endif ()
@@ -40,7 +42,7 @@ project(${ProjectName})
 find_package(Boost REQUIRED)
 
 add_executable(${currentTarget}
-    test.cpp
+    ${subdir}/test.cpp
 )
 
 target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1)

diff --git a/reference_designs/ipu-xrt/matrix_multiplication/Makefile b/reference_designs/ipu-xrt/matrix_multiplication/Makefile
diff --git a/...e_designs/ipu-xrt/matrix_multiplication.h → ...ns/ipu-xrt/matrix_multiplication/common.h b/...e_designs/ipu-xrt/matrix_multiplication.h → ...ns/ipu-xrt/matrix_multiplication/common.h
@@ -53,8 +53,11 @@ void add_default_options(po::options_description &desc) {
       "M,M", po::value<int>()->default_value(512), "Matrix size M")(
       "K,K", po::value<int>()->default_value(512), "Matrix size K")(
       "N,N", po::value<int>()->default_value(512),
-      "Matrix size N")("iters", po::value<int>()->default_value(10))(
-      "warmup", po::value<int>()->default_value(1));
+      "Matrix size N")("iters", po::value<int>()->default_value(1))(
+      "warmup", po::value<int>()->default_value(0))(
+      "trace_sz,t", po::value<int>()->default_value(0))(
+      "trace_file", po::value<std::string>()->default_value("trace.txt"),
+      "where to store trace output");
 }
 
 void parse_options(int argc, const char *argv[], po::options_description &desc,
@@ -256,7 +259,7 @@ template <typename Tin, typename Tout>
 int verify(int M, int N, int K, std::vector<Tin> A, std::vector<Tin> B,
            std::vector<Tout> C) {
   int errors = 0;
-  int max_printable_errors = 500;
+  int max_printable_errors = 10;
   const float absTol = 0.5;
   const float relTol = 0.5;
 
@@ -292,6 +295,18 @@ int verify(int M, int N, int K, std::vector<Tin> A, std::vector<Tin> B,
   return errors;
 }
 
+// --------------------------------------------------------------------------
+// Tracing: dump the buffer as 32-bit words, one zero-padded hex word per line
+// --------------------------------------------------------------------------
+void write_out_trace(char *traceOutPtr, size_t trace_size, std::string path) {
+  std::ofstream fout(path); // if the open fails, the writes below are no-ops
+  const uint32_t *traceOut = reinterpret_cast<const uint32_t *>(traceOutPtr);
+  fout << std::setfill('0') << std::hex; // sticky, so set once up front
+  for (size_t i = 0; i < trace_size / sizeof(traceOut[0]); i++) {
+    fout << std::setw(8) << traceOut[i] << '\n'; // width resets per insert
+  }
+}
+
 } // namespace matmul_common
 
 #endif
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/makefile-common b/reference_designs/ipu-xrt/matrix_multiplication/makefile-common
@@ -0,0 +1,92 @@
+##===- Makefile -----------------------------------------------------------===##
+# 
+# This file licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# 
+##===----------------------------------------------------------------------===##
+
+# This file is to be included from one of the subdirectories, e.g.
+# matrix_vector/, after defining the subdir, targetname and kernels variables.
+#
+# The build steps for these matrix and matrix-vector multiplication designs all
+# look the same. We need to build:
+#  - A common host test code, 
+#  - a kernel implemented in C using AIE intrinsics,
+#  - the actual design implemented in MLIR.
+# There are also targets for signing the resulting xclbin, extracting traces 
+# and cleaning everything.
+#
+# Since the targets are all the same for all designs, they are defined here.
+# Subdirectories need only include this makefile-common after defining the 
+# following variables:
+# - subdir      -- subdirectory you are including this from
+# - targetname  -- resulting test host code will be named targetname.exe
+# - kernels     -- which kernels from ${kernels_dir} to compile and link in
+#                  (file names without the .cc extension)
+# - M, K, N     -- (optional) dimensions of matrices, may be used by design;
+#                  N=1 for matrix-vector
+
+include ../../makefile-common
+
+# defaults; overwrite if needed (no trailing blanks: make keeps them in values)
+M?=512
+K?=512
+N?=512
+
+mlir_target?=build/aie_${M}x${K}x${N}.mlir
+xclbin_target?=build/final_${M}x${K}x${N}.xclbin
+insts_target?=build/insts_${M}x${K}x${N}.txt
+
+runargs?=-v 1 --warmup 10 --iters 10
+
+kernels_dir=../../../../aie_kernels/iron
+
+.PHONY: all
+all: ${xclbin_target} ${insts_target} ${targetname}.exe
+
+build/%.o: ${kernels_dir}/%.cc
+	mkdir -p ${@D}
+	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -DBIT_WIDTH=8 -c $(<:%=../%) -o ${@F}
+
+${mlir_target}: aie2.py
+	mkdir -p ${@D}
+	python3 $< -M $M -K $K -N $N > $@
+
+${xclbin_target}: ${mlir_target} ${kernels:%=build/%.o}
+	mkdir -p ${@D}
+	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
+				--aie-generate-ipu --ipu-insts-name=${insts_target:build/%=%} $(<:%=../%)
+
+${targetname}.exe: test.cpp ../test.cpp ../common.h
+	rm -rf _build
+	mkdir -p _build
+	cd _build && ${powershell} cmake -E env CXXFLAGS="-std=c++23 -ggdb" cmake ../.. -D CMAKE_C_COMPILER=gcc-13 -D CMAKE_CXX_COMPILER=g++-13 -DTARGET_NAME=${targetname} -Dsubdir=${subdir}
+	cd _build && ${powershell} cmake --build . --config Release
+ifeq "${powershell}" "powershell.exe"
+	cp _build/${targetname}.exe $@
+else
+	cp _build/${targetname} $@ 
+endif
+
+xclbin_sign=/opt/xilinx/xrt/amdxdna/setup_xclbin_firmware.sh 
+.PHONY: sign
+sign: ${xclbin_target}
+	${xclbin_sign} -dev Phoenix -xclbin $<
+
+.PHONY: run
+run: ${targetname}.exe ${xclbin_target} ${insts_target} #sign
+	export XRT_HACK_UNSECURE_LOADING_XCLBIN=1 && \
+	${powershell} ./$< -x ${xclbin_target} -i ${insts_target} -k MLIR_AIE -M $M -K $K -N $N ${runargs}
+
+.PHONY: clean
+clean:
+	rm -rf build _build ${targetname}.exe
+
+.PHONY: parse_trace # turns trace.txt into trace_eventIR.json using the MLIR
+parse_trace:
+	../../../utils/parse_eventIR.py --filename trace.txt --mlir ${mlir_target} --colshift 1 > trace_eventIR.json
+
+.PHONY: clean_trace
+clean_trace:
+	rm -rf tmpTrace trace_eventIR.json
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/matrix_vector/Makefile b/reference_designs/ipu-xrt/matrix_multiplication/matrix_vector/Makefile
@@ -0,0 +1,19 @@
+##===- Makefile -----------------------------------------------------------===##
+# 
+# This file licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# 
+##===----------------------------------------------------------------------===##
+
+subdir=matrix_vector
+targetname=matrixVectorMultiplication
+kernels=mv
+
+# Currently does not accept reconfiguring size via these variables; must change
+# in source at aie2.py as well as here
+M=288
+K=288
+N=1
+
+include ../makefile-common
diff --git a/...rt/matrix_vector_multiplication/README.md → ...ix_multiplication/matrix_vector/README.md b/...rt/matrix_vector_multiplication/README.md → ...ix_multiplication/matrix_vector/README.md
diff --git a/...-xrt/matrix_vector_multiplication/aie2.py → ...trix_multiplication/matrix_vector/aie2.py b/...-xrt/matrix_vector_multiplication/aie2.py → ...trix_multiplication/matrix_vector/aie2.py
diff --git a/...-xrt/matrix_vector_multiplication/run.lit → ...trix_multiplication/matrix_vector/run.lit b/...-xrt/matrix_vector_multiplication/run.lit → ...trix_multiplication/matrix_vector/run.lit
@@ -3,10 +3,10 @@
 //
 // REQUIRES: ryzen_ai, chess
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/mv.cc -o ./mv.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/iron/mv.cc -o ./mv.o
+// RUN: %python %S/aie2.py -M 288 -K 288 -N 1 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s
 // CHECK: PASS!
 
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/matrix_vector/test.cpp b/reference_designs/ipu-xrt/matrix_multiplication/matrix_vector/test.cpp
@@ -0,0 +1,18 @@
+//===- test.cpp -------------------------------------------000---*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2023, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdfloat>
+
+#define DATATYPES_USING_DEFINED
+using A_DATATYPE = std::bfloat16_t;
+using B_DATATYPE = std::bfloat16_t;
+using C_DATATYPE = float;
+
+#include "../test.cpp"
diff --git a/...matrix_multiplication_array/plot_sweep.py → ...u-xrt/matrix_multiplication/plot_sweep.py b/...matrix_multiplication_array/plot_sweep.py → ...u-xrt/matrix_multiplication/plot_sweep.py
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/single_column/Makefile b/reference_designs/ipu-xrt/matrix_multiplication/single_column/Makefile
@@ -0,0 +1,19 @@
+##===- Makefile -----------------------------------------------------------===##
+# 
+# This file licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# 
+##===----------------------------------------------------------------------===##
+
+subdir=single_column
+targetname=matrixMultiplication
+kernels=mm
+
+# Currently does not accept reconfiguring size via these variables; must change
+# in source at aie2.py as well as here
+M=256
+K=128
+N=128
+
+include ../makefile-common
diff --git a/...rt/matrix_multiplication_column/README.md → ...ix_multiplication/single_column/README.md b/...rt/matrix_multiplication_column/README.md → ...ix_multiplication/single_column/README.md
diff --git a/...-xrt/matrix_multiplication_column/aie2.py → ...trix_multiplication/single_column/aie2.py b/...-xrt/matrix_multiplication_column/aie2.py → ...trix_multiplication/single_column/aie2.py
diff --git a/...u-xrt/matrix_multiplication_array/run.lit → ...trix_multiplication/single_column/run.lit b/...u-xrt/matrix_multiplication_array/run.lit → ...trix_multiplication/single_column/run.lit
@@ -3,10 +3,10 @@
 //
 // REQUIRES: ryzen_ai, chess
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/mm.cc -o ./mm.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/iron/mm.cc -o ./mm.o
+// RUN: %python %S/aie2.py -M 256 -K 128 -N 128 > ./aie.mlir
 // RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 | FileCheck %s
+// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 128 -N 128 -v 1 | FileCheck %s
 // CHECK: PASS!
 
diff --git a/reference_designs/ipu-xrt/matrix_multiplication/single_column/test.cpp b/reference_designs/ipu-xrt/matrix_multiplication/single_column/test.cpp
@@ -0,0 +1,18 @@
+//===- test.cpp -------------------------------------------000---*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2023, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdfloat>
+
+#define DATATYPES_USING_DEFINED
+using A_DATATYPE = std::bfloat16_t;
+using B_DATATYPE = std::bfloat16_t;
+using C_DATATYPE = float;
+
+#include "../test.cpp"