From fd13b417ccd963e2a86bda0470dcfa446676ec5d Mon Sep 17 00:00:00 2001 From: Eddie Richter Date: Thu, 21 Mar 2024 10:11:19 -0600 Subject: [PATCH] Getting lit tests working on reference designs and aie1 unit tests --- lib/Targets/AIETargetHSA.cpp | 7 +- python/ILP_solution.py | 519 ++++++++-------- python/compiler/aiecc/main.py | 1 - reference_designs/CMakeLists.txt | 9 +- .../add_one_objFifo_no_mem_tile/aie2.py | 17 +- .../add_one_objFifo_no_mem_tile/run.lit | 2 +- .../run_vck5000.lit | 10 + reference_designs/IRON-examples/lit.local.cfg | 12 + .../IRON-examples/matrix_add_one/aie2.py | 45 +- .../IRON-examples/matrix_add_one/run.lit | 2 +- .../matrix_add_one/run_vck5000.lit | 10 + .../matrix_multiplication_scalar/aie2.py | 75 ++- .../matrix_multiplication_scalar/run.lit | 4 +- .../run_vck5000.lit | 12 + .../passthrough_hardware/aie2.py | 19 +- .../passthrough_hardware/run.lit | 2 +- .../passthrough_hardware/run_vck5000.lit | 10 + .../IRON-examples/vector_add/aie2.py | 27 +- .../IRON-examples/vector_add/run.lit | 3 +- .../IRON-examples/vector_add/run_vck5000.lit | 10 + .../IRON-examples/vector_max/aie2.py | 29 +- .../IRON-examples/vector_max/run.lit | 3 +- .../IRON-examples/vector_max/run_vck5000.lit | 10 + .../IRON-examples/vector_min/aie2.py | 29 +- .../IRON-examples/vector_min/run.lit | 3 +- .../IRON-examples/vector_min/run_vck5000.lit | 10 + .../IRON-examples/vector_mult/aie2.py | 25 +- .../IRON-examples/vector_mult/run.lit | 3 +- .../IRON-examples/vector_mult/run_vck5000.lit | 10 + .../IRON-examples/vector_scalar/aie2.py | 25 +- .../IRON-examples/vector_scalar/run.lit | 3 +- .../vector_scalar/run_vck5000.lit | 10 + .../vector_scalar_kernel/run.lit | 12 - .../vector_scalar_kernel/run_vck5000.lit | 11 + .../{test.cpp => test_vck5000.cpp} | 0 .../IRON-examples/vector_sum/aie2.py | 29 +- .../IRON-examples/vector_sum/run.lit | 3 +- .../IRON-examples/vector_sum/run_vck5000.lit | 10 + .../dynamic_dma_config_add_one/test.cpp | 20 +- .../aie.mlir.no_func | 62 -- .../test.cpp | 20 +- .../test.cpp | 2 +- .../ipu-xrt/matrix_add_one/aie2.py | 29 +- reference_designs/ipu-xrt/vector_max/aie2.py | 12 +- reference_designs/ipu-xrt/vector_max/run.lit | 1 - reference_designs/ipu-xrt/vector_min/aie2.py | 12 +- reference_designs/ipu-xrt/vector_min/run.lit | 1 - reference_designs/ipu-xrt/vector_sum/aie2.py | 12 +- reference_designs/ipu-xrt/vector_sum/run.lit | 1 - reference_designs/lit.cfg.py | 35 +- .../AIETargetHSA/input_with_addresses.mlir | 55 ++ test/Targets/AIETargetHSA/lit.local.cfg | 8 + test/lit.cfg.py | 22 +- .../aie/27_single_L1_single_lock/aie2.mlir | 3 + tools/aie-routing-command-line/visualize.py | 580 ++++++++++-------- tutorials/lit.cfg.py | 143 +++-- tutorials/scripts/visualize.py | 580 ++++++++++-------- utils/build-mlir-aie-pcie.sh | 1 + utils/generate-test-checks.py | 73 ++- 59 files changed, 1538 insertions(+), 1155 deletions(-) create mode 100644 reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/lit.local.cfg create mode 100644 reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/vector_add/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/vector_max/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/vector_min/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/vector_mult/run_vck5000.lit create mode 100644 reference_designs/IRON-examples/vector_scalar/run_vck5000.lit delete mode 100644 reference_designs/IRON-examples/vector_scalar_kernel/run.lit create mode 100644 reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit rename reference_designs/IRON-examples/vector_scalar_kernel/{test.cpp => test_vck5000.cpp} (100%) create mode 100644 reference_designs/IRON-examples/vector_sum/run_vck5000.lit delete mode 100644 reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func create mode 100644 test/Targets/AIETargetHSA/input_with_addresses.mlir create mode 100644 test/Targets/AIETargetHSA/lit.local.cfg diff --git a/lib/Targets/AIETargetHSA.cpp b/lib/Targets/AIETargetHSA.cpp index a79c01cc63..36701fc96d 100644 --- a/lib/Targets/AIETargetHSA.cpp +++ b/lib/Targets/AIETargetHSA.cpp @@ -74,13 +74,18 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) { } // Getting the sequence function op which contains the instructions - mlir::func::FuncOp funcOp; + mlir::func::FuncOp funcOp = NULL; for (auto op : targetOp.getOps()) { if (op.getName().str().compare("sequence") == 0) { funcOp = op; } } + // If no funcOp then just return + if (funcOp == NULL) { + return success(); + } + collectTiles(targetOp, tiles); collectBuffers(targetOp, buffers); diff --git a/python/ILP_solution.py b/python/ILP_solution.py index aed6f33506..edd89370e9 100644 --- a/python/ILP_solution.py +++ b/python/ILP_solution.py @@ -4,289 +4,316 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2021 Xilinx Inc. -#===============================================================================# +# ===============================================================================# # This file implements an experimental ILP solver for # automatic tiling space exploration. -#===============================================================================# +# ===============================================================================# -import gurobipy as gp -from gurobipy import GRB +import gurobipy as gp +from gurobipy import GRB import numpy as np import time -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# # Algorithmic parameters -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# # The list of loop bounds -loop_bounds=[64, 64, 64] +loop_bounds = [64, 64, 64] # The constant matrix that reflects how data tensors are related with # loop induction variables # +--------------------+ -# | | L0 | L1 | L2 | +# | | L0 | L1 | L2 | # +--------------------+ -# | in1 | 1 | 0 | 1 | +# | in1 | 1 | 0 | 1 | # +--------------------+ -# | in2 | 0 | 1 | 1 | +# | in2 | 0 | 1 | 1 | # +--------------------+ -# | out | 1 | 1 | 0 | +# | out | 1 | 1 | 0 | # +--------------------+ -tensor_IV = [[1,0,1],[0,1,1],[1,1,0]] +tensor_IV = [[1, 0, 1], [0, 1, 1], [1, 1, 0]] -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# # Architectural parameters -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# -# In AIE, we typically have three architectural (memory/compute) hierarchy levels. -# L3->L2 copies data from L3 memory to L2 shared cache. L2->L1 copies data from -# L2 cache to L1 private cache. L2->L1 also indicates the transition from -# temporal to spatial execution. L1 indicates the transition from spatial +# In AIE, we typically have three architectural (memory/compute) hierarchy levels. +# L3->L2 copies data from L3 memory to L2 shared cache. L2->L1 copies data from +# L2 cache to L1 private cache. L2->L1 also indicates the transition from +# temporal to spatial execution. L1 indicates the transition from spatial # to temporal task on each compute core. mem_levels = 3 # memory capacity for L3, L2, L1 -mem_capacity=[["L3", 2**20], ["L2", 2**16], ["L1", 2**11]] +mem_capacity = [["L3", 2**20], ["L2", 2**16], ["L1", 2**11]] -# The ratios according to which the memory spaces are allocated for each data -# tensor, ignoring the L3 level. For example, [0.3, 0.3, 0.4] means 30% of -# memory space is reserved for two input tensors, and 40% of memory space is +# The ratios according to which the memory spaces are allocated for each data +# tensor, ignoring the L3 level. For example, [0.3, 0.3, 0.4] means 30% of +# memory space is reserved for two input tensors, and 40% of memory space is # estimated to store the output tensor. # L2, L1: [in1, in2, out] -mem_ratios=[[0.3, 0.3, 0.4], [0.3, 0.3, 0.4]] +mem_ratios = [[0.3, 0.3, 0.4], [0.3, 0.3, 0.4]] # memory bandwidth for L3, L2, L1 # recalculated as log(data_size/#cycles) -mem_bandwidth=[["L3-L2", 2**30], ["L2-L1", 2*2**30]] +mem_bandwidth = [["L3-L2", 2**30], ["L2-L1", 2 * 2**30]] # frequency -freq = 600*10**6 +freq = 600 * 10**6 # compute cores of which L2 is in charge -spatial_dim=[8,8] +spatial_dim = [8, 8] -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# # ILP formulation -#-------------------------------------------------------------------------------# +# -------------------------------------------------------------------------------# + def prime_factorize(loop_bounds): - """ Factorize the original loops bounds into a list of prime factors. - Input: a list of loop bounds - Output: a super-list of prime factor lists - """ - prime_factor_list = [] - for loop_bound in loop_bounds: - prime_factors = [] - while loop_bound%2 == 0: - prime_factors.append(2) - loop_bound /= 2 - if loop_bound > 3: - for i in range(3, loop_bound, 2): - while loop_bound%i == 0: - prime_factors.append(i) - loop_bound /= i - if loop_bound > 2: - prime_factors.append(loop_bound) - prime_factor_list.append(prime_factors) - return prime_factor_list - -def ILP_formulation(util_factor = 0.5, compute_factor = 1, traffic_factor = 0.2): - # Create a new model - m = gp.Model("loopnest") - - # Initialize inputs - prime_factor_list = prime_factorize(loop_bounds) - prime_factor_len = sum([len(x) for x in prime_factor_list]) - - # Create decision variables - ## Binary decision variables - from factorized subloops to a specific - ## loop order - x = {} - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - var_name = "X_{}_{}_{}".format(f1, f2, p) - x[(f1, f2, p)] = m.addVar(vtype=GRB.BINARY, name=var_name) - - ## Integer decision variables - ## Y[(0, p)]: L3 mem, Y[(1, p)]: L2 mem, Y[(2, p)]: L1 mem - y = {} - for l in range(mem_levels): - for p in range(prime_factor_len): - var_name = "Y_{}_{}".format(l, p) - y[(l, p)] = m.addVar(lb=0, ub=1, vtype=GRB.INTEGER, name=var_name) - - # Add Constraints - ## One prime factor subloop has one assignment - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - col_sum = 0 - for p in range(prime_factor_len): - col_sum += x[(f1, f2, p)] - m.addConstr(col_sum == 1, "col_sum_{}_{}".format(f1, f2)) - - ## One ordering slot has only one subloop - for p in range(prime_factor_len): - row_sum = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - row_sum += x[(f1, f2, p)] - m.addConstr(row_sum == 1, "row_sum_{}".format(p)) - - ## monotone non-decreasing - for l in range(mem_levels): - for p in range(prime_factor_len - 1): - m.addConstr(y[(l, p)] <= y[(l, p+1)], "y_leq_{}_{}".format(l, p)) - - ## L3 region > L2 region > L1 region - L2_region = 0 - L1_region = 0 - for p in range(prime_factor_len): - L2_region += y[(0, p)] - y[(1, p)] - L1_region += y[(1, p)] - y[(2, p)] - m.addConstr(L2_region >= 1, "y_l2_region") - m.addConstr(L1_region >= 2, "y_l1_region") - - ## memory capacity - ### L2: accommodates all tensors within L2 region - L2_util={} - for v, iv_map in enumerate(tensor_IV): - L2_util[v] = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - L2_util[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \ - x[(f1, f2, p)] * y[(0, p)] - v_available = mem_capacity[1][1] * mem_ratios[0][v] - m.addConstr(L2_util[v] <= np.log2(v_available), "mem_capacity_L2_{}".format(v)) - - ### L1: accommodates all tensors mapped to temporal dimension within L1 region - L1_util={} - for v, iv_map in enumerate(tensor_IV): - L1_util[v] = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - L1_util[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \ - x[(f1, f2, p)] * y[(2, p)] - v_available = mem_capacity[2][1] * mem_ratios[1][v] - m.addConstr(L1_util[v] <= np.log2(v_available), "mem_capacity_L1_{}".format(v)) - - ## memory bandwidth - the amount of data copy size in a unit of time - ## #Data_size_that_requires_moving / #compute_cycles - ### L3->L2 traffic - L3_L2_tensor_traffic = {} - data_L2 = L2_util - cycles_L2 = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - cycles_L2 += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\ - (y[(0, p)] - y[(1, p)] + y[(2, p)]) - for v, iv_map in enumerate(tensor_IV): - bw_log = np.log2(mem_bandwidth[0][1]) + np.log2(mem_ratios[0][v]) - np.log2(freq) - L3_L2_tensor_traffic[v] = data_L2[v] - cycles_L2 - m.addConstr(L3_L2_tensor_traffic[v] <= bw_log, "L2_bandwidth_{}".format(v)) - L3_L2_traffic = sum(data_L2.values()) - cycles_L2 - - ### L2->L1 traffic - L2_L1_tensor_traffic = {} - data_L1 = {} - cycles_L1 = 0 - for v, iv_map in enumerate(tensor_IV): - data_L1[v] = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - data_L1[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \ - x[(f1, f2, p)] * y[(1, p)] - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - cycles_L1 += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] * y[(2, p)] - for v, iv_map in enumerate(tensor_IV): - bw_log = np.log2(mem_bandwidth[1][1]) + np.log2(mem_ratios[1][v]) - np.log2(freq) - L2_L1_tensor_traffic[v] = data_L1[v] - cycles_L1 - m.addConstr(L2_L1_tensor_traffic[v] <= bw_log, "L1_bandwidth_{}".format(v)) - L2_L1_traffic = sum(data_L1.values()) - cycles_L1 - - - ## spatial resource limitation - spatial_tile = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - spatial_tile += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\ - (y[(1, p)] - y[(2, p)]) - m.addConstr(spatial_tile <= sum(np.log2(spatial_dim)), "spatial_tile_limit") - - # Set objective function - ## utilization - total_util = 0 - for v, iv_map in enumerate(tensor_IV): - total_util += L2_util[v] + L1_util[v] - total_util += spatial_tile - - ## compute latency - ## The product of all dimensions that map to temporal - total_cycles = 0 - for f1, pf_list in enumerate(prime_factor_list): - for f2, prime_factors in enumerate(pf_list): - for p in range(prime_factor_len): - total_cycles += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\ - (1 - y[(1, p)] + y[(2, p)]) - - - ## traffic - total_traffic = L3_L2_traffic + L2_L1_traffic - - loopnest_obj = - util_factor * total_util + \ - compute_factor * total_cycles - - m.setObjective(loopnest_obj, GRB.MINIMIZE) - - begin_time = time.time() - m.optimize() - end_time = time.time() - runtime = end_time - begin_time + """Factorize the original loops bounds into a list of prime factors. + Input: a list of loop bounds + Output: a super-list of prime factor lists + """ + prime_factor_list = [] + for loop_bound in loop_bounds: + prime_factors = [] + while loop_bound % 2 == 0: + prime_factors.append(2) + loop_bound /= 2 + if loop_bound > 3: + for i in range(3, loop_bound, 2): + while loop_bound % i == 0: + prime_factors.append(i) + loop_bound /= i + if loop_bound > 2: + prime_factors.append(loop_bound) + prime_factor_list.append(prime_factors) + return prime_factor_list + + +def ILP_formulation(util_factor=0.5, compute_factor=1, traffic_factor=0.2): + # Create a new model + m = gp.Model("loopnest") + + # Initialize inputs + prime_factor_list = prime_factorize(loop_bounds) + prime_factor_len = sum([len(x) for x in prime_factor_list]) + + # Create decision variables + ## Binary decision variables - from factorized subloops to a specific + ## loop order + x = {} + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + var_name = "X_{}_{}_{}".format(f1, f2, p) + x[(f1, f2, p)] = m.addVar(vtype=GRB.BINARY, name=var_name) + + ## Integer decision variables + ## Y[(0, p)]: L3 mem, Y[(1, p)]: L2 mem, Y[(2, p)]: L1 mem + y = {} + for l in range(mem_levels): + for p in range(prime_factor_len): + var_name = "Y_{}_{}".format(l, p) + y[(l, p)] = m.addVar(lb=0, ub=1, vtype=GRB.INTEGER, name=var_name) + + # Add Constraints + ## One prime factor subloop has one assignment + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + col_sum = 0 + for p in range(prime_factor_len): + col_sum += x[(f1, f2, p)] + m.addConstr(col_sum == 1, "col_sum_{}_{}".format(f1, f2)) + + ## One ordering slot has only one subloop + for p in range(prime_factor_len): + row_sum = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + row_sum += x[(f1, f2, p)] + m.addConstr(row_sum == 1, "row_sum_{}".format(p)) + + ## monotone non-decreasing + for l in range(mem_levels): + for p in range(prime_factor_len - 1): + m.addConstr(y[(l, p)] <= y[(l, p + 1)], "y_leq_{}_{}".format(l, p)) + + ## L3 region > L2 region > L1 region + L2_region = 0 + L1_region = 0 + for p in range(prime_factor_len): + L2_region += y[(0, p)] - y[(1, p)] + L1_region += y[(1, p)] - y[(2, p)] + m.addConstr(L2_region >= 1, "y_l2_region") + m.addConstr(L1_region >= 2, "y_l1_region") + + ## memory capacity + ### L2: accommodates all tensors within L2 region + L2_util = {} + for v, iv_map in enumerate(tensor_IV): + L2_util[v] = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + L2_util[v] += ( + tensor_IV[v][f1] + * np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * y[(0, p)] + ) + v_available = mem_capacity[1][1] * mem_ratios[0][v] + m.addConstr(L2_util[v] <= np.log2(v_available), "mem_capacity_L2_{}".format(v)) + + ### L1: accommodates all tensors mapped to temporal dimension within L1 region + L1_util = {} + for v, iv_map in enumerate(tensor_IV): + L1_util[v] = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + L1_util[v] += ( + tensor_IV[v][f1] + * np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * y[(2, p)] + ) + v_available = mem_capacity[2][1] * mem_ratios[1][v] + m.addConstr(L1_util[v] <= np.log2(v_available), "mem_capacity_L1_{}".format(v)) + + ## memory bandwidth - the amount of data copy size in a unit of time + ## #Data_size_that_requires_moving / #compute_cycles + ### L3->L2 traffic + L3_L2_tensor_traffic = {} + data_L2 = L2_util + cycles_L2 = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + cycles_L2 += ( + np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * (y[(0, p)] - y[(1, p)] + y[(2, p)]) + ) + for v, iv_map in enumerate(tensor_IV): + bw_log = ( + np.log2(mem_bandwidth[0][1]) + np.log2(mem_ratios[0][v]) - np.log2(freq) + ) + L3_L2_tensor_traffic[v] = data_L2[v] - cycles_L2 + m.addConstr(L3_L2_tensor_traffic[v] <= bw_log, "L2_bandwidth_{}".format(v)) + L3_L2_traffic = sum(data_L2.values()) - cycles_L2 + + ### L2->L1 traffic + L2_L1_tensor_traffic = {} + data_L1 = {} + cycles_L1 = 0 + for v, iv_map in enumerate(tensor_IV): + data_L1[v] = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + data_L1[v] += ( + tensor_IV[v][f1] + * np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * y[(1, p)] + ) + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + cycles_L1 += ( + np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] * y[(2, p)] + ) + for v, iv_map in enumerate(tensor_IV): + bw_log = ( + np.log2(mem_bandwidth[1][1]) + np.log2(mem_ratios[1][v]) - np.log2(freq) + ) + L2_L1_tensor_traffic[v] = data_L1[v] - cycles_L1 + m.addConstr(L2_L1_tensor_traffic[v] <= bw_log, "L1_bandwidth_{}".format(v)) + L2_L1_traffic = sum(data_L1.values()) - cycles_L1 + + ## spatial resource limitation + spatial_tile = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + spatial_tile += ( + np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * (y[(1, p)] - y[(2, p)]) + ) + m.addConstr(spatial_tile <= sum(np.log2(spatial_dim)), "spatial_tile_limit") + + # Set objective function + ## utilization + total_util = 0 + for v, iv_map in enumerate(tensor_IV): + total_util += L2_util[v] + L1_util[v] + total_util += spatial_tile + + ## compute latency + ## The product of all dimensions that map to temporal + total_cycles = 0 + for f1, pf_list in enumerate(prime_factor_list): + for f2, prime_factors in enumerate(pf_list): + for p in range(prime_factor_len): + total_cycles += ( + np.log2(prime_factor_list[f1][f2]) + * x[(f1, f2, p)] + * (1 - y[(1, p)] + y[(2, p)]) + ) + + ## traffic + total_traffic = L3_L2_traffic + L2_L1_traffic + + loopnest_obj = -util_factor * total_util + compute_factor * total_cycles + + m.setObjective(loopnest_obj, GRB.MINIMIZE) + + begin_time = time.time() + m.optimize() + end_time = time.time() + runtime = end_time - begin_time # Logging to a file - m.write("debug.lp") - - # print results - print("---runtime--- ", runtime) - m.printAttr('X') - - ## L2 utilization - print("---L2_utilization---") - for key, val in L2_util.items(): - print(key, "-", val.getValue()) - - ## L1 utilization - print("---L1_utilization---") - for key, val in L1_util.items(): - print(key, "-", val.getValue()) - - ## spatial tile limit - print("---spatial_tile_limit---") - print(spatial_tile.getValue()) - - ## objective - utilization - print("objective: total_utilization = ", total_util.getValue()) - print("objective: total_cycles = ", total_cycles.getValue()) - print("objective: L3_L2_traffic") - for key, val in L3_L2_tensor_traffic.items(): - print(key, "-", val.getValue()) - print("objective: L2_L1_traffic") - for key, val in L2_L1_tensor_traffic.items(): - print(key, "-", val.getValue()) - -if __name__=="__main__": - try: - ILP_formulation() - - except gp.GurobiError as e: - print('Error code ' + str(e.errno) + ": " + str(e)) - - except AttributeError: - print('Encountered an attribute error') + m.write("debug.lp") + + # print results + print("---runtime--- ", runtime) + m.printAttr("X") + + ## L2 utilization + print("---L2_utilization---") + for key, val in L2_util.items(): + print(key, "-", val.getValue()) + + ## L1 utilization + print("---L1_utilization---") + for key, val in L1_util.items(): + print(key, "-", val.getValue()) + + ## spatial tile limit + print("---spatial_tile_limit---") + print(spatial_tile.getValue()) + + ## objective - utilization + print("objective: total_utilization = ", total_util.getValue()) + print("objective: total_cycles = ", total_cycles.getValue()) + print("objective: L3_L2_traffic") + for key, val in L3_L2_tensor_traffic.items(): + print(key, "-", val.getValue()) + print("objective: L2_L1_traffic") + for key, val in L2_L1_tensor_traffic.items(): + print(key, "-", val.getValue()) + + +if __name__ == "__main__": + try: + ILP_formulation() + + except gp.GurobiError as e: + print("Error code " + str(e.errno) + ": " + str(e)) + + except AttributeError: + print("Encountered an attribute error") diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py index e67c8c7fd7..3296704e8e 100644 --- a/python/compiler/aiecc/main.py +++ b/python/compiler/aiecc/main.py @@ -671,7 +671,6 @@ async def process_host_cgen(self, aie_target, file_with_addresses): file_inc_cpp, ], ) - cmd = ["clang++", "-std=c++11"] if opts.host_target: diff --git a/reference_designs/CMakeLists.txt b/reference_designs/CMakeLists.txt index e732b4655c..08cb8a3155 100755 --- a/reference_designs/CMakeLists.txt +++ b/reference_designs/CMakeLists.txt @@ -57,7 +57,12 @@ option(ENABLE_CHESS_TESTS "Enable backend tests using xchesscc" ${DEFAULT_ENABLE if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL aarch64) set(DEFAULT_ENABLE_BOARD_TESTS ON) else() - set(DEFAULT_ENABLE_BOARD_TESTS OFF) + # If we are on a x86 and find hsa-runtiem64 run the tests on the board + if(hsa-runtime64_FOUND) + set(ENABLE_BOARD_TESTS ON) + else() + set(ENABLE_BOARD_TESTS OFF) + endif() endif() option(ENABLE_BOARD_TESTS "Enable board tests" ${DEFAULT_ENABLE_BOARD_TESTS}) @@ -144,4 +149,4 @@ add_lit_testsuite(check-reference-designs "Running the aie reference designs" DEPENDS ${TEST_DEPENDS} ARGS "-sv --timeout 600" ) -set_target_properties(check-reference-designs PROPERTIES FOLDER "Tutorials") \ No newline at end of file +set_target_properties(check-reference-designs PROPERTIES FOLDER "Tutorials") diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py index 18ea6a5672..e39d918e67 100644 --- a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py +++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py @@ -14,18 +14,19 @@ # Used to get command line arguments import sys + def my_add_one_objFifo(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit index 49cd75e360..a429e99221 100644 --- a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit +++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit @@ -3,7 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/lit.local.cfg b/reference_designs/IRON-examples/lit.local.cfg new file mode 100644 index 0000000000..cd7e7f2f7a --- /dev/null +++ b/reference_designs/IRON-examples/lit.local.cfg @@ -0,0 +1,12 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023 AMD Inc. + +config.suffixes = ['.lit'] +config.unsupported = [] + +if not config.has_libxaie: + config.unsupported = ['IRON-examples'] diff --git a/reference_designs/IRON-examples/matrix_add_one/aie2.py b/reference_designs/IRON-examples/matrix_add_one/aie2.py index 6e74ddc765..36eb3a3d38 100644 --- a/reference_designs/IRON-examples/matrix_add_one/aie2.py +++ b/reference_designs/IRON-examples/matrix_add_one/aie2.py @@ -28,18 +28,19 @@ objfifo_capacity = 4 + def my_matrix_add_one(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -51,10 +52,14 @@ def device_body(): # AIE-array data movement with object fifos # Input - of_in1 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty) + of_in1 = object_fifo( + "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty + ) # Output - of_out1 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty) + of_out1 = object_fifo( + "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty + ) # Set up compute tiles @@ -66,10 +71,10 @@ def core_body(): elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1) for i in for_(TILE_SIZE): - v0 = memref.load(elem_in, [i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) - yield_([]) + v0 = memref.load(elem_in, [i]) + v1 = arith.addi(v0, arith.constant(1, T.i32())) + memref.store(v1, elem_out, [i]) + yield_([]) of_in1.release(ObjectFifoPort.Consume, 1) of_out1.release(ObjectFifoPort.Produce, 1) yield_([]) @@ -81,10 +86,18 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(inTensor, notUsed, outTensor): ipu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH] + metadata="out0", + bd_id=0, + mem=outTensor, + sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], + strides=[1, 1, IMAGE_WIDTH], ) ipu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH] + metadata="in0", + bd_id=1, + mem=inTensor, + sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], + strides=[1, 1, IMAGE_WIDTH], ) ipu_sync(column=0, row=0, direction=0, channel=0) diff --git a/reference_designs/IRON-examples/matrix_add_one/run.lit b/reference_designs/IRON-examples/matrix_add_one/run.lit index 49cd75e360..a429e99221 100644 --- a/reference_designs/IRON-examples/matrix_add_one/run.lit +++ b/reference_designs/IRON-examples/matrix_add_one/run.lit @@ -3,7 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit b/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py b/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py index 6dc6a4c78e..7aacb0ab10 100644 --- a/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py +++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py @@ -12,9 +12,9 @@ from aie.extras.context import mlir_mod_ctx # Size of the matrices -M = 4 -N = 4 -K = 4 +M = 4 +N = 4 +K = 4 A_SIZE = M * K B_SIZE = K * N @@ -22,12 +22,13 @@ objfifo_capacity = 4 + def my_matrix_multiplication_scalar(): with mlir_mod_ctx() as ctx: @device(AIEDevice.xcvc1902) def device_body(): - #memRef_ty = T.memref(A_SIZE, T.i32()) + # memRef_ty = T.memref(A_SIZE, T.i32()) memRef_ty = T.memref(M, N, T.i32()) # Tile declarations @@ -36,11 +37,17 @@ def device_body(): # AIE-array data movement with object fifos # Input - of_in0 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty) - of_in1 = object_fifo("in1", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty) + of_in0 = object_fifo( + "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty + ) + of_in1 = object_fifo( + "in1", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty + ) # Output - of_out0 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty) + of_out0 = object_fifo( + "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty + ) # Set up compute tiles @@ -49,27 +56,27 @@ def device_body(): def core_body(): # Effective while(1) for _ in for_(8): - elem_in0 = of_in0.acquire(ObjectFifoPort.Consume, 1) - elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1) - elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) - for n in for_(N): - for m in for_(M): - for k in for_(K): - v0 = memref.load(elem_in0, [m, k]) - v1 = memref.load(elem_in1, [k, n]) - v2 = memref.load(elem_out, [m, n]) - v3 = arith.muli(v0, v1) - v4 = arith.addi(v2, v3) - memref.store(v4, elem_out, [m, n]) - yield_([]) # K - - yield_([]) # N - yield_([]) # M - - of_in0.release(ObjectFifoPort.Consume, 1) - of_in1.release(ObjectFifoPort.Consume, 1) - of_out0.release(ObjectFifoPort.Produce, 1) - yield_([]) + elem_in0 = of_in0.acquire(ObjectFifoPort.Consume, 1) + elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1) + elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) + for n in for_(N): + for m in for_(M): + for k in for_(K): + v0 = memref.load(elem_in0, [m, k]) + v1 = memref.load(elem_in1, [k, n]) + v2 = memref.load(elem_out, [m, n]) + v3 = arith.muli(v0, v1) + v4 = arith.addi(v2, v3) + memref.store(v4, elem_out, [m, n]) + yield_([]) # K + + yield_([]) # N + yield_([]) # M + + of_in0.release(ObjectFifoPort.Consume, 1) + of_in1.release(ObjectFifoPort.Consume, 1) + of_out0.release(ObjectFifoPort.Produce, 1) + yield_([]) # To/from AIE-array data movement @@ -77,15 +84,15 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(inTensorA, inTensorB, outTensor): - #ipu_dma_memcpy_nd( + # ipu_dma_memcpy_nd( # metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, C_SIZE] - #) - #ipu_dma_memcpy_nd( + # ) + # ipu_dma_memcpy_nd( # metadata="in0", bd_id=1, mem=inTensorA, sizes=[1, 1, 1, A_SIZE] - #) - #ipu_dma_memcpy_nd( + # ) + # ipu_dma_memcpy_nd( # metadata="in1", bd_id=1, mem=inTensorB, sizes=[1, 1, 1, B_SIZE] - #) + # ) ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit b/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit index 49cd75e360..bb92a10b48 100644 --- a/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit +++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! - +// XFAIL: * diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit b/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit new file mode 100644 index 0000000000..752032bfc3 --- /dev/null +++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit @@ -0,0 +1,12 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// XFAIL: * +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/passthrough_hardware/aie2.py b/reference_designs/IRON-examples/passthrough_hardware/aie2.py index 0863359ef0..3931b5ffc0 100644 --- a/reference_designs/IRON-examples/passthrough_hardware/aie2.py +++ b/reference_designs/IRON-examples/passthrough_hardware/aie2.py @@ -15,18 +15,19 @@ N = 4096 + def my_add_one_objFifo(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -53,7 +54,7 @@ def core_body(): v0 = arith.constant(0, T.i32()) memref.store(v0, tmp, [0]) - # To/from AIE-array data movement + # To/from AIE-array data movement tensor_ty = T.memref(N, T.i32()) @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) diff --git a/reference_designs/IRON-examples/passthrough_hardware/run.lit b/reference_designs/IRON-examples/passthrough_hardware/run.lit index 3e9a05eb94..62d66040ff 100644 --- a/reference_designs/IRON-examples/passthrough_hardware/run.lit +++ b/reference_designs/IRON-examples/passthrough_hardware/run.lit @@ -3,7 +3,7 @@ // // REQUIRES: ryzen_ai, chess // -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s diff --git a/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit b/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_add/aie2.py b/reference_designs/IRON-examples/vector_add/aie2.py index df0cfa3c0a..6f8ad2d5b6 100755 --- a/reference_designs/IRON-examples/vector_add/aie2.py +++ b/reference_designs/IRON-examples/vector_add/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_add(): N = 64 n = 16 @@ -24,15 +25,15 @@ def my_vector_add(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -62,11 +63,11 @@ def core_body(): elem_in2 = of_in2.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(n): - v0 = memref.load(elem_in1, [i]) - v1 = memref.load(elem_in2, [i]) - v2 = arith.addi(v0, v1) - memref.store(v2, elem_out, [i]) - yield_([]) + v0 = memref.load(elem_in1, [i]) + v1 = memref.load(elem_in2, [i]) + v2 = arith.addi(v0, v1) + memref.store(v2, elem_out, [i]) + yield_([]) of_in1.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) diff --git a/reference_designs/IRON-examples/vector_add/run.lit b/reference_designs/IRON-examples/vector_add/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_add/run.lit +++ b/reference_designs/IRON-examples/vector_add/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_add/run_vck5000.lit b/reference_designs/IRON-examples/vector_add/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_add/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_max/aie2.py b/reference_designs/IRON-examples/vector_max/aie2.py index ff362f2ba8..5d094a621d 100755 --- a/reference_designs/IRON-examples/vector_max/aie2.py +++ b/reference_designs/IRON-examples/vector_max/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_max(): N = 64 @@ -22,15 +23,15 @@ def my_vector_max(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -59,12 +60,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(max_val, [0]) - v2 = arith.maxui(v1, v0) - memref.store(v2, max_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(max_val, [0]) + v2 = arith.maxui(v1, v0) + memref.store(v2, max_val, [0]) + yield_([]) + v3 = memref.load(max_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/IRON-examples/vector_max/run.lit b/reference_designs/IRON-examples/vector_max/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_max/run.lit +++ b/reference_designs/IRON-examples/vector_max/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_max/run_vck5000.lit b/reference_designs/IRON-examples/vector_max/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_max/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_min/aie2.py b/reference_designs/IRON-examples/vector_min/aie2.py index a62c8ddb88..f0b931431a 100755 --- a/reference_designs/IRON-examples/vector_min/aie2.py +++ b/reference_designs/IRON-examples/vector_min/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_max(): N = 64 @@ -22,15 +23,15 @@ def my_vector_max(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -59,12 +60,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(min_val, [0]) - v2 = arith.minui(v1, v0) - memref.store(v2, min_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(min_val, [0]) + v2 = arith.minui(v1, v0) + memref.store(v2, min_val, [0]) + yield_([]) + v3 = memref.load(min_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/IRON-examples/vector_min/run.lit b/reference_designs/IRON-examples/vector_min/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_min/run.lit +++ b/reference_designs/IRON-examples/vector_min/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_min/run_vck5000.lit b/reference_designs/IRON-examples/vector_min/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_min/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_mult/aie2.py b/reference_designs/IRON-examples/vector_mult/aie2.py index 7621a04861..5a36f85a33 100755 --- a/reference_designs/IRON-examples/vector_mult/aie2.py +++ b/reference_designs/IRON-examples/vector_mult/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_add(): N = 64 n = 16 @@ -24,15 +25,15 @@ def my_vector_add(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -62,11 +63,11 @@ def core_body(): elem_in2 = of_in2.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(n): - v0 = memref.load(elem_in1, [i]) - v1 = memref.load(elem_in2, [i]) - v2 = arith.muli(v0, v1) - memref.store(v2, elem_out, [i]) - yield_([]) + v0 = memref.load(elem_in1, [i]) + v1 = memref.load(elem_in2, [i]) + v2 = arith.muli(v0, v1) + memref.store(v2, elem_out, [i]) + yield_([]) of_in1.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) diff --git a/reference_designs/IRON-examples/vector_mult/run.lit b/reference_designs/IRON-examples/vector_mult/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_mult/run.lit +++ b/reference_designs/IRON-examples/vector_mult/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_mult/run_vck5000.lit b/reference_designs/IRON-examples/vector_mult/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_mult/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_scalar/aie2.py b/reference_designs/IRON-examples/vector_scalar/aie2.py index 19fd4dffd5..540210596c 100755 --- a/reference_designs/IRON-examples/vector_scalar/aie2.py +++ b/reference_designs/IRON-examples/vector_scalar/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_scalar(): N = 64 n = 16 @@ -24,15 +25,15 @@ def my_vector_scalar(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -60,10 +61,10 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(n): - v0 = memref.load(elem_in, [i]) - v1 = arith.muli(v0, arith.constant(3, T.i32())) - memref.store(v1, elem_out, [i]) - yield_([]) + v0 = memref.load(elem_in, [i]) + v1 = arith.muli(v0, arith.constant(3, T.i32())) + memref.store(v1, elem_out, [i]) + yield_([]) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) yield_([]) diff --git a/reference_designs/IRON-examples/vector_scalar/run.lit b/reference_designs/IRON-examples/vector_scalar/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_scalar/run.lit +++ b/reference_designs/IRON-examples/vector_scalar/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit b/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/run.lit b/reference_designs/IRON-examples/vector_scalar_kernel/run.lit deleted file mode 100644 index 212f474fea..0000000000 --- a/reference_designs/IRON-examples/vector_scalar_kernel/run.lit +++ /dev/null @@ -1,12 +0,0 @@ -// (c) Copyright 2023 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai -// -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir -// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s -// CHECK: PASS! - diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit b/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit new file mode 100644 index 0000000000..65dcc6a9a4 --- /dev/null +++ b/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit @@ -0,0 +1,11 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: xchesscc -p me -P %aietools/data/versal_prod/lib/ -L %aietools/data/versal_prod/lib/ -c %S/scale.cc +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/test.cpp b/reference_designs/IRON-examples/vector_scalar_kernel/test_vck5000.cpp similarity index 100% rename from reference_designs/IRON-examples/vector_scalar_kernel/test.cpp rename to reference_designs/IRON-examples/vector_scalar_kernel/test_vck5000.cpp diff --git a/reference_designs/IRON-examples/vector_sum/aie2.py b/reference_designs/IRON-examples/vector_sum/aie2.py index fee3e165d9..4e40b8009c 100755 --- a/reference_designs/IRON-examples/vector_sum/aie2.py +++ b/reference_designs/IRON-examples/vector_sum/aie2.py @@ -15,6 +15,7 @@ import sys + def my_vector_sum(): N = 64 @@ -22,15 +23,15 @@ def my_vector_sum(): with mlir_mod_ctx() as ctx: - if(len(sys.argv) != 3): - raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - - if sys.argv[1] == 'ipu': - dev = AIEDevice.ipu - elif sys.argv[1] == 'xcvc1902': - dev = AIEDevice.xcvc1902 + if len(sys.argv) != 3: + raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") + + if sys.argv[1] == "ipu": + dev = AIEDevice.ipu + elif sys.argv[1] == "xcvc1902": + dev = AIEDevice.xcvc1902 else: - raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) + raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1])) @device(dev) def device_body(): @@ -59,12 +60,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(sum_val, [0]) - v2 = arith.addi(v1, v0) - memref.store(v2, sum_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(sum_val, [0]) + v2 = arith.addi(v1, v0) + memref.store(v2, sum_val, [0]) + yield_([]) + v3 = memref.load(sum_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/IRON-examples/vector_sum/run.lit b/reference_designs/IRON-examples/vector_sum/run.lit index 212f474fea..a429e99221 100644 --- a/reference_designs/IRON-examples/vector_sum/run.lit +++ b/reference_designs/IRON-examples/vector_sum/run.lit @@ -3,8 +3,7 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o -// RUN: %python %S/aie2.py > ./aie.mlir +// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s diff --git a/reference_designs/IRON-examples/vector_sum/run_vck5000.lit b/reference_designs/IRON-examples/vector_sum/run_vck5000.lit new file mode 100644 index 0000000000..3583a5f393 --- /dev/null +++ b/reference_designs/IRON-examples/vector_sum/run_vck5000.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir +// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib +// RUN: %run_on_board ./test.elf | FileCheck %s +// CHECK: PASS! + diff --git a/reference_designs/dynamic_dma_config_add_one/test.cpp b/reference_designs/dynamic_dma_config_add_one/test.cpp index c26cb76355..8f101d4f1d 100644 --- a/reference_designs/dynamic_dma_config_add_one/test.cpp +++ b/reference_designs/dynamic_dma_config_add_one/test.cpp @@ -113,11 +113,11 @@ int main(int argc, char *argv[]) { uint64_t wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1); uint64_t packet_id = wr_idx % queues[0]->size; hsa_agent_dispatch_packet_t write_pkt; - air_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2, - reinterpret_cast(src), - DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); - air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, - &write_pkt); + mlir_aie_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2, + reinterpret_cast(src), + DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); + mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, + &write_pkt); // // read the data @@ -126,11 +126,11 @@ int main(int argc, char *argv[]) { wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1); packet_id = wr_idx % queues[0]->size; hsa_agent_dispatch_packet_t read_pkt; - air_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2, - reinterpret_cast(dst), - DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); - air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, - &read_pkt); + mlir_aie_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2, + reinterpret_cast(dst), + DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); + mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, + &read_pkt); int errors = 0; diff --git a/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func b/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func deleted file mode 100644 index bfef549e04..0000000000 --- a/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func +++ /dev/null @@ -1,62 +0,0 @@ -//===- aie.mlir ------------------------------------------------*- MLIR -*-===// -// -// Copyright (C) 2020-2022, Xilinx Inc. -// Copyright (C) 2022, Advanced Micro Devices, Inc. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - -module { - %t70 = aie.tile(6, 0) - %t71 = aie.tile(6, 1) - %t72 = aie.tile(6, 2) - - aie.flow(%t70, "DMA" : 0, %t72, "DMA" : 0) - aie.flow(%t70, "DMA" : 1, %t72, "DMA" : 1) - aie.flow(%t72, "DMA" : 0, %t70, "DMA" : 0) - aie.flow(%t72, "DMA" : 1, %t70, "DMA" : 1) - - %buf72_0 = aie.buffer(%t72) { sym_name = "in" } : memref<16xi32> - %buf72_1 = aie.buffer(%t72) { sym_name = "out" } : memref<16xi32> - - %l72_0 = aie.lock(%t72, 0) - %l72_1 = aie.lock(%t72, 1) - - %m72 = aie.mem(%t72) { - %srcDma = aie.dma_start(S2MM, 0, ^bd0, ^dma0) - ^dma0: - %dstDma = aie.dma_start(MM2S, 0, ^bd1, ^end) - ^bd0: - aie.use_lock(%l72_0, "Acquire", 0) - aie.dma_bd(%buf72_0 : memref<16xi32>, 0, 16) - aie.use_lock(%l72_0, "Release", 1) - aie.next_bd ^bd1 - ^bd1: - aie.use_lock(%l72_1, "Acquire", 1) - aie.dma_bd(%buf72_1 : memref<16xi32>, 0, 16) - aie.use_lock(%l72_1, "Release", 0) - aie.next_bd ^bd0 - ^end: - aie.end - } - - aie.core(%t72) { - %c16 = arith.constant 16 : index - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c1_32 = arith.constant 1 : i32 - - aie.use_lock(%l72_0, "Acquire", 1) - aie.use_lock(%l72_1, "Acquire", 0) - scf.for %arg3 = %c0 to %c16 step %c1 { - %0 = memref.load %buf72_0[%arg3] : memref<16xi32> - %1 = arith.addi %0, %c1_32 : i32 - memref.store %1, %buf72_1[%arg3] : memref<16xi32> - } - aie.use_lock(%l72_0, "Release", 0) - aie.use_lock(%l72_1, "Release", 1) - - aie.end - } - -} diff --git a/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp b/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp index c26cb76355..8f101d4f1d 100644 --- a/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp +++ b/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp @@ -113,11 +113,11 @@ int main(int argc, char *argv[]) { uint64_t wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1); uint64_t packet_id = wr_idx % queues[0]->size; hsa_agent_dispatch_packet_t write_pkt; - air_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2, - reinterpret_cast(src), - DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); - air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, - &write_pkt); + mlir_aie_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2, + reinterpret_cast(src), + DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); + mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, + &write_pkt); // // read the data @@ -126,11 +126,11 @@ int main(int argc, char *argv[]) { wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1); packet_id = wr_idx % queues[0]->size; hsa_agent_dispatch_packet_t read_pkt; - air_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2, - reinterpret_cast(dst), - DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); - air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, - &read_pkt); + mlir_aie_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2, + reinterpret_cast(dst), + DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0); + mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx, + &read_pkt); int errors = 0; diff --git a/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp b/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp index 68b523f873..1fe7607220 100644 --- a/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp +++ b/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp @@ -100,7 +100,7 @@ int main(int argc, char *argv[]) { dst[i] = 0xdeface; } - invoke_data_movement(queues[0], &agents[0], src, dst); + invoke_data_movement(queues[0], &agents[0], dst, src); int errors = 0; diff --git a/reference_designs/ipu-xrt/matrix_add_one/aie2.py b/reference_designs/ipu-xrt/matrix_add_one/aie2.py index d5a528c43a..6f62deaa1c 100644 --- a/reference_designs/ipu-xrt/matrix_add_one/aie2.py +++ b/reference_designs/ipu-xrt/matrix_add_one/aie2.py @@ -26,6 +26,7 @@ objfifo_capacity = 4 + def my_matrix_add_one(): with mlir_mod_ctx() as ctx: @@ -39,10 +40,14 @@ def device_body(): # AIE-array data movement with object fifos # Input - of_in1 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty) + of_in1 = object_fifo( + "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty + ) # Output - of_out1 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty) + of_out1 = object_fifo( + "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty + ) # Set up compute tiles @@ -54,10 +59,10 @@ def core_body(): elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1) for i in for_(TILE_SIZE): - v0 = memref.load(elem_in, [i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) - yield_([]) + v0 = memref.load(elem_in, [i]) + v1 = arith.addi(v0, arith.constant(1, T.i32())) + memref.store(v1, elem_out, [i]) + yield_([]) of_in1.release(ObjectFifoPort.Consume, 1) of_out1.release(ObjectFifoPort.Produce, 1) yield_([]) @@ -69,10 +74,18 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(inTensor, notUsed, outTensor): ipu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH] + metadata="out0", + bd_id=0, + mem=outTensor, + sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], + strides=[1, 1, IMAGE_WIDTH], ) ipu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH] + metadata="in0", + bd_id=1, + mem=inTensor, + sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], + strides=[1, 1, IMAGE_WIDTH], ) ipu_sync(column=0, row=0, direction=0, channel=0) diff --git a/reference_designs/ipu-xrt/vector_max/aie2.py b/reference_designs/ipu-xrt/vector_max/aie2.py index 89f9948296..b1b7c8c389 100755 --- a/reference_designs/ipu-xrt/vector_max/aie2.py +++ b/reference_designs/ipu-xrt/vector_max/aie2.py @@ -48,12 +48,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(max_val, [0]) - v2 = arith.maxui(v1, v0) - memref.store(v2, max_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(max_val, [0]) + v2 = arith.maxui(v1, v0) + memref.store(v2, max_val, [0]) + yield_([]) + v3 = memref.load(max_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/ipu-xrt/vector_max/run.lit b/reference_designs/ipu-xrt/vector_max/run.lit index 212f474fea..49cd75e360 100644 --- a/reference_designs/ipu-xrt/vector_max/run.lit +++ b/reference_designs/ipu-xrt/vector_max/run.lit @@ -3,7 +3,6 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o // RUN: %python %S/aie2.py > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem diff --git a/reference_designs/ipu-xrt/vector_min/aie2.py b/reference_designs/ipu-xrt/vector_min/aie2.py index 0e6e05896d..f59ab8ab6e 100755 --- a/reference_designs/ipu-xrt/vector_min/aie2.py +++ b/reference_designs/ipu-xrt/vector_min/aie2.py @@ -48,12 +48,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(min_val, [0]) - v2 = arith.minui(v1, v0) - memref.store(v2, min_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(min_val, [0]) + v2 = arith.minui(v1, v0) + memref.store(v2, min_val, [0]) + yield_([]) + v3 = memref.load(min_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/ipu-xrt/vector_min/run.lit b/reference_designs/ipu-xrt/vector_min/run.lit index 212f474fea..49cd75e360 100644 --- a/reference_designs/ipu-xrt/vector_min/run.lit +++ b/reference_designs/ipu-xrt/vector_min/run.lit @@ -3,7 +3,6 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o // RUN: %python %S/aie2.py > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem diff --git a/reference_designs/ipu-xrt/vector_sum/aie2.py b/reference_designs/ipu-xrt/vector_sum/aie2.py index 1c44c24125..66dd3eb9a8 100755 --- a/reference_designs/ipu-xrt/vector_sum/aie2.py +++ b/reference_designs/ipu-xrt/vector_sum/aie2.py @@ -48,12 +48,12 @@ def core_body(): elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) for i in for_(N): - v0 = memref.load(elem_in, [i]) - v1 = memref.load(sum_val, [0]) - v2 = arith.addi(v1, v0) - memref.store(v2, sum_val, [0]) - yield_([]) - + v0 = memref.load(elem_in, [i]) + v1 = memref.load(sum_val, [0]) + v2 = arith.addi(v1, v0) + memref.store(v2, sum_val, [0]) + yield_([]) + v3 = memref.load(sum_val, [0]) memref.store(v3, elem_out, [0]) of_in.release(ObjectFifoPort.Consume, 1) diff --git a/reference_designs/ipu-xrt/vector_sum/run.lit b/reference_designs/ipu-xrt/vector_sum/run.lit index 212f474fea..49cd75e360 100644 --- a/reference_designs/ipu-xrt/vector_sum/run.lit +++ b/reference_designs/ipu-xrt/vector_sum/run.lit @@ -3,7 +3,6 @@ // // REQUIRES: ryzen_ai // -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o // RUN: %python %S/aie2.py > ./aie.mlir // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem diff --git a/reference_designs/lit.cfg.py b/reference_designs/lit.cfg.py index 340e53f343..066f4fb0b9 100755 --- a/reference_designs/lit.cfg.py +++ b/reference_designs/lit.cfg.py @@ -45,27 +45,40 @@ # for python llvm_config.with_environment("PYTHONPATH", os.path.join(config.aie_obj_root, "python")) -if config.hsa_found: +if config.enable_board_tests: + config.substitutions.append(("%run_on_board", "sudo flock /tmp/board.lock")) +else: + config.substitutions.append(("%run_on_board", "echo")) + +run_on_ipu = "echo" +xrt_flags = "" + +if config.hsa_dir and (not ("NOTFOUND" in config.hsa_dir)): if not "hsa" in config.aieHostTarget: - print("ROCm found, but disabled because host target '{config.aieHostTarget}'") + print( + "ROCm found, but disabled because host target {}".format( + config.aieHostTarget + ) + ) # Getting the path to the ROCm directory. hsa-runtime64 points to the cmake # directory so need to go up three directories rocm_root = os.path.join(config.hsa_dir, "..", "..", "..") print("Found ROCm:", rocm_root) config.available_features.add("hsa") - config.substitutions.append(('%HSA_DIR%', "{}".format(rocm_root))) -else: - print("ROCm not found") + config.substitutions.append(("%HSA_DIR%", "{}".format(rocm_root))) + config.substitutions.append(("%link_against_hsa%", "--link_against_hsa")) -if config.enable_board_tests: - config.substitutions.append( - ("%run_on_board", "sudo flock /tmp/board.lock") - ) + if config.enable_board_tests: + config.substitutions.append(("%run_on_board", "flock /tmp/vck5000.lock sudo")) + else: + print("Skipping execution of unit tests (ENABLE_BOARD_TESTS=OFF)") + config.substitutions.append(("%run_on_board", "echo")) else: + print("ROCm not found") config.substitutions.append(("%run_on_board", "echo")) + config.substitutions.append(("%link_against_hsa%", "")) + config.substitutions.append(("%HSA_DIR%", "")) -run_on_ipu = "echo" -xrt_flags = "" if config.xrt_lib_dir: print("xrt found at", os.path.dirname(config.xrt_lib_dir)) xrt_flags = "-I{} -L{} -luuid -lxrt_coreutil".format( diff --git a/test/Targets/AIETargetHSA/input_with_addresses.mlir b/test/Targets/AIETargetHSA/input_with_addresses.mlir new file mode 100644 index 0000000000..1cf762054b --- /dev/null +++ b/test/Targets/AIETargetHSA/input_with_addresses.mlir @@ -0,0 +1,55 @@ + +// RUN: aie-translate --aie-generate-hsa %s | FileCheck %s + +//CHECK: void invoke_data_movement(hsa_queue_t *q, hsa_agent_t *a, void *buf2, void *buf0) { +//CHECK: uint64_t wr_idx = 0; +//CHECK: uint64_t packet_id = 0; +//CHECK: hsa_agent_dispatch_packet_t pkt0 ; +//CHECK: wr_idx = hsa_queue_add_write_index_relaxed(q, 1); +//CHECK: packet_id = wr_idx % q->size; +//CHECK: mlir_aie_packet_nd_memcpy(&pkt0, 0 /* herd_id */, 6 /* col */, 0 /* dir */, 0/* channel */, 4 /* Burst length */, 2 /* Memory space */, (uint64_t)buf2 + 0 /* Address */, 256 /* 1d_length */, 1 /* 2d_length */, 0 /* 2d_stride */, 1 /* 3d_length */, 0 /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */); +//CHECK: hsa_amd_signal_create_on_agent(1, 0, nullptr, a, 0, &pkt0.completion_signal); +//CHECK: mlir_aie_write_pkt(q, packet_id, &pkt0); +//CHECK: hsa_agent_dispatch_packet_t pkt1 ; +//CHECK: wr_idx = hsa_queue_add_write_index_relaxed(q, 1); +//CHECK: packet_id = wr_idx % q->size; +//CHECK: mlir_aie_packet_nd_memcpy(&pkt1, 0 /* herd_id */, 6 /* col */, 1 /* dir */, 0/* channel */, 4 /* Burst length */, 2 /* Memory space */, (uint64_t)buf0 + 0 /* Address */, 256 /* 1d_length */, 1 /* 2d_length */, 0 /* 2d_stride */, 1 /* 3d_length */, 0 /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */); +//CHECK: mlir_aie_queue_dispatch_and_wait(a, q, packet_id, wr_idx, &pkt1, false); +//CHECK: while (hsa_signal_wait_scacquire(pkt0.completion_signal, +//CHECK: HSA_SIGNAL_CONDITION_EQ, 0, 0x80000, +//CHECK: HSA_WAIT_STATE_ACTIVE) != 0); +//CHECK: while (hsa_signal_wait_scacquire(pkt1.completion_signal, +//CHECK: HSA_SIGNAL_CONDITION_EQ, 0, 0x80000, +//CHECK: HSA_WAIT_STATE_ACTIVE) != 0); +//CHECK: hsa_signal_destroy(pkt0.completion_signal); +//CHECK: hsa_signal_destroy(pkt1.completion_signal); +//CHECK: } + +module { + aie.device(xcvc1902) { + memref.global "public" @out0 : memref<16xi32> + memref.global "public" @in0 : memref<16xi32> + %tile_6_0 = aie.tile(6, 0) + %switchbox_6_0 = aie.switchbox(%tile_6_0) { + } + %tile_6_2 = aie.tile(6, 2) + %switchbox_6_2 = aie.switchbox(%tile_6_2) { + } + + aie.flow(%tile_6_0, DMA : 0, %tile_6_2, DMA : 0) + aie.flow(%tile_6_2, DMA : 0, %tile_6_0, DMA : 0) + %core_6_2 = aie.core(%tile_6_2) { + aie.end + } + + aie.shim_dma_allocation @in0(MM2S, 0, 6) + aie.shim_dma_allocation @out0(S2MM, 0, 6) + + func.func @sequence(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) { + aiex.ipu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 0 : i64, metadata = @out0} : memref<64xi32> + aiex.ipu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 1 : i64, metadata = @in0} : memref<64xi32> + aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + return + } + } +} diff --git a/test/Targets/AIETargetHSA/lit.local.cfg b/test/Targets/AIETargetHSA/lit.local.cfg new file mode 100644 index 0000000000..f87326bcf2 --- /dev/null +++ b/test/Targets/AIETargetHSA/lit.local.cfg @@ -0,0 +1,8 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023 Advanced Micro Devices Inc. + +if not config.hsa_found: + config.unsupported = True diff --git a/test/lit.cfg.py b/test/lit.cfg.py index d0523a40a8..26eaf3951a 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -57,21 +57,33 @@ run_on_ipu = "echo" xrt_flags = "" -if config.hsa_found: +if config.hsa_dir and (not ("NOTFOUND" in config.hsa_dir)): + if not "hsa" in config.aieHostTarget: + print( + "ROCm found, but disabled because host target {}".format( + config.aieHostTarget + ) + ) + # Getting the path to the ROCm directory. hsa-runtime64 points to the cmake # directory so need to go up three directories rocm_root = os.path.join(config.hsa_dir, "..", "..", "..") print("Found ROCm:", rocm_root) - config.substitutions.append(('%link_against_hsa%', "--link_against_hsa")) + print("Found hsa_dir:", config.hsa_dir) + config.available_features.add("hsa") + config.substitutions.append(("%HSA_DIR%", "{}".format(rocm_root))) + config.substitutions.append(("%link_against_hsa%", "--link_against_hsa")) if config.enable_board_tests: - config.substitutions.append(('%run_on_board', "flock /tmp/vck5000.lock sudo")) + config.substitutions.append(("%run_on_board", "flock /tmp/vck5000.lock sudo")) else: print("Skipping execution of unit tests (ENABLE_BOARD_TESTS=OFF)") - config.substitutions.append(('%run_on_board', "echo")) + config.substitutions.append(("%run_on_board", "echo")) else: print("ROCm not found") - config.substitutions.append(('%link_against_hsa%', "")) + config.substitutions.append(("%run_on_board", "echo")) + config.substitutions.append(("%link_against_hsa%", "")) + config.substitutions.append(("%HSA_DIR%", "")) if config.xrt_lib_dir: print("xrt found at", os.path.dirname(config.xrt_lib_dir)) diff --git a/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir b/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir index 5ffae91ba7..f583981529 100644 --- a/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir +++ b/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir @@ -8,9 +8,12 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: hsa // RUN: %PYTHON aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %s -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib // RUN: %run_on_board ./test.elf +// XFAIL: * + module @test27_simple_shim_dma_single_lock { aie.device(xcve2802) { %tile72 = aie.tile(7, 3) diff --git a/tools/aie-routing-command-line/visualize.py b/tools/aie-routing-command-line/visualize.py index 976ce4d17e..b772575894 100755 --- a/tools/aie-routing-command-line/visualize.py +++ b/tools/aie-routing-command-line/visualize.py @@ -8,9 +8,10 @@ from enum import Enum + class Direction(Enum): - Horz = 0; - Vert = 1; + Horz = 0 + Vert = 1 class canvas: @@ -20,345 +21,403 @@ def __init__(self, width, height): self.vert_line_list = [] self.horz_line_list = [] self.characters = [] - - def direction(self, line) : - Horz_Stationary = False; - Vert_Stationary = False; - - if (line[0][0] == line[1][0]): + + def direction(self, line): + Horz_Stationary = False + Vert_Stationary = False + + if line[0][0] == line[1][0]: # Line Stationary in Horz Axis Horz_Stationary = True - if (line[0][1] == line[1][1]): + if line[0][1] == line[1][1]: # Line Stationary in Vert Axis Vert_Stationary = True - if (Horz_Stationary and Vert_Stationary): + if Horz_Stationary and Vert_Stationary: # Crash - raise Exception("Line is Diagonal"); - if (not Horz_Stationary and not Vert_Stationary): + raise Exception("Line is Diagonal") + if not Horz_Stationary and not Vert_Stationary: # Crash - raise Exception("Line is a dot"); - if ((not Horz_Stationary) and Vert_Stationary): - return Direction.Horz; - - if (Horz_Stationary and (not Vert_Stationary)): - return Direction.Vert; - + raise Exception("Line is a dot") + if (not Horz_Stationary) and Vert_Stationary: + return Direction.Horz + + if Horz_Stationary and (not Vert_Stationary): + return Direction.Vert + def draw_character(self, point, character): - self.characters.append([point, character]); + self.characters.append([point, character]) def replace_character(self, point, character, replacement): - if (self.characters.count([point, character])): - self.characters.remove([point, character]); - self.characters.append([point, replacement]); - + if self.characters.count([point, character]): + self.characters.remove([point, character]) + self.characters.append([point, replacement]) + def draw_line(self, start, finish): - if (self.direction([start,finish]) == Direction.Vert): + if self.direction([start, finish]) == Direction.Vert: self.vert_line_list.append([start, finish]) else: self.horz_line_list.append([start, finish]) - + def draw_square(self, center, size): - horz_origin = math.floor((center[0] + 0.5) - (size/2)); - horz_extent = math.ceil((center[0] + 0.5) + (size/2) + 3); - - vert_origin = math.floor( (center[1] + 0.5)- (size/2)); - vert_extent = math.ceil( (center[1] + 0.5) + (size/2)); - - top_left = (horz_origin, vert_origin ); - top_right = (horz_extent, vert_origin); - bottom_left = (horz_origin, vert_extent); - bottom_right = (horz_extent, vert_extent); - + horz_origin = math.floor((center[0] + 0.5) - (size / 2)) + horz_extent = math.ceil((center[0] + 0.5) + (size / 2) + 3) + + vert_origin = math.floor((center[1] + 0.5) - (size / 2)) + vert_extent = math.ceil((center[1] + 0.5) + (size / 2)) + + top_left = (horz_origin, vert_origin) + top_right = (horz_extent, vert_origin) + bottom_left = (horz_origin, vert_extent) + bottom_right = (horz_extent, vert_extent) + self.draw_line(top_left, top_right) self.draw_line(top_right, bottom_right) self.draw_line(bottom_left, bottom_right) self.draw_line(top_left, bottom_left) - def vertical_index(self,point): - return point[1]; - def horizontal_index(self,point): - return point[0]; - + def vertical_index(self, point): + return point[1] + + def horizontal_index(self, point): + return point[0] + def within_line(self, point, line): - Horz_Stationary = False; - Vert_Stationary = False; - - if (line[0][0] == line[1][0]): + Horz_Stationary = False + Vert_Stationary = False + + if line[0][0] == line[1][0]: # Line Stationary in Horz Axis Horz_Stationary = True - if (line[0][1] == line[1][1]): + if line[0][1] == line[1][1]: # Line Stationary in Vert Axis Vert_Stationary = True - # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line)); - - if (Horz_Stationary and Vert_Stationary): + # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line)); + + if Horz_Stationary and Vert_Stationary: # Crash - raise Exception("Line is Diagonal"); - if (not Horz_Stationary and not Vert_Stationary): + raise Exception("Line is Diagonal") + if not Horz_Stationary and not Vert_Stationary: # Crash - raise Exception("Line is a dot"); - - if (Horz_Stationary and (not Vert_Stationary)): + raise Exception("Line is a dot") + + if Horz_Stationary and (not Vert_Stationary): # it's a vertical line # Sort the tuples by Horz - line.sort(key=self.vertical_index); - start_line = ( (point[1] == line[0][1]) and (point[1] <= line[1][1]) and (point[0] == line[0][0]) ) - in_line = ( (point[1] > line[0][1]) and (point[1] < line[1][1]) and (point[0] == line[0][0]) ) - end_line = ( (point[1] > line[0][1]) and (point[1] == line[1][1]) and (point[0] == line[0][0]) ) - - #print("Vert {}, point {}, {}".format(line, point, in_line)); - - if ((not Horz_Stationary) and Vert_Stationary): + line.sort(key=self.vertical_index) + start_line = ( + (point[1] == line[0][1]) + and (point[1] <= line[1][1]) + and (point[0] == line[0][0]) + ) + in_line = ( + (point[1] > line[0][1]) + and (point[1] < line[1][1]) + and (point[0] == line[0][0]) + ) + end_line = ( + (point[1] > line[0][1]) + and (point[1] == line[1][1]) + and (point[0] == line[0][0]) + ) + + # print("Vert {}, point {}, {}".format(line, point, in_line)); + + if (not Horz_Stationary) and Vert_Stationary: # it's a horizonal line line.sort(key=self.horizontal_index) - start_line = ( (point[0] == line[0][0]) and (point[0] <= line[1][0]) and ( point[1] == line[0][1])) - in_line = ( (point[0] > line[0][0]) and (point[0] < line[1][0]) and ( point[1] == line[0][1])) - end_line = ( (point[0] > line[0][0]) and (point[0] == line[1][0]) and ( point[1] == line[0][1])) - #print("Horz {}, point {}, {}".format(line, point, in_line)); + start_line = ( + (point[0] == line[0][0]) + and (point[0] <= line[1][0]) + and (point[1] == line[0][1]) + ) + in_line = ( + (point[0] > line[0][0]) + and (point[0] < line[1][0]) + and (point[1] == line[0][1]) + ) + end_line = ( + (point[0] > line[0][0]) + and (point[0] == line[1][0]) + and (point[1] == line[0][1]) + ) + # print("Horz {}, point {}, {}".format(line, point, in_line)); return (start_line, in_line, end_line) - - def find_horz_index(self,line_points): - index =0; - if (line_points[1]): - index+=2 + def find_horz_index(self, line_points): + index = 0 + + if line_points[1]: + index += 2 else: - if (line_points[0]): - index += 1; - if (line_points[2]): - index +=4 - return index; + if line_points[0]: + index += 1 + if line_points[2]: + index += 4 + return index def transform(self, index): # if horz through, clear bits for horz end # if vert through, clear bits for vert end - - + chars = { - 0 : ' ', - 2 : u'\u2500', # horz line - 16 : u'\u2502',# vert line - 9 : u'\u250c', # box top left - 33 : u'\u2514',# box bot left - 12 : u'\u2510',# box top right - 36 : u'\u2518',# box bot right - 1 : u'\u2576', # right half horz line - 8 : u'\u2577', # lower half vert line - 32 : u'\u2575',# upper half vert line - 4 : u'\u2574', # left half horz line - 18 : u'\u253c',# vert AND horz - 25 : u'\u251c',# vert and right - 17 : u'\u251c', - 20 : u'\u2524',# vert and left - 10 : u'\u252c',# horz and bot - 34 : u'\u2534',# horz and top - 21 : u'\u253c',# vert AND horz - 42 : u'\u253c' - + 0: " ", + 2: "\u2500", # horz line + 16: "\u2502", # vert line + 9: "\u250c", # box top left + 33: "\u2514", # box bot left + 12: "\u2510", # box top right + 36: "\u2518", # box bot right + 1: "\u2576", # right half horz line + 8: "\u2577", # lower half vert line + 32: "\u2575", # upper half vert line + 4: "\u2574", # left half horz line + 18: "\u253c", # vert AND horz + 25: "\u251c", # vert and right + 17: "\u251c", + 20: "\u2524", # vert and left + 10: "\u252c", # horz and bot + 34: "\u2534", # horz and top + 21: "\u253c", # vert AND horz + 42: "\u253c", } try: - char = chars[index]; + char = chars[index] except KeyError: - char = "x" - + char = "x" + return char def combine(self, a, b): - return [ a[0] or b[0], a[1] or b[1], a[2] or b[2]]; - + return [a[0] or b[0], a[1] or b[1], a[2] or b[2]] + def rasterize(self): - for x in range(self.height): - for y in range(self.width): - char = "({},{})".format(y,x); - index = 0; - horz_line_points = [False, False, False]; - vert_line_points = [False, False, False]; - - for charloc in self.characters: - if (charloc[0][0] == y and charloc[0][1] == x): - print(charloc[1], end='', sep=''); - index = -1; - break - if index == 0: # not a character, either vert or horz line - for line in self.horz_line_list: - horz_line_points = self.combine(horz_line_points, self.within_line((y,x),line)); - index += self.find_horz_index(horz_line_points) - - for line in self.vert_line_list: - vert_line_points = self.combine(vert_line_points, self.within_line((y,x),line)); - index += 8*self.find_horz_index(vert_line_points) - - print("{}".format(self.transform(index)), end='', sep='') - #print(" {} ".format(index), end='', sep='') - - print("") + for x in range(self.height): + for y in range(self.width): + char = "({},{})".format(y, x) + index = 0 + horz_line_points = [False, False, False] + vert_line_points = [False, False, False] + + for charloc in self.characters: + if charloc[0][0] == y and charloc[0][1] == x: + print(charloc[1], end="", sep="") + index = -1 + break + if index == 0: # not a character, either vert or horz line + for line in self.horz_line_list: + horz_line_points = self.combine( + horz_line_points, self.within_line((y, x), line) + ) + index += self.find_horz_index(horz_line_points) + + for line in self.vert_line_list: + vert_line_points = self.combine( + vert_line_points, self.within_line((y, x), line) + ) + index += 8 * self.find_horz_index(vert_line_points) + + print("{}".format(self.transform(index)), end="", sep="") + # print(" {} ".format(index), end='', sep='') + + print("") + superscripts = { - # 0 : u'\u2070', - 0 : ' ', - 1 : u'\u00b9', - 2 : u'\u00b2', - 3 : u'\u00b3', - 4 : u'\u2074', - 5 : u'\u2075', - 6 : u'\u2076', - 7 : u'\u2077', - 8 : u'\u2078', - 9 : u'\u2079', + # 0 : u'\u2070', + 0: " ", + 1: "\u00b9", + 2: "\u00b2", + 3: "\u00b3", + 4: "\u2074", + 5: "\u2075", + 6: "\u2076", + 7: "\u2077", + 8: "\u2078", + 9: "\u2079", } subscripts = { - # 0 : u'\u2080', - 0 : ' ', - 1 : u'\u2081', - 2 : u'\u2082', - 3 : u'\u2083', - 4 : u'\u2084', - 5 : u'\u2085', - 6 : u'\u2086', - 7 : u'\u2087', - 8 : u'\u2088', - 9 : u'\u2089', + # 0 : u'\u2080', + 0: " ", + 1: "\u2081", + 2: "\u2082", + 3: "\u2083", + 4: "\u2084", + 5: "\u2085", + 6: "\u2086", + 7: "\u2087", + 8: "\u2088", + 9: "\u2089", } -def draw_switchbox(canvas, xoffset, yoffset, source_count, destination_count, - northbound, southbound, eastbound, westbound, draw_demand=True, name=""): - c.draw_square((xoffset+5,yoffset+4),2) + +def draw_switchbox( + canvas, + xoffset, + yoffset, + source_count, + destination_count, + northbound, + southbound, + eastbound, + westbound, + draw_demand=True, + name="", +): + c.draw_square((xoffset + 5, yoffset + 4), 2) # label it if len(name) > 0: - c.draw_character((xoffset+6,yoffset+4), name[0]) + c.draw_character((xoffset + 6, yoffset + 4), name[0]) if len(name) > 1: - c.draw_character((xoffset+7,yoffset+4), name[1]) + c.draw_character((xoffset + 7, yoffset + 4), name[1]) if len(name) > 2: - c.draw_character((xoffset+8,yoffset+4), name[2]) + c.draw_character((xoffset + 8, yoffset + 4), name[2]) if len(name) > 3: - c.draw_character((xoffset+9,yoffset+4), name[3]) + c.draw_character((xoffset + 9, yoffset + 4), name[3]) # draw source and destination count - if(source_count > 0 or destination_count > 0): - c.draw_character((xoffset+7,yoffset+5), '*') + if source_count > 0 or destination_count > 0: + c.draw_character((xoffset + 7, yoffset + 5), "*") # left of the switchbox (south) - if northbound > 0: - c.draw_line((xoffset+10,yoffset+4), (xoffset+14,yoffset+4)) - if(draw_demand): - c.draw_character((xoffset+12,yoffset+3), subscripts[northbound]) - if(northbound > 6): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+10,yoffset+4), 'x') - #c.draw_character((xoffset+11,yoffset+4), 'x') - c.draw_character((xoffset+12,yoffset+4), 'x') - if southbound > 0: - c.draw_line((xoffset+0,yoffset+5), (xoffset+4,yoffset+5)) - if(draw_demand): - c.draw_character((xoffset+2,yoffset+6), superscripts[southbound]) - if(southbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+1, yoffset+5), 'x') - #c.draw_character((xoffset+2, yoffset+5), 'x') - c.draw_character((xoffset+3, yoffset+5), 'x') + if northbound > 0: + c.draw_line((xoffset + 10, yoffset + 4), (xoffset + 14, yoffset + 4)) + if draw_demand: + c.draw_character((xoffset + 12, yoffset + 3), subscripts[northbound]) + if northbound > 6: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 10, yoffset + 4), "x") + # c.draw_character((xoffset+11,yoffset+4), 'x') + c.draw_character((xoffset + 12, yoffset + 4), "x") + if southbound > 0: + c.draw_line((xoffset + 0, yoffset + 5), (xoffset + 4, yoffset + 5)) + if draw_demand: + c.draw_character((xoffset + 2, yoffset + 6), superscripts[southbound]) + if southbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 1, yoffset + 5), "x") + # c.draw_character((xoffset+2, yoffset+5), 'x') + c.draw_character((xoffset + 3, yoffset + 5), "x") # below the switchbox (east) - if eastbound > 0: - c.draw_line((xoffset+6,yoffset+6), (xoffset+6,yoffset+8)) - if(draw_demand): - c.draw_character((xoffset+5,yoffset+7), superscripts[eastbound]) - if(eastbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+6, yoffset+6), 'x') - #c.draw_character((xoffset+6, yoffset+7), 'x') - c.draw_character((xoffset+6, yoffset+8), 'x') - if westbound > 0: - c.draw_line((xoffset+8,yoffset+1), (xoffset+8,yoffset+3)) - if(draw_demand): - c.draw_character((xoffset+9,yoffset+2), superscripts[westbound]) - if(westbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+8, yoffset+1), 'x') - #c.draw_character((xoffset+7, yoffset+2), 'x') - c.draw_character((xoffset+8, yoffset+3), 'x') - - -SB_WIDTH = 10; SB_HEIGHT = 5 # distances between switchboxes + if eastbound > 0: + c.draw_line((xoffset + 6, yoffset + 6), (xoffset + 6, yoffset + 8)) + if draw_demand: + c.draw_character((xoffset + 5, yoffset + 7), superscripts[eastbound]) + if eastbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 6, yoffset + 6), "x") + # c.draw_character((xoffset+6, yoffset+7), 'x') + c.draw_character((xoffset + 6, yoffset + 8), "x") + if westbound > 0: + c.draw_line((xoffset + 8, yoffset + 1), (xoffset + 8, yoffset + 3)) + if draw_demand: + c.draw_character((xoffset + 9, yoffset + 2), superscripts[westbound]) + if westbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 8, yoffset + 1), "x") + # c.draw_character((xoffset+7, yoffset+2), 'x') + c.draw_character((xoffset + 8, yoffset + 3), "x") + + +SB_WIDTH = 10 +SB_HEIGHT = 5 # distances between switchboxes + + def draw_switchboxes(c, switchboxes): for item in switchboxes: - draw_switchbox(c, SB_WIDTH*item['row'], SB_HEIGHT*item['col'], - item['source_count'], item['destination_count'], - item['northbound'], item['southbound'], - item['eastbound'], item['westbound'], draw_demand=True, - name="{},{}".format(item['col'], item['row'] )) - + draw_switchbox( + c, + SB_WIDTH * item["row"], + SB_HEIGHT * item["col"], + item["source_count"], + item["destination_count"], + item["northbound"], + item["southbound"], + item["eastbound"], + item["westbound"], + draw_demand=True, + name="{},{}".format(item["col"], item["row"]), + ) + + # given a route, draw arrow characters to indicate the route # route is a list of switchboxes, represented as int tuple coordinates -left_arrow = u'\u2190' -up_arrow = u'\u2191' -right_arrow= u'\u2192' -down_arrow = u'\u2193' +left_arrow = "\u2190" +up_arrow = "\u2191" +right_arrow = "\u2192" +down_arrow = "\u2193" + + def draw_route(c, route): - for i in range(len(route)-1): + for i in range(len(route) - 1): col = route[i][0][0] row = route[i][0][1] - xoffset = SB_WIDTH*row - yoffset = SB_HEIGHT*col - if len(route[i]) == 1: continue + xoffset = SB_WIDTH * row + yoffset = SB_HEIGHT * col + if len(route[i]) == 1: + continue dirs = route[i][1] # draw source and destination - if(i == 0): - c.draw_character((xoffset+5,yoffset+5), 'S') - if(i == (len(route)-2)): - c.draw_character((xoffset+9,yoffset+5), 'D') + if i == 0: + c.draw_character((xoffset + 5, yoffset + 5), "S") + if i == (len(route) - 2): + c.draw_character((xoffset + 9, yoffset + 5), "D") - if(i == 0): - if(row == 0): # for routes starting in the shim, draw arrows coming from PL - c.draw_character((xoffset+1, yoffset+4), right_arrow) - c.draw_character((xoffset+2, yoffset+4), right_arrow) - c.draw_character((xoffset+3, yoffset+4), right_arrow) + if i == 0: + if row == 0: # for routes starting in the shim, draw arrows coming from PL + c.draw_character((xoffset + 1, yoffset + 4), right_arrow) + c.draw_character((xoffset + 2, yoffset + 4), right_arrow) + c.draw_character((xoffset + 3, yoffset + 4), right_arrow) for j in range(len(dirs)): # draw indications for cores the route passes through - c.replace_character((xoffset+7,yoffset+5), '*', '#') + c.replace_character((xoffset + 7, yoffset + 5), "*", "#") # 0 = North, 1 = East, 2 = South, 3 = West - if(dirs[j] == "North"): - c.draw_character((xoffset+11, yoffset+4), right_arrow) - c.draw_character((xoffset+12, yoffset+4), right_arrow) - c.draw_character((xoffset+13, yoffset+4), right_arrow) - row = row+1 - elif(dirs[j] == "East"): - c.draw_character((xoffset+6, yoffset+7), down_arrow) - col = col+1 - elif(dirs[j] == "South"): - c.draw_character((xoffset+1, yoffset+5), left_arrow) - c.draw_character((xoffset+2, yoffset+5), left_arrow) - c.draw_character((xoffset+3, yoffset+5), left_arrow) - row = row-1 - elif(dirs[j] == "West"): - c.draw_character((xoffset+8, yoffset+2), up_arrow) - col = col-1 - elif(dirs[j] == "DMA"): + if dirs[j] == "North": + c.draw_character((xoffset + 11, yoffset + 4), right_arrow) + c.draw_character((xoffset + 12, yoffset + 4), right_arrow) + c.draw_character((xoffset + 13, yoffset + 4), right_arrow) + row = row + 1 + elif dirs[j] == "East": + c.draw_character((xoffset + 6, yoffset + 7), down_arrow) + col = col + 1 + elif dirs[j] == "South": + c.draw_character((xoffset + 1, yoffset + 5), left_arrow) + c.draw_character((xoffset + 2, yoffset + 5), left_arrow) + c.draw_character((xoffset + 3, yoffset + 5), left_arrow) + row = row - 1 + elif dirs[j] == "West": + c.draw_character((xoffset + 8, yoffset + 2), up_arrow) + col = col - 1 + elif dirs[j] == "DMA": # draw destination - c.draw_character((xoffset+9,yoffset+5), 'D') - + c.draw_character((xoffset + 9, yoffset + 5), "D") - - -if __name__ == '__main__': +if __name__ == "__main__": # setup python unicode encoding os.system("export PYTHONIOENCODING=utf8") - parser = argparse.ArgumentParser(description='Draw switchboxes, demands and routes') - parser.add_argument('-j', '--json', help='Filepath for JSON file to read') - parser.add_argument('-r', '--route_list', help='List of routes to print') - parser.add_argument('-o', '--output', help='Path to output directory. Text files of the routes will be stored here.') + parser = argparse.ArgumentParser(description="Draw switchboxes, demands and routes") + parser.add_argument("-j", "--json", help="Filepath for JSON file to read") + parser.add_argument("-r", "--route_list", help="List of routes to print") + parser.add_argument( + "-o", + "--output", + help="Path to output directory. Text files of the routes will be stored here.", + ) args = parser.parse_args() - if args.json: json_file_path = args.json - else: json_file_path = "switchbox.json" # default JSON + if args.json: + json_file_path = args.json + else: + json_file_path = "switchbox.json" # default JSON with open(json_file_path) as f: json_data = json.load(f) - + switchboxes = [] routes = [] @@ -367,22 +426,23 @@ def draw_route(c, route): switchboxes.append(item) if "route" in key: routes.append(item) - + max_col = 0 max_row = 0 for switchbox in switchboxes: - if switchbox['col'] > max_col: - max_col = switchbox['col'] - if switchbox['row'] > max_row: - max_row = switchbox['row'] + if switchbox["col"] > max_col: + max_col = switchbox["col"] + if switchbox["row"] > max_row: + max_row = switchbox["row"] routes_to_print = [] - if args.route_list: + if args.route_list: for route in args.route_list.split(","): routes_to_print.append(int(route.strip())) - else: routes_to_print = range(len(routes)) + else: + routes_to_print = range(len(routes)) - output_directory = json_file_path.split('.')[0] + '/' + output_directory = json_file_path.split(".")[0] + "/" if args.output: output_directory = args.output @@ -390,12 +450,14 @@ def draw_route(c, route): os.mkdir(output_directory) for i in routes_to_print: - c = canvas(12*(max_row+1), 5+5*(max_col+1)); + c = canvas(12 * (max_row + 1), 5 + 5 * (max_col + 1)) draw_switchboxes(c, switchboxes) filename = os.path.join(output_directory, "route{}.txt".format(i)) sys.stdout = sys.__stdout__ - print("Printing route {} of {}: {}".format(i, len(routes_to_print)-1, filename)) - with open(filename, 'w') as f: + print( + "Printing route {} of {}: {}".format(i, len(routes_to_print) - 1, filename) + ) + with open(filename, "w") as f: sys.stdout = f print("Route {}: {}".format(i, routes[i])) draw_route(c, routes[i]) diff --git a/tutorials/lit.cfg.py b/tutorials/lit.cfg.py index 9ad07492b9..a09651649f 100755 --- a/tutorials/lit.cfg.py +++ b/tutorials/lit.cfg.py @@ -17,10 +17,11 @@ import lit.util from lit.llvm import llvm_config + # Configuration file for the 'lit' test runner. # name: The name of this test suite. -config.name = 'AIE_TUTORIALS' +config.name = "AIE_TUTORIALS" config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) config.environment["PYTHONPATH"] = "{}".format( @@ -28,75 +29,94 @@ ) # suffixes: A list of file extensions to treat as test files. -config.suffixes = ['.mlir'] +config.suffixes = [".mlir"] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) -config.substitutions.append(('%PATH%', config.environment['PATH'])) -config.substitutions.append(('%shlibext', config.llvm_shlib_ext)) -config.substitutions.append(('%extraAieCcFlags%', config.extraAieCcFlags)) -config.substitutions.append(('%host_runtime_lib%', os.path.join(config.aie_obj_root, "runtime_lib",config.aieHostTarget))) -config.substitutions.append(('%aietools', config.vitis_aietools_dir)) +config.substitutions.append(("%PATH%", config.environment["PATH"])) +config.substitutions.append(("%shlibext", config.llvm_shlib_ext)) +config.substitutions.append(("%extraAieCcFlags%", config.extraAieCcFlags)) +config.substitutions.append( + ( + "%host_runtime_lib%", + os.path.join(config.aie_obj_root, "runtime_lib", config.aieHostTarget), + ) +) +config.substitutions.append(("%aietools", config.vitis_aietools_dir)) # for xchesscc_wrapper -llvm_config.with_environment('AIETOOLS', config.vitis_aietools_dir) +llvm_config.with_environment("AIETOOLS", config.vitis_aietools_dir) -if(config.enable_board_tests): - config.substitutions.append(('%run_on_board', "echo %T >> /home/xilinx/testlog | sync | sudo")) +if config.enable_board_tests: + config.substitutions.append( + ("%run_on_board", "echo %T >> /home/xilinx/testlog | sync | sudo") + ) else: - config.substitutions.append(('%run_on_board', "echo")) + config.substitutions.append(("%run_on_board", "echo")) -VitisSysrootFlag = '' -if (config.aieHostTarget == 'x86_64'): - config.substitutions.append(('%aieHostTargetTriplet%', 'x86_64-unknown-linux-gnu')) -elif (config.aieHostTarget == 'aarch64'): - config.substitutions.append(('%aieHostTargetTriplet%', 'aarch64-linux-gnu')) - VitisSysrootFlag = '--sysroot='+config.vitis_sysroot +VitisSysrootFlag = "" +if config.aieHostTarget == "x86_64": + config.substitutions.append(("%aieHostTargetTriplet%", "x86_64-unknown-linux-gnu")) +elif config.aieHostTarget == "aarch64": + config.substitutions.append(("%aieHostTargetTriplet%", "aarch64-linux-gnu")) + VitisSysrootFlag = "--sysroot=" + config.vitis_sysroot -config.substitutions.append(('%VitisSysrootFlag%', VitisSysrootFlag)) -config.substitutions.append(('%aieHostTargetArch%', config.aieHostTarget)) +config.substitutions.append(("%VitisSysrootFlag%", VitisSysrootFlag)) +config.substitutions.append(("%aieHostTargetArch%", config.aieHostTarget)) -llvm_config.with_system_environment( - ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) +llvm_config.with_system_environment(["HOME", "INCLUDE", "LIB", "TMP", "TEMP"]) llvm_config.use_default_substitutions() # excludes: A list of directories to exclude from the testsuite. The 'Inputs' # subdirectories contain auxiliary inputs for various tests in their parent # directories. -config.excludes = ['Inputs', 'Examples', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt', 'aie.mlir.prj'] +config.excludes = [ + "Inputs", + "Examples", + "CMakeLists.txt", + "README.txt", + "LICENSE.txt", + "aie.mlir.prj", +] + +config.aie_tools_dir = os.path.join(config.aie_obj_root, "bin") -config.aie_tools_dir = os.path.join(config.aie_obj_root, 'bin') def prepend_path(path): global llvm_config paths = [path] - current_paths = llvm_config.config.environment.get('PATH', None) + current_paths = llvm_config.config.environment.get("PATH", None) if current_paths: paths.extend(current_paths.split(os.path.pathsep)) paths = [os.path.normcase(os.path.normpath(p)) for p in paths] else: paths = [] - llvm_config.config.environment['PATH'] = os.pathsep.join(paths) + llvm_config.config.environment["PATH"] = os.pathsep.join(paths) + # Setup the path. prepend_path(config.llvm_tools_dir) prepend_path(config.peano_tools_dir) prepend_path(config.aie_tools_dir) -#llvm_config.with_environment('LM_LICENSE_FILE', os.getenv('LM_LICENSE_FILE')) -#llvm_config.with_environment('XILINXD_LICENSE_FILE', os.getenv('XILINXD_LICENSE_FILE')) -if(config.vitis_root): - config.vitis_aietools_bin = os.path.join(config.vitis_aietools_dir, "bin") - prepend_path(config.vitis_aietools_bin) - llvm_config.with_environment('VITIS', config.vitis_root) +# llvm_config.with_environment('LM_LICENSE_FILE', os.getenv('LM_LICENSE_FILE')) +# llvm_config.with_environment('XILINXD_LICENSE_FILE', os.getenv('XILINXD_LICENSE_FILE')) +if config.vitis_root: + config.vitis_aietools_bin = os.path.join(config.vitis_aietools_dir, "bin") + prepend_path(config.vitis_aietools_bin) + llvm_config.with_environment("VITIS", config.vitis_root) # Test to see if we have the peano backend. try: - result = subprocess.run([os.path.join(config.peano_tools_dir, 'llc'),'-mtriple=aie','--version'],stdout=subprocess.PIPE,stderr=subprocess.PIPE) - if (re.search("Xilinx AI Engine", result.stdout.decode('utf-8')) is not None): - config.available_features.add('peano') + result = subprocess.run( + [os.path.join(config.peano_tools_dir, "llc"), "-mtriple=aie", "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if re.search("Xilinx AI Engine", result.stdout.decode("utf-8")) is not None: + config.available_features.add("peano") print("Peano found: " + shutil.which("llc")) else: print("Peano not found, but expected at ", config.peano_tools_dir) @@ -104,51 +124,56 @@ def prepend_path(path): print("Peano not found, but expected at ", config.peano_tools_dir) print("Looking for Chess...") -#test if LM_LICENSE_FILE valid -if(config.enable_chess_tests): +# test if LM_LICENSE_FILE valid +if config.enable_chess_tests: result = None - if(config.vitis_root): + if config.vitis_root: result = shutil.which("xchesscc") if result != None: print("Chess found: " + result) - config.available_features.add('chess') - config.available_features.add('valid_xchess_license') - lm_license_file = os.getenv('LM_LICENSE_FILE') - if(lm_license_file != None): - llvm_config.with_environment('LM_LICENSE_FILE', lm_license_file) - xilinxd_license_file = os.getenv('XILINXD_LICENSE_FILE') - if(xilinxd_license_file != None): - llvm_config.with_environment('XILINXD_LICENSE_FILE', xilinxd_license_file) + config.available_features.add("chess") + config.available_features.add("valid_xchess_license") + lm_license_file = os.getenv("LM_LICENSE_FILE") + if lm_license_file != None: + llvm_config.with_environment("LM_LICENSE_FILE", lm_license_file) + xilinxd_license_file = os.getenv("XILINXD_LICENSE_FILE") + if xilinxd_license_file != None: + llvm_config.with_environment("XILINXD_LICENSE_FILE", xilinxd_license_file) validate_chess = False - if(validate_chess): + if validate_chess: import subprocess - result = subprocess.run(['xchesscc','+v'],stdout=subprocess.PIPE,stderr=subprocess.PIPE) - validLMLicense = (len(result.stderr.decode('utf-8')) == 0) + + result = subprocess.run( + ["xchesscc", "+v"], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + validLMLicense = len(result.stderr.decode("utf-8")) == 0 else: validLMLicense = lm_license_file or xilinxd_license_file - if(not lm_license_file and not xilinxd_license_file): - print("WARNING: no valid xchess license that is required by some of the lit tests") + if not lm_license_file and not xilinxd_license_file: + print( + "WARNING: no valid xchess license that is required by some of the lit tests" + ) else: print("Chess not found") tool_dirs = [config.aie_tools_dir, config.peano_tools_dir, config.llvm_tools_dir] tools = [ - 'aie-opt', - 'aie-translate', - 'aiecc.py', - 'ld.lld', - 'llc', - 'llvm-objdump', - 'opt', - 'xchesscc_wrapper', + "aie-opt", + "aie-translate", + "aiecc.py", + "ld.lld", + "llc", + "llvm-objdump", + "opt", + "xchesscc_wrapper", ] llvm_config.add_tool_substitutions(tools, tool_dirs) -if(config.enable_board_tests): +if config.enable_board_tests: lit_config.parallelism_groups["board"] = 1 config.parallelism_group = "board" diff --git a/tutorials/scripts/visualize.py b/tutorials/scripts/visualize.py index 976ce4d17e..b772575894 100755 --- a/tutorials/scripts/visualize.py +++ b/tutorials/scripts/visualize.py @@ -8,9 +8,10 @@ from enum import Enum + class Direction(Enum): - Horz = 0; - Vert = 1; + Horz = 0 + Vert = 1 class canvas: @@ -20,345 +21,403 @@ def __init__(self, width, height): self.vert_line_list = [] self.horz_line_list = [] self.characters = [] - - def direction(self, line) : - Horz_Stationary = False; - Vert_Stationary = False; - - if (line[0][0] == line[1][0]): + + def direction(self, line): + Horz_Stationary = False + Vert_Stationary = False + + if line[0][0] == line[1][0]: # Line Stationary in Horz Axis Horz_Stationary = True - if (line[0][1] == line[1][1]): + if line[0][1] == line[1][1]: # Line Stationary in Vert Axis Vert_Stationary = True - if (Horz_Stationary and Vert_Stationary): + if Horz_Stationary and Vert_Stationary: # Crash - raise Exception("Line is Diagonal"); - if (not Horz_Stationary and not Vert_Stationary): + raise Exception("Line is Diagonal") + if not Horz_Stationary and not Vert_Stationary: # Crash - raise Exception("Line is a dot"); - if ((not Horz_Stationary) and Vert_Stationary): - return Direction.Horz; - - if (Horz_Stationary and (not Vert_Stationary)): - return Direction.Vert; - + raise Exception("Line is a dot") + if (not Horz_Stationary) and Vert_Stationary: + return Direction.Horz + + if Horz_Stationary and (not Vert_Stationary): + return Direction.Vert + def draw_character(self, point, character): - self.characters.append([point, character]); + self.characters.append([point, character]) def replace_character(self, point, character, replacement): - if (self.characters.count([point, character])): - self.characters.remove([point, character]); - self.characters.append([point, replacement]); - + if self.characters.count([point, character]): + self.characters.remove([point, character]) + self.characters.append([point, replacement]) + def draw_line(self, start, finish): - if (self.direction([start,finish]) == Direction.Vert): + if self.direction([start, finish]) == Direction.Vert: self.vert_line_list.append([start, finish]) else: self.horz_line_list.append([start, finish]) - + def draw_square(self, center, size): - horz_origin = math.floor((center[0] + 0.5) - (size/2)); - horz_extent = math.ceil((center[0] + 0.5) + (size/2) + 3); - - vert_origin = math.floor( (center[1] + 0.5)- (size/2)); - vert_extent = math.ceil( (center[1] + 0.5) + (size/2)); - - top_left = (horz_origin, vert_origin ); - top_right = (horz_extent, vert_origin); - bottom_left = (horz_origin, vert_extent); - bottom_right = (horz_extent, vert_extent); - + horz_origin = math.floor((center[0] + 0.5) - (size / 2)) + horz_extent = math.ceil((center[0] + 0.5) + (size / 2) + 3) + + vert_origin = math.floor((center[1] + 0.5) - (size / 2)) + vert_extent = math.ceil((center[1] + 0.5) + (size / 2)) + + top_left = (horz_origin, vert_origin) + top_right = (horz_extent, vert_origin) + bottom_left = (horz_origin, vert_extent) + bottom_right = (horz_extent, vert_extent) + self.draw_line(top_left, top_right) self.draw_line(top_right, bottom_right) self.draw_line(bottom_left, bottom_right) self.draw_line(top_left, bottom_left) - def vertical_index(self,point): - return point[1]; - def horizontal_index(self,point): - return point[0]; - + def vertical_index(self, point): + return point[1] + + def horizontal_index(self, point): + return point[0] + def within_line(self, point, line): - Horz_Stationary = False; - Vert_Stationary = False; - - if (line[0][0] == line[1][0]): + Horz_Stationary = False + Vert_Stationary = False + + if line[0][0] == line[1][0]: # Line Stationary in Horz Axis Horz_Stationary = True - if (line[0][1] == line[1][1]): + if line[0][1] == line[1][1]: # Line Stationary in Vert Axis Vert_Stationary = True - # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line)); - - if (Horz_Stationary and Vert_Stationary): + # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line)); + + if Horz_Stationary and Vert_Stationary: # Crash - raise Exception("Line is Diagonal"); - if (not Horz_Stationary and not Vert_Stationary): + raise Exception("Line is Diagonal") + if not Horz_Stationary and not Vert_Stationary: # Crash - raise Exception("Line is a dot"); - - if (Horz_Stationary and (not Vert_Stationary)): + raise Exception("Line is a dot") + + if Horz_Stationary and (not Vert_Stationary): # it's a vertical line # Sort the tuples by Horz - line.sort(key=self.vertical_index); - start_line = ( (point[1] == line[0][1]) and (point[1] <= line[1][1]) and (point[0] == line[0][0]) ) - in_line = ( (point[1] > line[0][1]) and (point[1] < line[1][1]) and (point[0] == line[0][0]) ) - end_line = ( (point[1] > line[0][1]) and (point[1] == line[1][1]) and (point[0] == line[0][0]) ) - - #print("Vert {}, point {}, {}".format(line, point, in_line)); - - if ((not Horz_Stationary) and Vert_Stationary): + line.sort(key=self.vertical_index) + start_line = ( + (point[1] == line[0][1]) + and (point[1] <= line[1][1]) + and (point[0] == line[0][0]) + ) + in_line = ( + (point[1] > line[0][1]) + and (point[1] < line[1][1]) + and (point[0] == line[0][0]) + ) + end_line = ( + (point[1] > line[0][1]) + and (point[1] == line[1][1]) + and (point[0] == line[0][0]) + ) + + # print("Vert {}, point {}, {}".format(line, point, in_line)); + + if (not Horz_Stationary) and Vert_Stationary: # it's a horizonal line line.sort(key=self.horizontal_index) - start_line = ( (point[0] == line[0][0]) and (point[0] <= line[1][0]) and ( point[1] == line[0][1])) - in_line = ( (point[0] > line[0][0]) and (point[0] < line[1][0]) and ( point[1] == line[0][1])) - end_line = ( (point[0] > line[0][0]) and (point[0] == line[1][0]) and ( point[1] == line[0][1])) - #print("Horz {}, point {}, {}".format(line, point, in_line)); + start_line = ( + (point[0] == line[0][0]) + and (point[0] <= line[1][0]) + and (point[1] == line[0][1]) + ) + in_line = ( + (point[0] > line[0][0]) + and (point[0] < line[1][0]) + and (point[1] == line[0][1]) + ) + end_line = ( + (point[0] > line[0][0]) + and (point[0] == line[1][0]) + and (point[1] == line[0][1]) + ) + # print("Horz {}, point {}, {}".format(line, point, in_line)); return (start_line, in_line, end_line) - - def find_horz_index(self,line_points): - index =0; - if (line_points[1]): - index+=2 + def find_horz_index(self, line_points): + index = 0 + + if line_points[1]: + index += 2 else: - if (line_points[0]): - index += 1; - if (line_points[2]): - index +=4 - return index; + if line_points[0]: + index += 1 + if line_points[2]: + index += 4 + return index def transform(self, index): # if horz through, clear bits for horz end # if vert through, clear bits for vert end - - + chars = { - 0 : ' ', - 2 : u'\u2500', # horz line - 16 : u'\u2502',# vert line - 9 : u'\u250c', # box top left - 33 : u'\u2514',# box bot left - 12 : u'\u2510',# box top right - 36 : u'\u2518',# box bot right - 1 : u'\u2576', # right half horz line - 8 : u'\u2577', # lower half vert line - 32 : u'\u2575',# upper half vert line - 4 : u'\u2574', # left half horz line - 18 : u'\u253c',# vert AND horz - 25 : u'\u251c',# vert and right - 17 : u'\u251c', - 20 : u'\u2524',# vert and left - 10 : u'\u252c',# horz and bot - 34 : u'\u2534',# horz and top - 21 : u'\u253c',# vert AND horz - 42 : u'\u253c' - + 0: " ", + 2: "\u2500", # horz line + 16: "\u2502", # vert line + 9: "\u250c", # box top left + 33: "\u2514", # box bot left + 12: "\u2510", # box top right + 36: "\u2518", # box bot right + 1: "\u2576", # right half horz line + 8: "\u2577", # lower half vert line + 32: "\u2575", # upper half vert line + 4: "\u2574", # left half horz line + 18: "\u253c", # vert AND horz + 25: "\u251c", # vert and right + 17: "\u251c", + 20: "\u2524", # vert and left + 10: "\u252c", # horz and bot + 34: "\u2534", # horz and top + 21: "\u253c", # vert AND horz + 42: "\u253c", } try: - char = chars[index]; + char = chars[index] except KeyError: - char = "x" - + char = "x" + return char def combine(self, a, b): - return [ a[0] or b[0], a[1] or b[1], a[2] or b[2]]; - + return [a[0] or b[0], a[1] or b[1], a[2] or b[2]] + def rasterize(self): - for x in range(self.height): - for y in range(self.width): - char = "({},{})".format(y,x); - index = 0; - horz_line_points = [False, False, False]; - vert_line_points = [False, False, False]; - - for charloc in self.characters: - if (charloc[0][0] == y and charloc[0][1] == x): - print(charloc[1], end='', sep=''); - index = -1; - break - if index == 0: # not a character, either vert or horz line - for line in self.horz_line_list: - horz_line_points = self.combine(horz_line_points, self.within_line((y,x),line)); - index += self.find_horz_index(horz_line_points) - - for line in self.vert_line_list: - vert_line_points = self.combine(vert_line_points, self.within_line((y,x),line)); - index += 8*self.find_horz_index(vert_line_points) - - print("{}".format(self.transform(index)), end='', sep='') - #print(" {} ".format(index), end='', sep='') - - print("") + for x in range(self.height): + for y in range(self.width): + char = "({},{})".format(y, x) + index = 0 + horz_line_points = [False, False, False] + vert_line_points = [False, False, False] + + for charloc in self.characters: + if charloc[0][0] == y and charloc[0][1] == x: + print(charloc[1], end="", sep="") + index = -1 + break + if index == 0: # not a character, either vert or horz line + for line in self.horz_line_list: + horz_line_points = self.combine( + horz_line_points, self.within_line((y, x), line) + ) + index += self.find_horz_index(horz_line_points) + + for line in self.vert_line_list: + vert_line_points = self.combine( + vert_line_points, self.within_line((y, x), line) + ) + index += 8 * self.find_horz_index(vert_line_points) + + print("{}".format(self.transform(index)), end="", sep="") + # print(" {} ".format(index), end='', sep='') + + print("") + superscripts = { - # 0 : u'\u2070', - 0 : ' ', - 1 : u'\u00b9', - 2 : u'\u00b2', - 3 : u'\u00b3', - 4 : u'\u2074', - 5 : u'\u2075', - 6 : u'\u2076', - 7 : u'\u2077', - 8 : u'\u2078', - 9 : u'\u2079', + # 0 : u'\u2070', + 0: " ", + 1: "\u00b9", + 2: "\u00b2", + 3: "\u00b3", + 4: "\u2074", + 5: "\u2075", + 6: "\u2076", + 7: "\u2077", + 8: "\u2078", + 9: "\u2079", } subscripts = { - # 0 : u'\u2080', - 0 : ' ', - 1 : u'\u2081', - 2 : u'\u2082', - 3 : u'\u2083', - 4 : u'\u2084', - 5 : u'\u2085', - 6 : u'\u2086', - 7 : u'\u2087', - 8 : u'\u2088', - 9 : u'\u2089', + # 0 : u'\u2080', + 0: " ", + 1: "\u2081", + 2: "\u2082", + 3: "\u2083", + 4: "\u2084", + 5: "\u2085", + 6: "\u2086", + 7: "\u2087", + 8: "\u2088", + 9: "\u2089", } -def draw_switchbox(canvas, xoffset, yoffset, source_count, destination_count, - northbound, southbound, eastbound, westbound, draw_demand=True, name=""): - c.draw_square((xoffset+5,yoffset+4),2) + +def draw_switchbox( + canvas, + xoffset, + yoffset, + source_count, + destination_count, + northbound, + southbound, + eastbound, + westbound, + draw_demand=True, + name="", +): + c.draw_square((xoffset + 5, yoffset + 4), 2) # label it if len(name) > 0: - c.draw_character((xoffset+6,yoffset+4), name[0]) + c.draw_character((xoffset + 6, yoffset + 4), name[0]) if len(name) > 1: - c.draw_character((xoffset+7,yoffset+4), name[1]) + c.draw_character((xoffset + 7, yoffset + 4), name[1]) if len(name) > 2: - c.draw_character((xoffset+8,yoffset+4), name[2]) + c.draw_character((xoffset + 8, yoffset + 4), name[2]) if len(name) > 3: - c.draw_character((xoffset+9,yoffset+4), name[3]) + c.draw_character((xoffset + 9, yoffset + 4), name[3]) # draw source and destination count - if(source_count > 0 or destination_count > 0): - c.draw_character((xoffset+7,yoffset+5), '*') + if source_count > 0 or destination_count > 0: + c.draw_character((xoffset + 7, yoffset + 5), "*") # left of the switchbox (south) - if northbound > 0: - c.draw_line((xoffset+10,yoffset+4), (xoffset+14,yoffset+4)) - if(draw_demand): - c.draw_character((xoffset+12,yoffset+3), subscripts[northbound]) - if(northbound > 6): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+10,yoffset+4), 'x') - #c.draw_character((xoffset+11,yoffset+4), 'x') - c.draw_character((xoffset+12,yoffset+4), 'x') - if southbound > 0: - c.draw_line((xoffset+0,yoffset+5), (xoffset+4,yoffset+5)) - if(draw_demand): - c.draw_character((xoffset+2,yoffset+6), superscripts[southbound]) - if(southbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+1, yoffset+5), 'x') - #c.draw_character((xoffset+2, yoffset+5), 'x') - c.draw_character((xoffset+3, yoffset+5), 'x') + if northbound > 0: + c.draw_line((xoffset + 10, yoffset + 4), (xoffset + 14, yoffset + 4)) + if draw_demand: + c.draw_character((xoffset + 12, yoffset + 3), subscripts[northbound]) + if northbound > 6: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 10, yoffset + 4), "x") + # c.draw_character((xoffset+11,yoffset+4), 'x') + c.draw_character((xoffset + 12, yoffset + 4), "x") + if southbound > 0: + c.draw_line((xoffset + 0, yoffset + 5), (xoffset + 4, yoffset + 5)) + if draw_demand: + c.draw_character((xoffset + 2, yoffset + 6), superscripts[southbound]) + if southbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 1, yoffset + 5), "x") + # c.draw_character((xoffset+2, yoffset+5), 'x') + c.draw_character((xoffset + 3, yoffset + 5), "x") # below the switchbox (east) - if eastbound > 0: - c.draw_line((xoffset+6,yoffset+6), (xoffset+6,yoffset+8)) - if(draw_demand): - c.draw_character((xoffset+5,yoffset+7), superscripts[eastbound]) - if(eastbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+6, yoffset+6), 'x') - #c.draw_character((xoffset+6, yoffset+7), 'x') - c.draw_character((xoffset+6, yoffset+8), 'x') - if westbound > 0: - c.draw_line((xoffset+8,yoffset+1), (xoffset+8,yoffset+3)) - if(draw_demand): - c.draw_character((xoffset+9,yoffset+2), superscripts[westbound]) - if(westbound > 4): # if overcapacity, mark with an 'x' - c.draw_character((xoffset+8, yoffset+1), 'x') - #c.draw_character((xoffset+7, yoffset+2), 'x') - c.draw_character((xoffset+8, yoffset+3), 'x') - - -SB_WIDTH = 10; SB_HEIGHT = 5 # distances between switchboxes + if eastbound > 0: + c.draw_line((xoffset + 6, yoffset + 6), (xoffset + 6, yoffset + 8)) + if draw_demand: + c.draw_character((xoffset + 5, yoffset + 7), superscripts[eastbound]) + if eastbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 6, yoffset + 6), "x") + # c.draw_character((xoffset+6, yoffset+7), 'x') + c.draw_character((xoffset + 6, yoffset + 8), "x") + if westbound > 0: + c.draw_line((xoffset + 8, yoffset + 1), (xoffset + 8, yoffset + 3)) + if draw_demand: + c.draw_character((xoffset + 9, yoffset + 2), superscripts[westbound]) + if westbound > 4: # if overcapacity, mark with an 'x' + c.draw_character((xoffset + 8, yoffset + 1), "x") + # c.draw_character((xoffset+7, yoffset+2), 'x') + c.draw_character((xoffset + 8, yoffset + 3), "x") + + +SB_WIDTH = 10 +SB_HEIGHT = 5 # distances between switchboxes + + def draw_switchboxes(c, switchboxes): for item in switchboxes: - draw_switchbox(c, SB_WIDTH*item['row'], SB_HEIGHT*item['col'], - item['source_count'], item['destination_count'], - item['northbound'], item['southbound'], - item['eastbound'], item['westbound'], draw_demand=True, - name="{},{}".format(item['col'], item['row'] )) - + draw_switchbox( + c, + SB_WIDTH * item["row"], + SB_HEIGHT * item["col"], + item["source_count"], + item["destination_count"], + item["northbound"], + item["southbound"], + item["eastbound"], + item["westbound"], + draw_demand=True, + name="{},{}".format(item["col"], item["row"]), + ) + + # given a route, draw arrow characters to indicate the route # route is a list of switchboxes, represented as int tuple coordinates -left_arrow = u'\u2190' -up_arrow = u'\u2191' -right_arrow= u'\u2192' -down_arrow = u'\u2193' +left_arrow = "\u2190" +up_arrow = "\u2191" +right_arrow = "\u2192" +down_arrow = "\u2193" + + def draw_route(c, route): - for i in range(len(route)-1): + for i in range(len(route) - 1): col = route[i][0][0] row = route[i][0][1] - xoffset = SB_WIDTH*row - yoffset = SB_HEIGHT*col - if len(route[i]) == 1: continue + xoffset = SB_WIDTH * row + yoffset = SB_HEIGHT * col + if len(route[i]) == 1: + continue dirs = route[i][1] # draw source and destination - if(i == 0): - c.draw_character((xoffset+5,yoffset+5), 'S') - if(i == (len(route)-2)): - c.draw_character((xoffset+9,yoffset+5), 'D') + if i == 0: + c.draw_character((xoffset + 5, yoffset + 5), "S") + if i == (len(route) - 2): + c.draw_character((xoffset + 9, yoffset + 5), "D") - if(i == 0): - if(row == 0): # for routes starting in the shim, draw arrows coming from PL - c.draw_character((xoffset+1, yoffset+4), right_arrow) - c.draw_character((xoffset+2, yoffset+4), right_arrow) - c.draw_character((xoffset+3, yoffset+4), right_arrow) + if i == 0: + if row == 0: # for routes starting in the shim, draw arrows coming from PL + c.draw_character((xoffset + 1, yoffset + 4), right_arrow) + c.draw_character((xoffset + 2, yoffset + 4), right_arrow) + c.draw_character((xoffset + 3, yoffset + 4), right_arrow) for j in range(len(dirs)): # draw indications for cores the route passes through - c.replace_character((xoffset+7,yoffset+5), '*', '#') + c.replace_character((xoffset + 7, yoffset + 5), "*", "#") # 0 = North, 1 = East, 2 = South, 3 = West - if(dirs[j] == "North"): - c.draw_character((xoffset+11, yoffset+4), right_arrow) - c.draw_character((xoffset+12, yoffset+4), right_arrow) - c.draw_character((xoffset+13, yoffset+4), right_arrow) - row = row+1 - elif(dirs[j] == "East"): - c.draw_character((xoffset+6, yoffset+7), down_arrow) - col = col+1 - elif(dirs[j] == "South"): - c.draw_character((xoffset+1, yoffset+5), left_arrow) - c.draw_character((xoffset+2, yoffset+5), left_arrow) - c.draw_character((xoffset+3, yoffset+5), left_arrow) - row = row-1 - elif(dirs[j] == "West"): - c.draw_character((xoffset+8, yoffset+2), up_arrow) - col = col-1 - elif(dirs[j] == "DMA"): + if dirs[j] == "North": + c.draw_character((xoffset + 11, yoffset + 4), right_arrow) + c.draw_character((xoffset + 12, yoffset + 4), right_arrow) + c.draw_character((xoffset + 13, yoffset + 4), right_arrow) + row = row + 1 + elif dirs[j] == "East": + c.draw_character((xoffset + 6, yoffset + 7), down_arrow) + col = col + 1 + elif dirs[j] == "South": + c.draw_character((xoffset + 1, yoffset + 5), left_arrow) + c.draw_character((xoffset + 2, yoffset + 5), left_arrow) + c.draw_character((xoffset + 3, yoffset + 5), left_arrow) + row = row - 1 + elif dirs[j] == "West": + c.draw_character((xoffset + 8, yoffset + 2), up_arrow) + col = col - 1 + elif dirs[j] == "DMA": # draw destination - c.draw_character((xoffset+9,yoffset+5), 'D') - + c.draw_character((xoffset + 9, yoffset + 5), "D") - - -if __name__ == '__main__': +if __name__ == "__main__": # setup python unicode encoding os.system("export PYTHONIOENCODING=utf8") - parser = argparse.ArgumentParser(description='Draw switchboxes, demands and routes') - parser.add_argument('-j', '--json', help='Filepath for JSON file to read') - parser.add_argument('-r', '--route_list', help='List of routes to print') - parser.add_argument('-o', '--output', help='Path to output directory. Text files of the routes will be stored here.') + parser = argparse.ArgumentParser(description="Draw switchboxes, demands and routes") + parser.add_argument("-j", "--json", help="Filepath for JSON file to read") + parser.add_argument("-r", "--route_list", help="List of routes to print") + parser.add_argument( + "-o", + "--output", + help="Path to output directory. Text files of the routes will be stored here.", + ) args = parser.parse_args() - if args.json: json_file_path = args.json - else: json_file_path = "switchbox.json" # default JSON + if args.json: + json_file_path = args.json + else: + json_file_path = "switchbox.json" # default JSON with open(json_file_path) as f: json_data = json.load(f) - + switchboxes = [] routes = [] @@ -367,22 +426,23 @@ def draw_route(c, route): switchboxes.append(item) if "route" in key: routes.append(item) - + max_col = 0 max_row = 0 for switchbox in switchboxes: - if switchbox['col'] > max_col: - max_col = switchbox['col'] - if switchbox['row'] > max_row: - max_row = switchbox['row'] + if switchbox["col"] > max_col: + max_col = switchbox["col"] + if switchbox["row"] > max_row: + max_row = switchbox["row"] routes_to_print = [] - if args.route_list: + if args.route_list: for route in args.route_list.split(","): routes_to_print.append(int(route.strip())) - else: routes_to_print = range(len(routes)) + else: + routes_to_print = range(len(routes)) - output_directory = json_file_path.split('.')[0] + '/' + output_directory = json_file_path.split(".")[0] + "/" if args.output: output_directory = args.output @@ -390,12 +450,14 @@ def draw_route(c, route): os.mkdir(output_directory) for i in routes_to_print: - c = canvas(12*(max_row+1), 5+5*(max_col+1)); + c = canvas(12 * (max_row + 1), 5 + 5 * (max_col + 1)) draw_switchboxes(c, switchboxes) filename = os.path.join(output_directory, "route{}.txt".format(i)) sys.stdout = sys.__stdout__ - print("Printing route {} of {}: {}".format(i, len(routes_to_print)-1, filename)) - with open(filename, 'w') as f: + print( + "Printing route {} of {}: {}".format(i, len(routes_to_print) - 1, filename) + ) + with open(filename, "w") as f: sys.stdout = f print("Route {}: {}".format(i, routes[i])) draw_route(c, routes[i]) diff --git a/utils/build-mlir-aie-pcie.sh b/utils/build-mlir-aie-pcie.sh index 4d371d42bf..f715ed8596 100755 --- a/utils/build-mlir-aie-pcie.sh +++ b/utils/build-mlir-aie-pcie.sh @@ -63,6 +63,7 @@ CMAKE_CONFIGS="\ -DLLVM_ENABLE_ASSERTIONS=ON \ -DLLVM_ENABLE_RTTI=$LLVM_ENABLE_RTTI \ -DAIE_RUNTIME_TARGETS=x86_64-hsa \ + -DAIE_RUNTIME_TEST_TARGET=x86_64-hsa \ -DAIE_ENABLE_PYTHON_PASSES=OFF \ .. |& tee cmake.log" diff --git a/utils/generate-test-checks.py b/utils/generate-test-checks.py index a115d4b343..6237f2e186 100755 --- a/utils/generate-test-checks.py +++ b/utils/generate-test-checks.py @@ -46,15 +46,15 @@ SSA_RE = re.compile(SSA_RE_STR) # Regex matching the left-hand side of an assignment -SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*=' +SSA_RESULTS_STR = r"\s*(%" + SSA_RE_STR + r")(\s*,\s*(%" + SSA_RE_STR + r"))*\s*=" SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR) # Regex matching attributes -ATTR_RE_STR = r'(#[a-zA-Z._-][a-zA-Z0-9._-]*)' +ATTR_RE_STR = r"(#[a-zA-Z._-][a-zA-Z0-9._-]*)" ATTR_RE = re.compile(ATTR_RE_STR) # Regex matching the left-hand side of an attribute definition -ATTR_DEF_RE_STR = r'\s*' + ATTR_RE_STR + r'\s*=' +ATTR_DEF_RE_STR = r"\s*" + ATTR_RE_STR + r"\s*=" ATTR_DEF_RE = re.compile(ATTR_DEF_RE_STR) @@ -69,7 +69,7 @@ def __init__(self, variable_names): self.generate_in_parent_scope_left = 0 # Parse variable names - self.variable_names = [name.upper() for name in variable_names.split(',')] + self.variable_names = [name.upper() for name in variable_names.split(",")] self.used_variable_names = set() # Generate the following 'n' variable names in the parent scope. @@ -80,8 +80,10 @@ def generate_in_parent_scope(self, n): def generate_name(self, source_variable_name): # Compute variable name - variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else '' - if variable_name == '': + variable_name = ( + self.variable_names.pop(0) if len(self.variable_names) > 0 else "" + ) + if variable_name == "": variable_name = "VAL_" + str(self.name_counter) self.name_counter += 1 @@ -90,11 +92,11 @@ def generate_name(self, source_variable_name): if self.generate_in_parent_scope_left > 0: self.generate_in_parent_scope_left -= 1 scope = len(self.scopes) - 2 - assert(scope >= 0) + assert scope >= 0 # Save variable if variable_name in self.used_variable_names: - raise RuntimeError(variable_name + ': duplicate variable name') + raise RuntimeError(variable_name + ": duplicate variable name") self.scopes[scope][source_variable_name] = variable_name self.used_variable_names.add(variable_name) @@ -117,11 +119,12 @@ def clear_names(self): self.name_counter = 0 self.used_variable_names = set() + class AttributeNamer: def __init__(self, attribute_names): self.name_counter = 0 - self.attribute_names = [name.upper() for name in attribute_names.split(',')] + self.attribute_names = [name.upper() for name in attribute_names.split(",")] self.map = {} self.used_attribute_names = set() @@ -129,17 +132,19 @@ def __init__(self, attribute_names): def generate_name(self, source_attribute_name): # Compute FileCheck name - attribute_name = self.attribute_names.pop(0) if len(self.attribute_names) > 0 else '' - if attribute_name == '': + attribute_name = ( + self.attribute_names.pop(0) if len(self.attribute_names) > 0 else "" + ) + if attribute_name == "": attribute_name = "ATTR_" + str(self.name_counter) self.name_counter += 1 # Prepend global symbol - attribute_name = '$' + attribute_name + attribute_name = "$" + attribute_name # Save attribute if attribute_name in self.used_attribute_names: - raise RuntimeError(attribute_name + ': duplicate attribute name') + raise RuntimeError(attribute_name + ": duplicate attribute name") self.map[source_attribute_name] = attribute_name self.used_attribute_names.add(attribute_name) return attribute_name @@ -148,14 +153,19 @@ def generate_name(self, source_attribute_name): # has been generated for the given attribute yet, the source attribute name # itself is returned. def get_name(self, source_attribute_name): - return self.map[source_attribute_name] if source_attribute_name in self.map else '?' + return ( + self.map[source_attribute_name] + if source_attribute_name in self.map + else "?" + ) + # Return the number of SSA results in a line of type # %0, %1, ... = ... # The function returns 0 if there are no results. def get_num_ssa_results(input_line): m = SSA_RESULTS_RE.match(input_line) - return m.group().count('%') if m else 0 + return m.group().count("%") if m else 0 # Process a line of input that has been split at each SSA identifier '%'. @@ -165,7 +175,7 @@ def process_line(line_chunks, variable_namer): # Process the rest that contained an SSA value name. for chunk in line_chunks: m = SSA_RE.match(chunk) - ssa_name = m.group(0) if m is not None else '' + ssa_name = m.group(0) if m is not None else "" # Check if an existing variable exists for this name. variable = None @@ -207,26 +217,35 @@ def process_source_lines(source_lines, note, args): source_segments[-1].append(line + "\n") return source_segments + def process_attribute_definition(line, attribute_namer, output): m = ATTR_DEF_RE.match(line) if m: attribute_name = attribute_namer.generate_name(m.group(1)) - line = '// CHECK: #[[' + attribute_name + ':.+]] =' + line[len(m.group(0)):] + '\n' + line = ( + "// CHECK: #[[" + + attribute_name + + ":.+]] =" + + line[len(m.group(0)) :] + + "\n" + ) output.write(line) + def process_attribute_references(line, attribute_namer): - output_line = '' + output_line = "" components = ATTR_RE.split(line) for component in components: m = ATTR_RE.match(component) if m: - output_line += '#[[' + attribute_namer.get_name(m.group(1)) + ']]' - output_line += component[len(m.group()):] + output_line += "#[[" + attribute_namer.get_name(m.group(1)) + "]]" + output_line += component[len(m.group()) :] else: output_line += component return output_line + # Pre-process a line of input to remove any character sequences that will be # problematic with FileCheck. def preprocess_line(line): @@ -274,17 +293,19 @@ def main(): parser.add_argument( "--variable_names", type=str, - default='', + default="", help="Names to be used in FileCheck regular expression to represent SSA " "variables in the order they are encountered. Separate names with commas, " - "and leave empty entries for default names (e.g.: 'DIM,,SUM,RESULT')") + "and leave empty entries for default names (e.g.: 'DIM,,SUM,RESULT')", + ) parser.add_argument( "--attribute_names", type=str, - default='', + default="", help="Names to be used in FileCheck regular expression to represent " "attributes in the order they are defined. Separate names with commas," - "commas, and leave empty entries for default names (e.g.: 'MAP0,,,MAP1')") + "commas, and leave empty entries for default names (e.g.: 'MAP0,,,MAP1')", + ) args = parser.parse_args() @@ -400,7 +421,9 @@ def main(): output_segments = list(filter(None, output_segments)) # Write the output. if source_segments: - assert len(output_segments) == len(source_segments), f"{len(output_segments)=}, {len(source_segments)=}" + assert len(output_segments) == len( + source_segments + ), f"{len(output_segments)=}, {len(source_segments)=}" for check_segment, source_segment in zip(output_segments, source_segments): for line in check_segment: output.write(line)