Merge branch 'main' into taplib-docs

Xilinx · Dec 12, 2024 · 7953c26 · 7953c26
2 parents ad0205f + 04578ed
commit 7953c26
Show file tree

Hide file tree

Showing 229 changed files with 8,077 additions and 5,965 deletions.
diff --git a/programming_examples/README.md b/programming_examples/README.md
@@ -10,7 +10,13 @@
 
 # <ins>Programming Examples</ins>
 
-These programming examples are provided so that application programmers can learn how to leverage the IRON design flow with mlir-aie python bindings, and the mlir-aie intermediate representation directly to build applications targeting AI Engines. They are organized into the following directories:
+These programming examples are provided so that application programmers can learn how to leverage the IRON design flow with mlir-aie python bindings, and the mlir-aie intermediate representation directly to build applications targeting AI Engines. 
+
+Each IRON example has one or more implementations:
+* `<example_name>.py` - These designs are generally written using a higher-level version of IRON
+* `<example_name>_alt.py` - These designs are generally written using a lower-level version of IRON
+
+They are organized into the following directories:
 
 ## [basic](./basic) 
 

diff --git a/programming_examples/basic/dma_transpose/Makefile b/programming_examples/basic/dma_transpose/Makefile
@@ -16,15 +16,23 @@ SHELL := /bin/bash
 
 all: build/final.xclbin build/insts.txt
 
-targetname = dmaTranspose
+targetname = dma_transpose
 M ?= 64
 K ?= 32
 
-aie_py_src=aie2.py
+aie_py_src=${targetname}.py
 use_alt?=0
+use_iron?=0
 
 ifeq (${use_alt}, 1)
-aie_py_src=aie2_alt.py
+aie_py_src=${targetname}_alt.py
+ifeq (${use_iron}, 1)
+$(error Cannot specify both alternative design and IRON)
+endif
+endif
+
+ifeq (${use_iron}, 1)
+aie_py_src=${targetname}_iron.py
 endif
 
 build/aie.mlir: ${srcdir}/${aie_py_src}
@@ -51,7 +59,7 @@ endif
 run: ${targetname}.exe build/final.xclbin
 	${powershell} ./$< -x build/final.xclbin -i build/insts.txt -k MLIR_AIE --M ${M} --K ${K}
 
-generate_access_map: ${srcdir}/aie2.py
+generate_access_map: ${srcdir}/${aie_py_src}
 	mkdir -p ${@D}
 	python3 $< --generate-access-map ${M} ${K}
 

diff --git a/programming_examples/basic/dma_transpose/README.md b/programming_examples/basic/dma_transpose/README.md
@@ -12,8 +12,8 @@
 
 This reference design can be run on a Ryzen™ AI NPU.
 
-In the [design](./aie2.py), a 2-D array in a row-major layout is read from external memory to `ComputeTile2` with a transposed layout,
-by using an implicit copy via the compute tile's Data Movement Accelerator (DMA). The data is read from and written to external memory through the Shim tile (`col`, 0).
+In the [design](./dma_transpose_iron.py), a 2-D array in a row-major layout is read from external memory to a compute tile with a transposed layout,
+by using an implicit copy via the compute tile's Data Movement Accelerator (DMA). The data is read from and written to external memory through a shim tile.
 
 This data movement transformation can be visualized as a map which shows the order in which the data is streamed (e.g., in transposed layout):
 <p align="center">
@@ -23,16 +23,34 @@ This data movement transformation can be visualized as a map which shows the ord
  </h3> 
 </p>
 
-The implicit copy is performed using the `object_fifo_link` operation that specifies how input data arriving via `of_in` should be sent further via `of_out` by specifically leveraging the compute tile's DMA. This operation and its functionality are described in more depth in [Section-2b](../../../programming_guide/section-2/section-2b/README.md/#object-fifo-link) of the programming guide.
+The implicit copy is performed using the `ObjectFifo.forward()` function that specifies how input data arriving via `of_in` should be sent further via `of_out` by specifically leveraging a compute tile's (`AnyComputeTile`'s) DMA. 
 
+## Design Versions
+* [dma_transpose_iron.py](./dma_transpose_iron.py) shows how to use the current version of IRON
+* [dma_transpose.py](./dma_transpose.py) shows a lower-level version of IRON, where constructors directly correspond to MLIR operations
+* [dma_transpose_alt.py](./dma_transpose_alt.py)
 
-To compile and run the design for NPU:
-```bash
+The `object_fifo_link` operation used explicitly by `dma_transpose.py` and `dma_transpose_alt.py` is described in more depth in [Section-2b](../../../programming_guide/section-2/section-2b/README.md/#object-fifo-link) of the programming guide.
+
+To compile and run the design `dma_transpose_iron.py` for NPU:
+```shell
+env use_iron=1 make
+make run
+```
+
+To compile and run the design `dma_transpose.py` for NPU:
+```shell
 make
 make run
 ```
 
+To compile and run the design `dma_transpose_alt.py` for NPU:
+```shell
+env use_alt=1 make
+make run
+```
+
 To generate a data visualization of the transpose (like that above), run:
-```bash
+```shell
 make generate_access_map
 ```
diff --git a/...ming_examples/basic/dma_transpose/aie2.py → ...ples/basic/dma_transpose/dma_transpose.py b/...ming_examples/basic/dma_transpose/aie2.py → ...ples/basic/dma_transpose/dma_transpose.py
@@ -1,4 +1,4 @@
-# dma_transpose/aie2.py -*- Python -*-
+# dma_transpose/dma_transpose.py -*- Python -*-
 #
 # This file is licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.

diff --git a/..._examples/basic/dma_transpose/aie2_alt.py → .../basic/dma_transpose/dma_transpose_alt.py b/..._examples/basic/dma_transpose/aie2_alt.py → .../basic/dma_transpose/dma_transpose_alt.py
@@ -1,4 +1,4 @@
-# dma_transpose/aie2.py -*- Python -*-
+# dma_transpose/dma_transpose_alt.py -*- Python -*-
 #
 # This file is licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.

diff --git a/programming_examples/basic/dma_transpose/dma_transpose_iron.py b/programming_examples/basic/dma_transpose/dma_transpose_iron.py
@@ -0,0 +1,70 @@
+# dma_transpose/dma_transpose_iron.py -*- Python -*-
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
+import argparse
+import numpy as np
+import sys
+
+from aie.iron import ObjectFifo, Program, Runtime
+from aie.iron.device import NPU1Col1, AnyComputeTile
+from aie.iron.placers import SequentialPlacer
+from aie.helpers.taplib import TensorTiler2D
+
+
+def my_passthrough(M, K, generate_acccess_map=False):
+
+    # Define types
+    tensor_ty = np.ndarray[(M, K), np.dtype[np.int32]]
+
+    # Define tensor access pattern
+    tap_in = TensorTiler2D.simple_tiler((M, K), tile_col_major=True)[0]
+
+    # Use tensor access pattern to create a graph
+    if generate_acccess_map:
+        tap_in.visualize(file_path="iron_transpose_data.png", show_tile=False)
+        return
+
+    # Dataflow with ObjectFifos
+    of_in = ObjectFifo(tensor_ty)
+    of_out = of_in.cons().forward(AnyComputeTile)
+
+    # Runtime operations to move data to/from the AIE-array
+    rt = Runtime()
+    with rt.sequence(tensor_ty, tensor_ty, tensor_ty) as (a_in, _, c_out):
+        rt.fill(of_in.prod(), a_in, tap_in)
+        rt.drain(of_out.cons(), c_out, wait=True)
+
+    # Create the program from the device type and runtime
+    my_program = Program(NPU1Col1(), rt)
+
+    # Place program components (assign them resources on the device) and generate an MLIR module
+    module = my_program.resolve_program(SequentialPlacer())
+
+    # Print the generated MLIR
+    print(module)
+
+
+if __name__ == "__main__":
+    p = argparse.ArgumentParser()
+    p.add_argument("dims", help="M K", type=int, nargs="*", default=[64, 64])
+    p.add_argument(
+        "--generate-access-map",
+        action="store_true",
+        help="Produce a file showing data access order",
+    )
+    args = p.parse_args()
+
+    if len(args.dims) != 2:
+        print(
+            "ERROR: Must provide either no dimensions or both M and K", file=sys.stderr
+        )
+        exit(-1)
+    my_passthrough(
+        M=args.dims[0],
+        K=args.dims[1],
+        generate_acccess_map=args.generate_access_map,
+    )
diff --git a/..._add_BDs_init_values/run_makefile_alt.lit → ...basic/dma_transpose/run_makefile_iron.lit b/..._add_BDs_init_values/run_makefile_alt.lit → ...basic/dma_transpose/run_makefile_iron.lit
@@ -3,10 +3,9 @@
 //
 // REQUIRES: ryzen_ai, peano 
 //
-// RUN: mkdir -p test_alt
-// RUN: cd test_alt
+// RUN: mkdir -p iron_test
+// RUN: cd iron_test
 // RUN: make -f %S/Makefile clean
-// RUN: env use_alt=1 make -f %S/Makefile 
+// RUN: env use_iron=1 make -f %S/Makefile 
 // RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s
 // CHECK: PASS!
-
diff --git a/programming_examples/basic/matrix_multiplication/cascade/Makefile b/programming_examples/basic/matrix_multiplication/cascade/Makefile
@@ -9,7 +9,7 @@
 ##===----------------------------------------------------------------------===##
 srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 subdir=cascade
-targetname=matrixMultiplication
+targetname=cascade
 
 M?=512
 K?=512
@@ -25,7 +25,7 @@ target_suffix=${M}x${K}x${N}_${m}x${k}x${n}_${n_aie_cols}c
 use_alt?=0
 
 ifeq (${use_alt}, 1)
-aie_py_src=aie2_alt.py
+aie_py_src=${targetname}_alt.py
 endif
 
 include ${srcdir}/../makefile-common

diff --git a/programming_examples/basic/matrix_multiplication/cascade/README.md b/programming_examples/basic/matrix_multiplication/cascade/README.md
@@ -18,4 +18,7 @@ Different from the `whole_array` implementation, in this design, the accumulatio
 
 The current design only works for scalar `int16`.
 
-The performance sweep results against `whole_array` can be found at [here](https://gist.github.com/Yu-Zhewen/da3fed9feb278b973f35fb78c2d3a484), no gain observed. 
+The performance sweep results against `whole_array` can be found [here](https://gist.github.com/Yu-Zhewen/da3fed9feb278b973f35fb78c2d3a484); no gain was observed.
+
+The original implementation of the design is found at [cascade.py](./cascade.py). An alternative version of the design, featuring different runtime operations,
+is found at [cascade_alt.py](./cascade_alt.py).
diff --git a/...sic/matrix_multiplication/cascade/aie2.py → .../matrix_multiplication/cascade/cascade.py b/...sic/matrix_multiplication/cascade/aie2.py → .../matrix_multiplication/cascade/cascade.py