From c0b15bb6a5e99ca887e2e2d8bad6f43f9d99e202 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 15 Oct 2024 13:46:07 -0600 Subject: [PATCH] Match dynamic object fifo python examples style to other examples in programming examples. (#1840) --- .../dynamic_object_fifo/nested_loops/aie2.py | 24 +++++++------- .../dynamic_object_fifo/ping_pong/aie2.py | 21 ++++++------- .../dynamic_object_fifo/reduction/aie2.py | 27 ++++++++-------- .../sliding_window/aie2.py | 27 ++++++++-------- .../sliding_window/aie2_if_else.py | 26 +++++++--------- .../two_core_sliding_window/aie2.py | 31 +++++++++---------- 6 files changed, 74 insertions(+), 82 deletions(-) diff --git a/programming_examples/dynamic_object_fifo/nested_loops/aie2.py b/programming_examples/dynamic_object_fifo/nested_loops/aie2.py index 7c5babedb3..537fff8b56 100644 --- a/programming_examples/dynamic_object_fifo/nested_loops/aie2.py +++ b/programming_examples/dynamic_object_fifo/nested_loops/aie2.py @@ -5,8 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * @@ -25,41 +24,40 @@ def nested_loops(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + tensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] ) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for _ in range_(5): elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) for _ in range_(5): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - call(passthrough_10_i32, [elemIn, elemOut]) + passthrough_10_i32(elemIn, elemOut) of_out.release(ObjectFifoPort.Produce, 1) of_in.release(ObjectFifoPort.Consume, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N // n_rows, T.i32()) - @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, O]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd( + metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True + ) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O]) + dma_wait(of_in, of_out) print(ctx.module) diff --git a/programming_examples/dynamic_object_fifo/ping_pong/aie2.py b/programming_examples/dynamic_object_fifo/ping_pong/aie2.py index 995f6a100b..e2edbd1643 100644 --- a/programming_examples/dynamic_object_fifo/ping_pong/aie2.py +++ b/programming_examples/dynamic_object_fifo/ping_pong/aie2.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - +import numpy as np import sys from aie.dialects.aie import * @@ -23,40 +23,37 @@ def ping_pong(): @device(dev) def device_body(): - memRef_ty = T.memref(N // 16, T.i32()) + tensor_ty = np.ndarray[(N // 16,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) # AIE Core Function declarations passthrough_64_i32 = external_func( - "passthrough_64_i32", inputs=[memRef_ty, memRef_ty] + "passthrough_64_i32", inputs=[tensor_ty, tensor_ty] ) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - call(passthrough_64_i32, [elemIn, elemOut]) + passthrough_64_i32(elemIn, elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N // 16, T.i32()) - @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module) diff --git a/programming_examples/dynamic_object_fifo/reduction/aie2.py b/programming_examples/dynamic_object_fifo/reduction/aie2.py index 9457a81370..42cc47b0e9 100644 --- a/programming_examples/dynamic_object_fifo/reduction/aie2.py +++ b/programming_examples/dynamic_object_fifo/reduction/aie2.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - +import numpy as np import sys from aie.dialects.aie import * @@ -25,41 +25,40 @@ def reduction(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + tile_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, [2, 4], memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, [2, 4], tile_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tile_ty) # AIE Core Function declarations - add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] - ) + add_10_i32 = external_func("add_10_i32", inputs=[tile_ty, tile_ty, tile_ty]) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in.release(ObjectFifoPort.Consume, 2) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_in_ty = T.memref(N, T.i32()) - tensor_out_ty = T.memref(O, T.i32()) + tensor_in_ty = np.ndarray[(N,), np.dtype[np.int32]] + tensor_out_ty = np.ndarray[(O,), np.dtype[np.int32]] @runtime_sequence(tensor_in_ty, tensor_out_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, O]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd( + metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True + ) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O]) + dma_wait(of_in, of_out) print(ctx.module) diff --git a/programming_examples/dynamic_object_fifo/sliding_window/aie2.py b/programming_examples/dynamic_object_fifo/sliding_window/aie2.py index 08d92c73e1..ba72c7de0f 100644 --- a/programming_examples/dynamic_object_fifo/sliding_window/aie2.py +++ b/programming_examples/dynamic_object_fifo/sliding_window/aie2.py @@ -5,8 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * @@ -24,19 +23,19 @@ def sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles @@ -45,30 +44,32 @@ def device_body(): def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + add_10_i32(elemInPre, elemInPre, elemOutPre) of_out.release(ObjectFifoPort.Produce, 1) for _ in range_(8): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost) of_in.release(ObjectFifoPort.Consume, 2) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd( + metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True + ) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_in, of_out) print(ctx.module) diff --git a/programming_examples/dynamic_object_fifo/sliding_window/aie2_if_else.py b/programming_examples/dynamic_object_fifo/sliding_window/aie2_if_else.py index 8ab2dfa636..dc6fa3fdae 100644 --- a/programming_examples/dynamic_object_fifo/sliding_window/aie2_if_else.py +++ b/programming_examples/dynamic_object_fifo/sliding_window/aie2_if_else.py @@ -5,8 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * @@ -24,49 +23,48 @@ def sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for i in range_(10): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) if i == 0: elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOut]) + add_10_i32(elemInPre, elemInPre, elemOut) elif i == 9: elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOut) of_in.release(ObjectFifoPort.Consume, 2) else: elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module) diff --git a/programming_examples/dynamic_object_fifo/two_core_sliding_window/aie2.py b/programming_examples/dynamic_object_fifo/two_core_sliding_window/aie2.py index e815fada7c..f2ce1a8238 100644 --- a/programming_examples/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/programming_examples/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -5,8 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * @@ -24,7 +23,7 @@ def two_core_sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) @@ -32,16 +31,16 @@ def device_body(): ComputeTile2 = tile(col, 4) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) - of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, subtensor_ty) + of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, subtensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + "passthrough_10_i32", inputs=[subtensor_ty, subtensor_ty] ) add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles @@ -51,7 +50,7 @@ def core_body(): for _ in range_(10): elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - call(passthrough_10_i32, [elemIn, elemOut]) + passthrough_10_i32(elemIn, elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Produce, 1) @@ -59,30 +58,30 @@ def core_body(): def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + add_10_i32(elemInPre, elemInPre, elemOutPre) of_out.release(ObjectFifoPort.Produce, 1) for _ in range_(8): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in2.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost) of_in2.release(ObjectFifoPort.Consume, 2) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module)