Skip to content

Commit

Permalink
Match dynamic object fifo python examples style to other examples in programming examples. (Xilinx#1840)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hunhoffe authored Oct 15, 2024
1 parent fa85462 commit c0b15bb
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 82 deletions.
24 changes: 11 additions & 13 deletions programming_examples/dynamic_object_fifo/nested_loops/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import sys
import numpy as np

from aie.dialects.aie import *
from aie.dialects.aiex import *
Expand All @@ -25,41 +24,40 @@ def nested_loops():

@device(dev)
def device_body():
memRef_ty = T.memref(N // n_rows, T.i32())
tensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]]

# Tile declarations
ShimTile = tile(col, 0)
ComputeTile = tile(col, 2)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty)
of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty)

# AIE Core Function declarations
passthrough_10_i32 = external_func(
"passthrough_10_i32", inputs=[memRef_ty, memRef_ty]
"passthrough_10_i32", inputs=[tensor_ty, tensor_ty]
)

# Set up compute tiles

@core(ComputeTile, "kernel.o")
def core_body():
for _ in range_(5):
elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
for _ in range_(5):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
call(passthrough_10_i32, [elemIn, elemOut])
passthrough_10_i32(elemIn, elemOut)
of_out.release(ObjectFifoPort.Produce, 1)
of_in.release(ObjectFifoPort.Consume, 1)

# To/from AIE-array data movement
tensor_ty = T.memref(N // n_rows, T.i32())

@runtime_sequence(tensor_ty, tensor_ty)
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, O])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)
npu_dma_memcpy_nd(
metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True
)
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O])
dma_wait(of_in, of_out)

print(ctx.module)

Expand Down
21 changes: 9 additions & 12 deletions programming_examples/dynamic_object_fifo/ping_pong/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import numpy as np
import sys

from aie.dialects.aie import *
Expand All @@ -23,40 +23,37 @@ def ping_pong():

@device(dev)
def device_body():
memRef_ty = T.memref(N // 16, T.i32())
tensor_ty = np.ndarray[(N // 16,), np.dtype[np.int32]]

# Tile declarations
ShimTile = tile(col, 0)
ComputeTile = tile(col, 2)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty)
of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty)

# AIE Core Function declarations
passthrough_64_i32 = external_func(
"passthrough_64_i32", inputs=[memRef_ty, memRef_ty]
"passthrough_64_i32", inputs=[tensor_ty, tensor_ty]
)

# Set up compute tiles

@core(ComputeTile, "kernel.o")
def core_body():
for _ in range_(sys.maxsize):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
call(passthrough_64_i32, [elemIn, elemOut])
passthrough_64_i32(elemIn, elemOut)
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)

# To/from AIE-array data movement
tensor_ty = T.memref(N // 16, T.i32())

@runtime_sequence(tensor_ty, tensor_ty)
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)
npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N])
dma_wait(of_out)

print(ctx.module)

Expand Down
27 changes: 13 additions & 14 deletions programming_examples/dynamic_object_fifo/reduction/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import numpy as np
import sys

from aie.dialects.aie import *
Expand All @@ -25,41 +25,40 @@ def reduction():

@device(dev)
def device_body():
memRef_ty = T.memref(N // n_rows, T.i32())
tile_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]]

# Tile declarations
ShimTile = tile(col, 0)
ComputeTile = tile(col, 2)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile, [2, 4], memRef_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty)
of_in = object_fifo("in", ShimTile, ComputeTile, [2, 4], tile_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, tile_ty)

# AIE Core Function declarations
add_10_i32 = external_func(
"add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty]
)
add_10_i32 = external_func("add_10_i32", inputs=[tile_ty, tile_ty, tile_ty])

# Set up compute tiles

@core(ComputeTile, "kernel.o")
def core_body():
for _ in range_(sys.maxsize):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2)
call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut])
add_10_i32(elemsIn[0], elemsIn[1], elemOut)
of_in.release(ObjectFifoPort.Consume, 2)
of_out.release(ObjectFifoPort.Produce, 1)

# To/from AIE-array data movement
tensor_in_ty = T.memref(N, T.i32())
tensor_out_ty = T.memref(O, T.i32())
tensor_in_ty = np.ndarray[(N,), np.dtype[np.int32]]
tensor_out_ty = np.ndarray[(O,), np.dtype[np.int32]]

@runtime_sequence(tensor_in_ty, tensor_out_ty)
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, O])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)
npu_dma_memcpy_nd(
metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True
)
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O])
dma_wait(of_in, of_out)

print(ctx.module)

Expand Down
27 changes: 14 additions & 13 deletions programming_examples/dynamic_object_fifo/sliding_window/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import sys
import numpy as np

from aie.dialects.aie import *
from aie.dialects.aiex import *
Expand All @@ -24,19 +23,19 @@ def sliding_window():

@device(dev)
def device_body():
memRef_ty = T.memref(N // n_rows, T.i32())
subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]]

# Tile declarations
ShimTile = tile(col, 0)
ComputeTile = tile(col, 2)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty)
of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty)

# AIE Core Function declarations
add_10_i32 = external_func(
"add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty]
"add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty]
)

# Set up compute tiles
Expand All @@ -45,30 +44,32 @@ def device_body():
def core_body():
elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1)
elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1)
call(add_10_i32, [elemInPre, elemInPre, elemOutPre])
add_10_i32(elemInPre, elemInPre, elemOutPre)
of_out.release(ObjectFifoPort.Produce, 1)

for _ in range_(8):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2)
call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut])
add_10_i32(elemsIn[0], elemsIn[1], elemOut)
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)

elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1)
elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2)
call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost])
add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost)
of_in.release(ObjectFifoPort.Consume, 2)
of_out.release(ObjectFifoPort.Produce, 1)

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
tensor_ty = np.ndarray[(N,), np.dtype[np.int32]]

@runtime_sequence(tensor_ty, tensor_ty)
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)
npu_dma_memcpy_nd(
metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True
)
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N])
dma_wait(of_in, of_out)

print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import sys
import numpy as np

from aie.dialects.aie import *
from aie.dialects.aiex import *
Expand All @@ -24,49 +23,48 @@ def sliding_window():

@device(dev)
def device_body():
memRef_ty = T.memref(N // n_rows, T.i32())
subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]]

# Tile declarations
ShimTile = tile(col, 0)
ComputeTile = tile(col, 2)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty)
of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty)
of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty)

# AIE Core Function declarations
add_10_i32 = external_func(
"add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty]
"add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty]
)

# Set up compute tiles

@core(ComputeTile, "kernel.o")
def core_body():
for i in range_(10):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
if i == 0:
elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1)
call(add_10_i32, [elemInPre, elemInPre, elemOut])
add_10_i32(elemInPre, elemInPre, elemOut)
elif i == 9:
elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2)
call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut])
add_10_i32(elemsInPost[0], elemsInPost[1], elemOut)
of_in.release(ObjectFifoPort.Consume, 2)
else:
elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2)
call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut])
add_10_i32(elemsIn[0], elemsIn[1], elemOut)
of_in.release(ObjectFifoPort.Consume, 1)

of_out.release(ObjectFifoPort.Produce, 1)

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
tensor_ty = np.ndarray[(N,), np.dtype[np.int32]]

@runtime_sequence(tensor_ty, tensor_ty)
def sequence(A, C):
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_sync(column=0, row=0, direction=0, channel=0)
npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N])
npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N])
dma_wait(of_out)

print(ctx.module)

Expand Down
Loading

0 comments on commit c0b15bb

Please sign in to comment.