diff --git a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py index b18b9c7245..8041952e2d 100644 --- a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py +++ b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py @@ -73,7 +73,9 @@ def sequence(inTensor, notUsed, outTensor): ipu_dma_memcpy_nd( metadata="in0", bd_id=1, mem=inTensor, lengths=[1, 1, 1, 64] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/log_hello_world/hello_world.py b/reference_designs/ipu-xrt/log_hello_world/hello_world.py index cb0ea41c65..3229414b16 100644 --- a/reference_designs/ipu-xrt/log_hello_world/hello_world.py +++ b/reference_designs/ipu-xrt/log_hello_world/hello_world.py @@ -63,7 +63,9 @@ def sequence(in_mem, out_mem, logout): ipu_dma_memcpy_nd( metadata="logoutOF", bd_id=2, mem=logout, lengths=[1, 1, 1, N] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py index 7e9237e37c..a3584d3356 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py @@ -217,7 +217,9 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py index 996069dec7..d063fb5bb9 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py @@ -250,7 +250,9 @@ def sequence(A, B, C): strides=[0, n_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py index 661a413977..5d175eb28d 100755 --- a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py +++ b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py @@ -53,7 +53,9 @@ def core_body(): def sequence(A, B, C): ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, lengths=[1, 1, 1, N]) ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, lengths=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vector_scalar/aie2.py b/reference_designs/ipu-xrt/vector_scalar/aie2.py index 6f5c790537..d9b4d91769 100755 --- a/reference_designs/ipu-xrt/vector_scalar/aie2.py +++ b/reference_designs/ipu-xrt/vector_scalar/aie2.py @@ -70,7 +70,9 @@ def core_body(): def sequence(A, B, C): ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, lengths=[1, 1, 1, N]) ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, lengths=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py index b9e08da975..ddd343c3b1 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py @@ -10,9 +10,10 @@ from aie.dialects.aie import * from aie.dialects.aiex import * from aie.extras.dialects.ext import arith -from aie.extras.dialects.ext.scf import for_ +from aie.extras.dialects.ext.scf import range_, yield_ from aie.extras.context import mlir_mod_ctx from aie.extras import types as T +from aie.ir import TypeAttr width = 64 @@ -56,7 +57,7 @@ def deviceBody(): "gray2rgbaLine", inputs=[line_ty, line_bytes_ty, T.i32()] ) bitwise_and_line = external_func( - "bitwiseORLine", + "bitwiseANDLine", inputs=[line_bytes_ty, line_bytes_ty, line_bytes_ty, T.i32()], ) @@ -182,7 +183,7 @@ def deviceBody(): # Compute tile 2 @core(compute_tile2, "rgba2hue.cc.o") def coreBody(): - for _ in for_(sys.maxsize): + for _ in range_(sys.maxsize): elem_in = acquire( ObjectFifoPort.Consume, "inOF_L3L2", 1, line_bytes_ty ).acquired_elem() @@ -192,6 +193,7 @@ def coreBody(): Call(rgba2hueLine, [elem_in, elem_out, arith.constant(line_width)]) objectfifo_release(ObjectFifoPort.Consume, "inOF_L3L2", 1) objectfifo_release(ObjectFifoPort.Produce, "OF_2to34", 1) + yield_([]) # Compute tile 3 @core(compute_tile3, "threshold.cc.o") @@ -201,7 +203,7 @@ def coreBody(): threshold_maxvalue = arith.constant(255, T.i16()) threshold_mode_to_zero_inv = arith.constant(4, T.i8()) threshold_mode_binary = arith.constant(0, T.i8()) - for _ in for_(sys.maxsize): + for _ in range_(sys.maxsize): elem_in = acquire( ObjectFifoPort.Consume, "OF_2to34", 1, line_ty ).acquired_elem() @@ -240,6 +242,7 @@ def coreBody(): ) objectfifo_release(ObjectFifoPort.Consume, "OF_3to3", 1) objectfifo_release(ObjectFifoPort.Produce, "OF_3to5", 1) + yield_([]) # Compute tile 4 @core(compute_tile4, "threshold.cc.o") @@ -249,7 +252,7 @@ def coreBody(): threshold_maxvalue = arith.constant(255, T.i16()) threshold_mode_to_zero_inv = arith.constant(4, T.i8()) threshold_mode_binary = arith.constant(0, T.i8()) - for _ in for_(sys.maxsize): + for _ in range_(sys.maxsize): elem_in = acquire( ObjectFifoPort.Consume, "OF_2to34", 1, line_ty ).acquired_elem() @@ -288,11 +291,12 @@ def coreBody(): ) objectfifo_release(ObjectFifoPort.Consume, "OF_4to4", 1) objectfifo_release(ObjectFifoPort.Produce, "OF_4to5", 1) + yield_([]) # Compute tile 5 @core(compute_tile5, "combined_bitwiseOR_gray2rgba_bitwiseAND.a") def coreBody(): - for _ in for_(sys.maxsize): + for _ in range_(sys.maxsize): # bitwise OR elem_in1 = acquire( ObjectFifoPort.Consume, "OF_3to5", 1, line_ty @@ -350,6 +354,7 @@ def coreBody(): objectfifo_release(ObjectFifoPort.Consume, "OF_5to5b", 1) objectfifo_release(ObjectFifoPort.Consume, "inOF_L2L1", 1) objectfifo_release(ObjectFifoPort.Produce, "outOF_L1L2", 1) + yield_([]) # To/from AIE-array data movement diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py index bd5cce61b1..9b62f3d43a 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py @@ -9,10 +9,9 @@ from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext import arith, memref -from aie.extras.dialects.ext.scf import for_, yield_ +from aie.dialects.scf import * +from aie.extras.dialects.ext import memref, arith from aie.extras.context import mlir_mod_ctx -from aie.extras import types as T width = 512 height = 9 @@ -20,12 +19,12 @@ width = int(sys.argv[1]) height = int(sys.argv[2]) -line_width = width -line_width_channels = width * 4 # 4 channels +lineWidth = width +lineWidthChannels = width * 4 # 4 channels -enable_trace = False -trace_size_in_bytes = 8192 -trace_size_in_int32s = trace_size_in_bytes // 4 +enableTrace = False +traceSizeInBytes = 8192 +traceSizeInInt32s = traceSizeInBytes // 4 def color_threshold(): @@ -33,45 +32,46 @@ def color_threshold(): @device(AIEDevice.ipu) def device_body(): - line_ty = T.memref(line_width, T.ui8()) + line_channels_ty = T.memref(lineWidthChannels, T.ui8()) + line_ty = T.memref(lineWidth, T.ui8()) ofifo_line_channels_ty = TypeAttr.get( - ObjectFifoType.get(T.memref(line_width_channels, T.ui8())) + ObjectFifoType.get(T.memref(lineWidthChannels, T.ui8())) ) ofifo_line_ty = TypeAttr.get( - ObjectFifoType.get(T.memref(line_width, T.ui8())) + ObjectFifoType.get(T.memref(lineWidth, T.ui8())) ) # AIE Core Function declarations - threshold_line = external_func( + thresholdLine = external_func( "thresholdLine", inputs=[line_ty, line_ty, T.i32(), T.i16(), T.i16(), T.i8()], ) # Tile declarations - shim_tile = tile(0, 0) - mem_tile = tile(0, 1) - compute_tile2 = tile(0, 2) - compute_tile3 = tile(0, 3) - compute_tile4 = tile(0, 4) - compute_tile5 = tile(0, 5) + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + ComputeTile3 = tile(0, 3) + ComputeTile4 = tile(0, 4) + ComputeTile5 = tile(0, 5) # AIE-array data movement with object fifos # Input RGBA broadcast + memtile for skip objectfifo( - "inOOB_L3L2", shim_tile, [mem_tile], 2, ofifo_line_channels_ty, [], [] + "inOOB_L3L2", ShimTile, [MemTile], 2, ofifo_line_channels_ty, [], [] ) objectfifo( - "inOOB_L2L1_0", mem_tile, [compute_tile2], 2, ofifo_line_ty, [], [] + "inOOB_L2L1_0", MemTile, [ComputeTile2], 2, ofifo_line_ty, [], [] ) objectfifo( - "inOOB_L2L1_1", mem_tile, [compute_tile3], 2, ofifo_line_ty, [], [] + "inOOB_L2L1_1", MemTile, [ComputeTile3], 2, ofifo_line_ty, [], [] ) objectfifo( - "inOOB_L2L1_2", mem_tile, [compute_tile4], 2, ofifo_line_ty, [], [] + "inOOB_L2L1_2", MemTile, [ComputeTile4], 2, ofifo_line_ty, [], [] ) objectfifo( - "inOOB_L2L1_3", mem_tile, [compute_tile5], 2, ofifo_line_ty, [], [] + "inOOB_L2L1_3", MemTile, [ComputeTile5], 2, ofifo_line_ty, [], [] ) objectfifo_link( ["inOOB_L3L2"], @@ -80,19 +80,19 @@ def device_body(): # Output RGBA objectfifo( - "outOOB_L2L3", mem_tile, [shim_tile], 2, ofifo_line_channels_ty, [], [] + "outOOB_L2L3", MemTile, [ShimTile], 2, ofifo_line_channels_ty, [], [] ) objectfifo( - "outOOB_L1L2_0", compute_tile2, [mem_tile], 2, ofifo_line_ty, [], [] + "outOOB_L1L2_0", ComputeTile2, [MemTile], 2, ofifo_line_ty, [], [] ) objectfifo( - "outOOB_L1L2_1", compute_tile3, [mem_tile], 2, ofifo_line_ty, [], [] + "outOOB_L1L2_1", ComputeTile3, [MemTile], 2, ofifo_line_ty, [], [] ) objectfifo( - "outOOB_L1L2_2", compute_tile4, [mem_tile], 2, ofifo_line_ty, [], [] + "outOOB_L1L2_2", ComputeTile4, [MemTile], 2, ofifo_line_ty, [], [] ) objectfifo( - "outOOB_L1L2_3", compute_tile5, [mem_tile], 2, ofifo_line_ty, [], [] + "outOOB_L1L2_3", ComputeTile5, [MemTile], 2, ofifo_line_ty, [], [] ) objectfifo_link( ["outOOB_L1L2_0", "outOOB_L1L2_1", "outOOB_L1L2_2", "outOOB_L1L2_3"], @@ -100,51 +100,52 @@ def device_body(): ) # Runtime parameters - rtp_compute_tile2 = Buffer(compute_tile2, [16], T.i32(), "rtpComputeTile2") - rtp_compute_tile3 = Buffer(compute_tile3, [16], T.i32(), "rtpComputeTile3") - rtp_compute_tile4 = Buffer(compute_tile4, [16], T.i32(), "rtpComputeTile4") - rtp_compute_tile5 = Buffer(compute_tile5, [16], T.i32(), "rtpComputeTile5") + rtpComputeTile2 = Buffer(ComputeTile2, [16], T.i32(), "rtpComputeTile2") + rtpComputeTile3 = Buffer(ComputeTile3, [16], T.i32(), "rtpComputeTile3") + rtpComputeTile4 = Buffer(ComputeTile4, [16], T.i32(), "rtpComputeTile4") + rtpComputeTile5 = Buffer(ComputeTile5, [16], T.i32(), "rtpComputeTile5") # Set up compute tiles # Compute tile 2 - @core(compute_tile2, "threshold.cc.o") + @core(ComputeTile2, "threshold.cc.o") def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elem_in = acquire( + elemIn = acquire( ObjectFifoPort.Consume, "inOOB_L2L1_0", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() - elem_out = acquire( + elemOut = acquire( ObjectFifoPort.Produce, "outOOB_L1L2_0", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires - threshold_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile2, [0]) + thresholdValue = arith.trunci( + T.i16(), memref.load(rtpComputeTile2, [0]) ) - max_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile2, [1]) - ) - threshold_type = arith.trunci( - T.i8(), memref.load(rtp_compute_tile2, [2]) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [1])) + thresholdType = arith.trunci( + T.i8(), memref.load(rtpComputeTile2, [2]) ) + # maxValue = arith.constant(255, T.i16()) + # thresholdValue = arith.constant(50, T.i16()) + # thresholdType = arith.constant(0, T.i8()) Call( - threshold_line, + thresholdLine, [ - elem_in, - elem_out, - arith.constant(line_width), - threshold_value, - max_value, - threshold_type, + elemIn, + elemOut, + arith.constant(lineWidth), + thresholdValue, + maxValue, + thresholdType, ], ) @@ -153,45 +154,43 @@ def core_body(): yield_([]) # Compute tile 3 - @core(compute_tile3, "threshold.cc.o") + @core(ComputeTile3, "threshold.cc.o") def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elem_in = acquire( + elemIn = acquire( ObjectFifoPort.Consume, "inOOB_L2L1_1", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() - elem_out = acquire( + elemOut = acquire( ObjectFifoPort.Produce, "outOOB_L1L2_1", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires - threshold_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile3, [0]) + thresholdValue = arith.trunci( + T.i16(), memref.load(rtpComputeTile3, [0]) ) - max_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile3, [1]) - ) - threshold_type = arith.trunci( - T.i8(), memref.load(rtp_compute_tile3, [2]) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [1])) + thresholdType = arith.trunci( + T.i8(), memref.load(rtpComputeTile3, [2]) ) # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8()) Call( - threshold_line, + thresholdLine, [ - elem_in, - elem_out, - arith.constant(line_width), - threshold_value, - max_value, - threshold_type, + elemIn, + elemOut, + arith.constant(lineWidth), + thresholdValue, + maxValue, + thresholdType, ], ) @@ -200,46 +199,44 @@ def core_body(): yield_([]) # Compute tile 4 - @core(compute_tile4, "threshold.cc.o") + @core(ComputeTile4, "threshold.cc.o") def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elem_in = acquire( + elemIn = acquire( ObjectFifoPort.Consume, "inOOB_L2L1_2", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() - elem_out = acquire( + elemOut = acquire( ObjectFifoPort.Produce, "outOOB_L1L2_2", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires - threshold_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile4, [0]) - ) - max_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile4, [1]) + thresholdValue = arith.trunci( + T.i16(), memref.load(rtpComputeTile4, [0]) ) - threshold_type = arith.trunci( - T.i8(), memref.load(rtp_compute_tile4, [2]) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [1])) + thresholdType = arith.trunci( + T.i8(), memref.load(rtpComputeTile4, [2]) ) # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8()) Call( - threshold_line, + thresholdLine, [ - elem_in, - elem_out, - arith.constant(line_width), - threshold_value, - max_value, - threshold_type, + elemIn, + elemOut, + arith.constant(lineWidth), + thresholdValue, + maxValue, + thresholdType, ], ) @@ -248,46 +245,44 @@ def core_body(): yield_([]) # Compute tile 5 - @core(compute_tile5, "threshold.cc.o") + @core(ComputeTile5, "threshold.cc.o") def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elem_in = acquire( + elemIn = acquire( ObjectFifoPort.Consume, "inOOB_L2L1_3", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() - elem_out = acquire( + elemOut = acquire( ObjectFifoPort.Produce, "outOOB_L1L2_3", 1, - T.memref(line_width, T.ui8()), + T.memref(lineWidth, T.ui8()), ).acquired_elem() # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires - threshold_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile5, [0]) - ) - max_value = arith.trunci( - T.i16(), memref.load(rtp_compute_tile5, [1]) + thresholdValue = arith.trunci( + T.i16(), memref.load(rtpComputeTile5, [0]) ) - threshold_type = arith.trunci( - T.i8(), memref.load(rtp_compute_tile5, [2]) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [1])) + thresholdType = arith.trunci( + T.i8(), memref.load(rtpComputeTile5, [2]) ) # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8() Call( - threshold_line, + thresholdLine, [ - elem_in, - elem_out, - arith.constant(line_width), - threshold_value, - max_value, - threshold_type, + elemIn, + elemOut, + arith.constant(lineWidth), + thresholdValue, + maxValue, + thresholdType, ], ) @@ -297,13 +292,13 @@ def core_body(): # To/from AIE-array data movement - tensor_size = width * height - tensor_size_in_int32s = tensor_size // 4 + tensorSize = width * height + tensorSizeInInt32s = tensorSize // 4 @FuncOp.from_py_func( - T.memref(tensor_size_in_int32s, T.i32()), + T.memref(tensorSizeInInt32s, T.i32()), T.memref(32, T.i32()), # not used - T.memref(tensor_size_in_int32s, T.i32()), + T.memref(tensorSizeInInt32s, T.i32()), ) def sequence(inTensor, notUsed, outTensor): # thresholdValue, maxValue, thresholdType @@ -327,18 +322,19 @@ def sequence(inTensor, notUsed, outTensor): metadata="inOOB_L3L2", bd_id=1, mem=inTensor, - lengths=[1, 1, 1, tensor_size_in_int32s], + lengths=[1, 1, 1, tensorSizeInInt32s], ) ipu_dma_memcpy_nd( metadata="outOOB_L2L3", bd_id=0, mem=outTensor, - lengths=[1, 1, 1, tensor_size_in_int32s], + lengths=[1, 1, 1, tensorSizeInInt32s], ) ipu_sync( column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 ) + # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py index 8034185fc3..d54997aa04 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py @@ -19,14 +19,14 @@ width = int(sys.argv[1]) height = int(sys.argv[2]) -height_minus1 = height - 1 -line_width = width -line_width_in_bytes = width * 4 -line_width_in_int32s = line_width_in_bytes // 4 +heightMinus1 = height - 1 +lineWidth = width +lineWidthInBytes = width * 4 +lineWidthInInt32s = lineWidthInBytes // 4 -enable_trace = False -trace_size_in_bytes = 8192 -trace_size_in_int32s = trace_size_in_bytes // 4 +enableTrace = False +traceSizeInBytes = 8192 +traceSizeInInt32s = traceSizeInBytes // 4 def edge_detect(): @@ -34,9 +34,9 @@ def edge_detect(): @device(AIEDevice.ipu) def device_body(): - line_bytes_ty = T.memref(line_width_in_bytes, T.ui8()) - line_ty = T.memref(line_width, T.ui8()) - mem_ref_3x3_ty = T.memref(3, 3, T.i16()) + line_bytes_ty = T.memref(lineWidthInBytes, T.ui8()) + line_ty = T.memref(lineWidth, T.ui8()) + memRef_3x3_ty = T.memref(3, 3, T.i16()) ofifo_line_bytes_ty = TypeAttr.get(ObjectFifoType.get(line_bytes_ty)) ofifo_line_ty = TypeAttr.get(ObjectFifoType.get(line_ty)) @@ -47,7 +47,7 @@ def device_body(): ) filter2d_line = external_func( "filter2dLine", - inputs=[line_ty, line_ty, line_ty, line_ty, T.i32(), mem_ref_3x3_ty], + inputs=[line_ty, line_ty, line_ty, line_ty, T.i32(), memRef_3x3_ty], ) threshold_line = external_func( "thresholdLine", @@ -70,19 +70,19 @@ def device_body(): ) # Tile declarations - shim_tile = tile(0, 0) - mem_tile = tile(0, 1) - compute_tile2 = tile(0, 2) - compute_tile3 = tile(0, 3) - compute_tile4 = tile(0, 4) - compute_tile5 = tile(0, 5) + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + ComputeTile3 = tile(0, 3) + ComputeTile4 = tile(0, 4) + ComputeTile5 = tile(0, 5) # AIE-array data movement with object fifos # Input objectfifo( "inOF_L3L2", - shim_tile, - [compute_tile2, mem_tile], + ShimTile, + [ComputeTile2, MemTile], [2, 2, 7], ofifo_line_bytes_ty, [], @@ -90,8 +90,8 @@ def device_body(): ) objectfifo( "inOF_L2L1", - mem_tile, - [compute_tile5], + MemTile, + [ComputeTile5], 7, ofifo_line_bytes_ty, [], @@ -102,8 +102,8 @@ def device_body(): # Output objectfifo( "outOF_L2L3", - mem_tile, - [shim_tile], + MemTile, + [ShimTile], 2, ofifo_line_bytes_ty, [], @@ -111,8 +111,8 @@ def device_body(): ) objectfifo( "outOF_L1L2", - compute_tile5, - [mem_tile], + ComputeTile5, + [MemTile], 2, ofifo_line_bytes_ty, [], @@ -123,8 +123,8 @@ def device_body(): # Intermediate objectfifo( "OF_2to3", - compute_tile2, - [compute_tile3], + ComputeTile2, + [ComputeTile3], 4, ofifo_line_ty, [], @@ -132,8 +132,8 @@ def device_body(): ) objectfifo( "OF_3to4", - compute_tile3, - [compute_tile4], + ComputeTile3, + [ComputeTile4], 2, ofifo_line_ty, [], @@ -141,8 +141,8 @@ def device_body(): ) objectfifo( "OF_4to5", - compute_tile4, - [compute_tile5], + ComputeTile4, + [ComputeTile5], 2, ofifo_line_ty, [], @@ -150,8 +150,8 @@ def device_body(): ) objectfifo( "OF_5to5", - compute_tile5, - [compute_tile5], + ComputeTile5, + [ComputeTile5], 1, ofifo_line_bytes_ty, [], @@ -161,7 +161,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(compute_tile2, "rgba2gray.cc.o") + @core(ComputeTile2, "rgba2gray.cc.o") def core_body(): for _ in for_(4294967295): # for _ in for_(36): @@ -172,16 +172,14 @@ def core_body(): ObjectFifoPort.Produce, "OF_2to3", 1, line_ty ).acquired_elem() - Call( - rgba2gray_line, [elem_in, elem_out, arith.constant(line_width)] - ) + Call(rgba2gray_line, [elem_in, elem_out, arith.constant(lineWidth)]) objectfifo_release(ObjectFifoPort.Consume, "inOF_L3L2", 1) objectfifo_release(ObjectFifoPort.Produce, "OF_2to3", 1) yield_([]) # Compute tile 3 - @core(compute_tile3, "filter2d.cc.o") + @core(ComputeTile3, "filter2d.cc.o") def core_body(): kernel = memref.alloc([3, 3], T.i16()) v0 = arith.constant(0, T.i16()) @@ -212,14 +210,14 @@ def core_body(): elems_in_pre[0], elems_in_pre[1], elem_pre_out, - arith.constant(line_width), + arith.constant(lineWidth), kernel, ], ) objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) # Steady State : Middle - for _ in for_(1, height_minus1): + for _ in for_(1, heightMinus1): elems_in = acquire( ObjectFifoPort.Consume, "OF_2to3", 3, line_ty ).acquired_elem() @@ -233,7 +231,7 @@ def core_body(): elems_in[1], elems_in[2], elem_out, - arith.constant(line_width), + arith.constant(lineWidth), kernel, ], ) @@ -255,7 +253,7 @@ def core_body(): elems_in_post[1], elems_in_post[1], elem_post_out, - arith.constant(line_width), + arith.constant(lineWidth), kernel, ], ) @@ -264,7 +262,7 @@ def core_body(): yield_([]) # Compute tile 4 - @core(compute_tile4, "threshold.cc.o") + @core(ComputeTile4, "threshold.cc.o") def core_body(): v_thr = arith.constant(10, T.i16()) v_max = arith.constant(255, T.i16()) @@ -283,7 +281,7 @@ def core_body(): [ elem_in, elem_out, - arith.constant(line_width), + arith.constant(lineWidth), v_thr, v_max, v_typ, @@ -295,7 +293,7 @@ def core_body(): yield_([]) # Compute tile 5 - @core(compute_tile5, "combined_gray2rgba_addWeighted.a") + @core(ComputeTile5, "combined_gray2rgba_addWeighted.a") def core_body(): for _ in for_(4294967295): elem_in = acquire( @@ -305,9 +303,7 @@ def core_body(): ObjectFifoPort.Produce, "OF_5to5", 1, line_bytes_ty ).acquired_elem() - Call( - gray2rgba_line, [elem_in, elem_out, arith.constant(line_width)] - ) + Call(gray2rgba_line, [elem_in, elem_out, arith.constant(lineWidth)]) objectfifo_release(ObjectFifoPort.Consume, "OF_4to5", 1) objectfifo_release(ObjectFifoPort.Produce, "OF_5to5", 1) @@ -332,7 +328,7 @@ def core_body(): elem_in1, elem_in2, elem_out2, - arith.constant(line_width_in_bytes), + arith.constant(lineWidthInBytes), alpha, beta, gamma, @@ -346,29 +342,30 @@ def core_body(): # To/from AIE-array data movement - tensor_size = width * height * 4 # 4 channels - tensor_size_in_int32s = tensor_size // 4 - tensor_ty = T.memref(tensor_size_in_int32s, T.i32()) - mem_ref_16x16_ty = T.memref(16, 16, T.i32()) + tensorSize = width * height * 4 # 4 channels + tensorSizeInInt32s = tensorSize // 4 + tensor_ty = T.memref(tensorSizeInInt32s, T.i32()) + memRef_16x16_ty = T.memref(16, 16, T.i32()) - @FuncOp.from_py_func(tensor_ty, mem_ref_16x16_ty, tensor_ty) + @FuncOp.from_py_func(tensor_ty, memRef_16x16_ty, tensor_ty) def sequence(I, B, O): ipu_dma_memcpy_nd( metadata="outOF_L2L3", bd_id=0, mem=O, - lengths=[1, 1, 1, tensor_size_in_int32s], + lengths=[1, 1, 1, tensorSizeInInt32s], ) ipu_dma_memcpy_nd( metadata="inOF_L3L2", bd_id=1, mem=I, - lengths=[1, 1, 1, tensor_size_in_int32s], + lengths=[1, 1, 1, tensorSizeInInt32s], ) ipu_sync( column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 ) + # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/filter2d.cc b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/filter2d.cc index 166aef0f94..1bb7302982 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/filter2d.cc +++ b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/filter2d.cc @@ -220,4 +220,4 @@ lineWidth, 1, filter2dValue, maxValue); #endif */ -} // extern "C" \ No newline at end of file +} // extern "C" diff --git a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/lut_inv_8b.h b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/lut_inv_8b.h index 681630c7eb..da3f0e0851 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/lut_inv_8b.h +++ b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/lut_inv_8b.h @@ -5,74 +5,138 @@ constexpr uint16 num_entries_lut_inv_8b = 256; // Q0.8 data format alignas(aie::vector_decl_align) const uint16 lut_inv_8b_ab[] = { -/*bank0*/255,0,128,85,64,51,42,36,/*bank1*/255,0,128,85,64,51,42,36, -/*bank0*/32,28,25,23,21,19,18,17,/*bank1*/32,28,25,23,21,19,18,17, -/*bank0*/16,15,14,13,12,12,11,11,/*bank1*/16,15,14,13,12,12,11,11, -/*bank0*/10,10,9,9,9,8,8,8,/*bank1*/10,10,9,9,9,8,8,8, -/*bank0*/8,7,7,7,7,6,6,6,/*bank1*/8,7,7,7,7,6,6,6, -/*bank0*/6,6,6,5,5,5,5,5,/*bank1*/6,6,6,5,5,5,5,5, -/*bank0*/5,5,5,5,4,4,4,4,/*bank1*/5,5,5,5,4,4,4,4, -/*bank0*/4,4,4,4,4,4,4,4,/*bank1*/4,4,4,4,4,4,4,4, -/*bank0*/4,3,3,3,3,3,3,3,/*bank1*/4,3,3,3,3,3,3,3, -/*bank0*/3,3,3,3,3,3,3,3,/*bank1*/3,3,3,3,3,3,3,3, -/*bank0*/3,3,3,3,3,3,2,2,/*bank1*/3,3,3,3,3,3,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,1,1,1,1,1,1,1,/*bank1*/2,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, + /*bank0*/ 255, 0, 128, 85, 64, 51, 42, 36, + /*bank1*/ 255, 0, 128, 85, 64, 51, 42, 36, + /*bank0*/ 32, 28, 25, 23, 21, 19, 18, 17, + /*bank1*/ 32, 28, 25, 23, 21, 19, 18, 17, + /*bank0*/ 16, 15, 14, 13, 12, 12, 11, 11, + /*bank1*/ 16, 15, 14, 13, 12, 12, 11, 11, + /*bank0*/ 10, 10, 9, 9, 9, 8, 8, 8, + /*bank1*/ 10, 10, 9, 9, 9, 8, 8, 8, + /*bank0*/ 8, 7, 7, 7, 7, 6, 6, 6, + /*bank1*/ 8, 7, 7, 7, 7, 6, 6, 6, + /*bank0*/ 6, 6, 6, 5, 5, 5, 5, 5, + /*bank1*/ 6, 6, 6, 5, 5, 5, 5, 5, + /*bank0*/ 5, 5, 5, 5, 4, 4, 4, 4, + /*bank1*/ 5, 5, 5, 5, 4, 4, 4, 4, + /*bank0*/ 4, 4, 4, 4, 4, 4, 4, 4, + /*bank1*/ 4, 4, 4, 4, 4, 4, 4, 4, + /*bank0*/ 4, 3, 3, 3, 3, 3, 3, 3, + /*bank1*/ 4, 3, 3, 3, 3, 3, 3, 3, + /*bank0*/ 3, 3, 3, 3, 3, 3, 3, 3, + /*bank1*/ 3, 3, 3, 3, 3, 3, 3, 3, + /*bank0*/ 3, 3, 3, 3, 3, 3, 2, 2, + /*bank1*/ 3, 3, 3, 3, 3, 3, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 2, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, }; // Q0.8 data format alignas(aie::vector_decl_align) const uint16 lut_inv_8b_cd[] = { -/*bank0*/255,0,128,85,64,51,42,36,/*bank1*/255,0,128,85,64,51,42,36, -/*bank0*/32,28,25,23,21,19,18,17,/*bank1*/32,28,25,23,21,19,18,17, -/*bank0*/16,15,14,13,12,12,11,11,/*bank1*/16,15,14,13,12,12,11,11, -/*bank0*/10,10,9,9,9,8,8,8,/*bank1*/10,10,9,9,9,8,8,8, -/*bank0*/8,7,7,7,7,6,6,6,/*bank1*/8,7,7,7,7,6,6,6, -/*bank0*/6,6,6,5,5,5,5,5,/*bank1*/6,6,6,5,5,5,5,5, -/*bank0*/5,5,5,5,4,4,4,4,/*bank1*/5,5,5,5,4,4,4,4, -/*bank0*/4,4,4,4,4,4,4,4,/*bank1*/4,4,4,4,4,4,4,4, -/*bank0*/4,3,3,3,3,3,3,3,/*bank1*/4,3,3,3,3,3,3,3, -/*bank0*/3,3,3,3,3,3,3,3,/*bank1*/3,3,3,3,3,3,3,3, -/*bank0*/3,3,3,3,3,3,2,2,/*bank1*/3,3,3,3,3,3,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,2,2,2,2,2,2,2,/*bank1*/2,2,2,2,2,2,2,2, -/*bank0*/2,1,1,1,1,1,1,1,/*bank1*/2,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, -/*bank0*/1,1,1,1,1,1,1,1,/*bank1*/1,1,1,1,1,1,1,1, + /*bank0*/ 255, 0, 128, 85, 64, 51, 42, 36, + /*bank1*/ 255, 0, 128, 85, 64, 51, 42, 36, + /*bank0*/ 32, 28, 25, 23, 21, 19, 18, 17, + /*bank1*/ 32, 28, 25, 23, 21, 19, 18, 17, + /*bank0*/ 16, 15, 14, 13, 12, 12, 11, 11, + /*bank1*/ 16, 15, 14, 13, 12, 12, 11, 11, + /*bank0*/ 10, 10, 9, 9, 9, 8, 8, 8, + /*bank1*/ 10, 10, 9, 9, 9, 8, 8, 8, + /*bank0*/ 8, 7, 7, 7, 7, 6, 6, 6, + /*bank1*/ 8, 7, 7, 7, 7, 6, 6, 6, + /*bank0*/ 6, 6, 6, 5, 5, 5, 5, 5, + /*bank1*/ 6, 6, 6, 5, 5, 5, 5, 5, + /*bank0*/ 5, 5, 5, 5, 4, 4, 4, 4, + /*bank1*/ 5, 5, 5, 5, 4, 4, 4, 4, + /*bank0*/ 4, 4, 4, 4, 4, 4, 4, 4, + /*bank1*/ 4, 4, 4, 4, 4, 4, 4, 4, + /*bank0*/ 4, 3, 3, 3, 3, 3, 3, 3, + /*bank1*/ 4, 3, 3, 3, 3, 3, 3, 3, + /*bank0*/ 3, 3, 3, 3, 3, 3, 3, 3, + /*bank1*/ 3, 3, 3, 3, 3, 3, 3, 3, + /*bank0*/ 3, 3, 3, 3, 3, 3, 2, 2, + /*bank1*/ 3, 3, 3, 3, 3, 3, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank1*/ 2, 2, 2, 2, 2, 2, 2, 2, + /*bank0*/ 2, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 2, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank0*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*bank1*/ 1, 1, 1, 1, 1, 1, 1, 1, }; // constexpr uint16 num_entries_lut_inv_16b = 256; @@ -154,74 +218,138 @@ constexpr uint16 num_entries_lut_inv_16b = 256; // Q7.9 data format (inv * 85) alignas(aie::vector_decl_align) const uint16 lut_inv_16b_ab[] = { -/*bank0*/43520,43520,21760,14506,10880,8704,7253,6217,/*bank1*/43520,43520,21760,14506,10880,8704,7253,6217, -/*bank0*/5440,4835,4352,3956,3626,3347,3108,2901,/*bank1*/5440,4835,4352,3956,3626,3347,3108,2901, -/*bank0*/2720,2560,2417,2290,2176,2072,1978,1892,/*bank1*/2720,2560,2417,2290,2176,2072,1978,1892, -/*bank0*/1813,1740,1673,1611,1554,1500,1450,1403,/*bank1*/1813,1740,1673,1611,1554,1500,1450,1403, -/*bank0*/1360,1318,1280,1243,1208,1176,1145,1115,/*bank1*/1360,1318,1280,1243,1208,1176,1145,1115, -/*bank0*/1088,1061,1036,1012,989,967,946,925,/*bank1*/1088,1061,1036,1012,989,967,946,925, -/*bank0*/906,888,870,853,836,821,805,791,/*bank1*/906,888,870,853,836,821,805,791, -/*bank0*/777,763,750,737,725,713,701,690,/*bank1*/777,763,750,737,725,713,701,690, -/*bank0*/680,669,659,649,640,630,621,612,/*bank1*/680,669,659,649,640,630,621,612, -/*bank0*/604,596,588,580,572,565,557,550,/*bank1*/604,596,588,580,572,565,557,550, -/*bank0*/544,537,530,524,518,512,506,500,/*bank1*/544,537,530,524,518,512,506,500, -/*bank0*/494,488,483,478,473,467,462,458,/*bank1*/494,488,483,478,473,467,462,458, -/*bank0*/453,448,444,439,435,430,426,422,/*bank1*/453,448,444,439,435,430,426,422, -/*bank0*/418,414,410,406,402,399,395,392,/*bank1*/418,414,410,406,402,399,395,392, -/*bank0*/388,385,381,378,375,371,368,365,/*bank1*/388,385,381,378,375,371,368,365, -/*bank0*/362,359,356,353,350,348,345,342,/*bank1*/362,359,356,353,350,348,345,342, -/*bank0*/340,337,334,332,329,327,324,322,/*bank1*/340,337,334,332,329,327,324,322, -/*bank0*/320,317,315,313,310,308,306,304,/*bank1*/320,317,315,313,310,308,306,304, -/*bank0*/302,300,298,296,294,292,290,288,/*bank1*/302,300,298,296,294,292,290,288, -/*bank0*/286,284,282,280,278,277,275,273,/*bank1*/286,284,282,280,278,277,275,273, -/*bank0*/272,270,268,266,265,263,262,260,/*bank1*/272,270,268,266,265,263,262,260, -/*bank0*/259,257,256,254,253,251,250,248,/*bank1*/259,257,256,254,253,251,250,248, -/*bank0*/247,245,244,243,241,240,239,237,/*bank1*/247,245,244,243,241,240,239,237, -/*bank0*/236,235,233,232,231,230,229,227,/*bank1*/236,235,233,232,231,230,229,227, -/*bank0*/226,225,224,223,222,220,219,218,/*bank1*/226,225,224,223,222,220,219,218, -/*bank0*/217,216,215,214,213,212,211,210,/*bank1*/217,216,215,214,213,212,211,210, -/*bank0*/209,208,207,206,205,204,203,202,/*bank1*/209,208,207,206,205,204,203,202, -/*bank0*/201,200,199,198,197,196,196,195,/*bank1*/201,200,199,198,197,196,196,195, -/*bank0*/194,193,192,191,190,190,189,188,/*bank1*/194,193,192,191,190,190,189,188, -/*bank0*/187,186,185,185,184,183,182,182,/*bank1*/187,186,185,185,184,183,182,182, -/*bank0*/181,180,179,179,178,177,176,176,/*bank1*/181,180,179,179,178,177,176,176, -/*bank0*/175,174,174,173,172,172,171,170,/*bank1*/175,174,174,173,172,172,171,170, + /*bank0*/ 43520, 43520, 21760, 14506, 10880, 8704, 7253, 6217, + /*bank1*/ 43520, 43520, 21760, 14506, 10880, 8704, 7253, 6217, + /*bank0*/ 5440, 4835, 4352, 3956, 3626, 3347, 3108, 2901, + /*bank1*/ 5440, 4835, 4352, 3956, 3626, 3347, 3108, 2901, + /*bank0*/ 2720, 2560, 2417, 2290, 2176, 2072, 1978, 1892, + /*bank1*/ 2720, 2560, 2417, 2290, 2176, 2072, 1978, 1892, + /*bank0*/ 1813, 1740, 1673, 1611, 1554, 1500, 1450, 1403, + /*bank1*/ 1813, 1740, 1673, 1611, 1554, 1500, 1450, 1403, + /*bank0*/ 1360, 1318, 1280, 1243, 1208, 1176, 1145, 1115, + /*bank1*/ 1360, 1318, 1280, 1243, 1208, 1176, 1145, 1115, + /*bank0*/ 1088, 1061, 1036, 1012, 989, 967, 946, 925, + /*bank1*/ 1088, 1061, 1036, 1012, 989, 967, 946, 925, + /*bank0*/ 906, 888, 870, 853, 836, 821, 805, 791, + /*bank1*/ 906, 888, 870, 853, 836, 821, 805, 791, + /*bank0*/ 777, 763, 750, 737, 725, 713, 701, 690, + /*bank1*/ 777, 763, 750, 737, 725, 713, 701, 690, + /*bank0*/ 680, 669, 659, 649, 640, 630, 621, 612, + /*bank1*/ 680, 669, 659, 649, 640, 630, 621, 612, + /*bank0*/ 604, 596, 588, 580, 572, 565, 557, 550, + /*bank1*/ 604, 596, 588, 580, 572, 565, 557, 550, + /*bank0*/ 544, 537, 530, 524, 518, 512, 506, 500, + /*bank1*/ 544, 537, 530, 524, 518, 512, 506, 500, + /*bank0*/ 494, 488, 483, 478, 473, 467, 462, 458, + /*bank1*/ 494, 488, 483, 478, 473, 467, 462, 458, + /*bank0*/ 453, 448, 444, 439, 435, 430, 426, 422, + /*bank1*/ 453, 448, 444, 439, 435, 430, 426, 422, + /*bank0*/ 418, 414, 410, 406, 402, 399, 395, 392, + /*bank1*/ 418, 414, 410, 406, 402, 399, 395, 392, + /*bank0*/ 388, 385, 381, 378, 375, 371, 368, 365, + /*bank1*/ 388, 385, 381, 378, 375, 371, 368, 365, + /*bank0*/ 362, 359, 356, 353, 350, 348, 345, 342, + /*bank1*/ 362, 359, 356, 353, 350, 348, 345, 342, + /*bank0*/ 340, 337, 334, 332, 329, 327, 324, 322, + /*bank1*/ 340, 337, 334, 332, 329, 327, 324, 322, + /*bank0*/ 320, 317, 315, 313, 310, 308, 306, 304, + /*bank1*/ 320, 317, 315, 313, 310, 308, 306, 304, + /*bank0*/ 302, 300, 298, 296, 294, 292, 290, 288, + /*bank1*/ 302, 300, 298, 296, 294, 292, 290, 288, + /*bank0*/ 286, 284, 282, 280, 278, 277, 275, 273, + /*bank1*/ 286, 284, 282, 280, 278, 277, 275, 273, + /*bank0*/ 272, 270, 268, 266, 265, 263, 262, 260, + /*bank1*/ 272, 270, 268, 266, 265, 263, 262, 260, + /*bank0*/ 259, 257, 256, 254, 253, 251, 250, 248, + /*bank1*/ 259, 257, 256, 254, 253, 251, 250, 248, + /*bank0*/ 247, 245, 244, 243, 241, 240, 239, 237, + /*bank1*/ 247, 245, 244, 243, 241, 240, 239, 237, + /*bank0*/ 236, 235, 233, 232, 231, 230, 229, 227, + /*bank1*/ 236, 235, 233, 232, 231, 230, 229, 227, + /*bank0*/ 226, 225, 224, 223, 222, 220, 219, 218, + /*bank1*/ 226, 225, 224, 223, 222, 220, 219, 218, + /*bank0*/ 217, 216, 215, 214, 213, 212, 211, 210, + /*bank1*/ 217, 216, 215, 214, 213, 212, 211, 210, + /*bank0*/ 209, 208, 207, 206, 205, 204, 203, 202, + /*bank1*/ 209, 208, 207, 206, 205, 204, 203, 202, + /*bank0*/ 201, 200, 199, 198, 197, 196, 196, 195, + /*bank1*/ 201, 200, 199, 198, 197, 196, 196, 195, + /*bank0*/ 194, 193, 192, 191, 190, 190, 189, 188, + /*bank1*/ 194, 193, 192, 191, 190, 190, 189, 188, + /*bank0*/ 187, 186, 185, 185, 184, 183, 182, 182, + /*bank1*/ 187, 186, 185, 185, 184, 183, 182, 182, + /*bank0*/ 181, 180, 179, 179, 178, 177, 176, 176, + /*bank1*/ 181, 180, 179, 179, 178, 177, 176, 176, + /*bank0*/ 175, 174, 174, 173, 172, 172, 171, 170, + /*bank1*/ 175, 174, 174, 173, 172, 172, 171, 170, }; // Q7.9 data format (inv * 85) alignas(aie::vector_decl_align) const uint16 lut_inv_16b_cd[] = { -/*bank0*/43520,43520,21760,14506,10880,8704,7253,6217,/*bank1*/43520,43520,21760,14506,10880,8704,7253,6217, -/*bank0*/5440,4835,4352,3956,3626,3347,3108,2901,/*bank1*/5440,4835,4352,3956,3626,3347,3108,2901, -/*bank0*/2720,2560,2417,2290,2176,2072,1978,1892,/*bank1*/2720,2560,2417,2290,2176,2072,1978,1892, -/*bank0*/1813,1740,1673,1611,1554,1500,1450,1403,/*bank1*/1813,1740,1673,1611,1554,1500,1450,1403, -/*bank0*/1360,1318,1280,1243,1208,1176,1145,1115,/*bank1*/1360,1318,1280,1243,1208,1176,1145,1115, -/*bank0*/1088,1061,1036,1012,989,967,946,925,/*bank1*/1088,1061,1036,1012,989,967,946,925, -/*bank0*/906,888,870,853,836,821,805,791,/*bank1*/906,888,870,853,836,821,805,791, -/*bank0*/777,763,750,737,725,713,701,690,/*bank1*/777,763,750,737,725,713,701,690, -/*bank0*/680,669,659,649,640,630,621,612,/*bank1*/680,669,659,649,640,630,621,612, -/*bank0*/604,596,588,580,572,565,557,550,/*bank1*/604,596,588,580,572,565,557,550, -/*bank0*/544,537,530,524,518,512,506,500,/*bank1*/544,537,530,524,518,512,506,500, -/*bank0*/494,488,483,478,473,467,462,458,/*bank1*/494,488,483,478,473,467,462,458, -/*bank0*/453,448,444,439,435,430,426,422,/*bank1*/453,448,444,439,435,430,426,422, -/*bank0*/418,414,410,406,402,399,395,392,/*bank1*/418,414,410,406,402,399,395,392, -/*bank0*/388,385,381,378,375,371,368,365,/*bank1*/388,385,381,378,375,371,368,365, -/*bank0*/362,359,356,353,350,348,345,342,/*bank1*/362,359,356,353,350,348,345,342, -/*bank0*/340,337,334,332,329,327,324,322,/*bank1*/340,337,334,332,329,327,324,322, -/*bank0*/320,317,315,313,310,308,306,304,/*bank1*/320,317,315,313,310,308,306,304, -/*bank0*/302,300,298,296,294,292,290,288,/*bank1*/302,300,298,296,294,292,290,288, -/*bank0*/286,284,282,280,278,277,275,273,/*bank1*/286,284,282,280,278,277,275,273, -/*bank0*/272,270,268,266,265,263,262,260,/*bank1*/272,270,268,266,265,263,262,260, -/*bank0*/259,257,256,254,253,251,250,248,/*bank1*/259,257,256,254,253,251,250,248, -/*bank0*/247,245,244,243,241,240,239,237,/*bank1*/247,245,244,243,241,240,239,237, -/*bank0*/236,235,233,232,231,230,229,227,/*bank1*/236,235,233,232,231,230,229,227, -/*bank0*/226,225,224,223,222,220,219,218,/*bank1*/226,225,224,223,222,220,219,218, -/*bank0*/217,216,215,214,213,212,211,210,/*bank1*/217,216,215,214,213,212,211,210, -/*bank0*/209,208,207,206,205,204,203,202,/*bank1*/209,208,207,206,205,204,203,202, -/*bank0*/201,200,199,198,197,196,196,195,/*bank1*/201,200,199,198,197,196,196,195, -/*bank0*/194,193,192,191,190,190,189,188,/*bank1*/194,193,192,191,190,190,189,188, -/*bank0*/187,186,185,185,184,183,182,182,/*bank1*/187,186,185,185,184,183,182,182, -/*bank0*/181,180,179,179,178,177,176,176,/*bank1*/181,180,179,179,178,177,176,176, -/*bank0*/175,174,174,173,172,172,171,170,/*bank1*/175,174,174,173,172,172,171,170, + /*bank0*/ 43520, 43520, 21760, 14506, 10880, 8704, 7253, 6217, + /*bank1*/ 43520, 43520, 21760, 14506, 10880, 8704, 7253, 6217, + /*bank0*/ 5440, 4835, 4352, 3956, 3626, 3347, 3108, 2901, + /*bank1*/ 5440, 4835, 4352, 3956, 3626, 3347, 3108, 2901, + /*bank0*/ 2720, 2560, 2417, 2290, 2176, 2072, 1978, 1892, + /*bank1*/ 2720, 2560, 2417, 2290, 2176, 2072, 1978, 1892, + /*bank0*/ 1813, 1740, 1673, 1611, 1554, 1500, 1450, 1403, + /*bank1*/ 1813, 1740, 1673, 1611, 1554, 1500, 1450, 1403, + /*bank0*/ 1360, 1318, 1280, 1243, 1208, 1176, 1145, 1115, + /*bank1*/ 1360, 1318, 1280, 1243, 1208, 1176, 1145, 1115, + /*bank0*/ 1088, 1061, 1036, 1012, 989, 967, 946, 925, + /*bank1*/ 1088, 1061, 1036, 1012, 989, 967, 946, 925, + /*bank0*/ 906, 888, 870, 853, 836, 821, 805, 791, + /*bank1*/ 906, 888, 870, 853, 836, 821, 805, 791, + /*bank0*/ 777, 763, 750, 737, 725, 713, 701, 690, + /*bank1*/ 777, 763, 750, 737, 725, 713, 701, 690, + /*bank0*/ 680, 669, 659, 649, 640, 630, 621, 612, + /*bank1*/ 680, 669, 659, 649, 640, 630, 621, 612, + /*bank0*/ 604, 596, 588, 580, 572, 565, 557, 550, + /*bank1*/ 604, 596, 588, 580, 572, 565, 557, 550, + /*bank0*/ 544, 537, 530, 524, 518, 512, 506, 500, + /*bank1*/ 544, 537, 530, 524, 518, 512, 506, 500, + /*bank0*/ 494, 488, 483, 478, 473, 467, 462, 458, + /*bank1*/ 494, 488, 483, 478, 473, 467, 462, 458, + /*bank0*/ 453, 448, 444, 439, 435, 430, 426, 422, + /*bank1*/ 453, 448, 444, 439, 435, 430, 426, 422, + /*bank0*/ 418, 414, 410, 406, 402, 399, 395, 392, + /*bank1*/ 418, 414, 410, 406, 402, 399, 395, 392, + /*bank0*/ 388, 385, 381, 378, 375, 371, 368, 365, + /*bank1*/ 388, 385, 381, 378, 375, 371, 368, 365, + /*bank0*/ 362, 359, 356, 353, 350, 348, 345, 342, + /*bank1*/ 362, 359, 356, 353, 350, 348, 345, 342, + /*bank0*/ 340, 337, 334, 332, 329, 327, 324, 322, + /*bank1*/ 340, 337, 334, 332, 329, 327, 324, 322, + /*bank0*/ 320, 317, 315, 313, 310, 308, 306, 304, + /*bank1*/ 320, 317, 315, 313, 310, 308, 306, 304, + /*bank0*/ 302, 300, 298, 296, 294, 292, 290, 288, + /*bank1*/ 302, 300, 298, 296, 294, 292, 290, 288, + /*bank0*/ 286, 284, 282, 280, 278, 277, 275, 273, + /*bank1*/ 286, 284, 282, 280, 278, 277, 275, 273, + /*bank0*/ 272, 270, 268, 266, 265, 263, 262, 260, + /*bank1*/ 272, 270, 268, 266, 265, 263, 262, 260, + /*bank0*/ 259, 257, 256, 254, 253, 251, 250, 248, + /*bank1*/ 259, 257, 256, 254, 253, 251, 250, 248, + /*bank0*/ 247, 245, 244, 243, 241, 240, 239, 237, + /*bank1*/ 247, 245, 244, 243, 241, 240, 239, 237, + /*bank0*/ 236, 235, 233, 232, 231, 230, 229, 227, + /*bank1*/ 236, 235, 233, 232, 231, 230, 229, 227, + /*bank0*/ 226, 225, 224, 223, 222, 220, 219, 218, + /*bank1*/ 226, 225, 224, 223, 222, 220, 219, 218, + /*bank0*/ 217, 216, 215, 214, 213, 212, 211, 210, + /*bank1*/ 217, 216, 215, 214, 213, 212, 211, 210, + /*bank0*/ 209, 208, 207, 206, 205, 204, 203, 202, + /*bank1*/ 209, 208, 207, 206, 205, 204, 203, 202, + /*bank0*/ 201, 200, 199, 198, 197, 196, 196, 195, + /*bank1*/ 201, 200, 199, 198, 197, 196, 196, 195, + /*bank0*/ 194, 193, 192, 191, 190, 190, 189, 188, + /*bank1*/ 194, 193, 192, 191, 190, 190, 189, 188, + /*bank0*/ 187, 186, 185, 185, 184, 183, 182, 182, + /*bank1*/ 187, 186, 185, 185, 184, 183, 182, 182, + /*bank0*/ 181, 180, 179, 179, 178, 177, 176, 176, + /*bank1*/ 181, 180, 179, 179, 178, 177, 176, 176, + /*bank0*/ 175, 174, 174, 173, 172, 172, 171, 170, + /*bank1*/ 175, 174, 174, 173, 172, 172, 171, 170, }; -#endif \ No newline at end of file +#endif diff --git a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/rgba2hue.cc b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/rgba2hue.cc index 3839599d35..4bc2278889 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/rgba2hue.cc +++ b/reference_designs/ipu-xrt/vision_pipelines/vision_kernels/rgba2hue.cc @@ -17,8 +17,8 @@ #define REL_WRITE 0 #define REL_READ 1 -#include "lut_inv_8b.h" #include +#include "lut_inv_8b.h" const int32_t SRS_SHIFT = 12; diff --git a/test/python/ipu.py b/test/python/ipu.py index 699be4e58f..927f83eaa2 100644 --- a/test/python/ipu.py +++ b/test/python/ipu.py @@ -143,7 +143,7 @@ def core_body(): def sequence(A, B, C): ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, lengths=[1, 1, 1, N]) ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, lengths=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync(column=0, row=0, direction=0, channel=0, column_num=1, row_num=1) # CHECK-LABEL: my_matmul @@ -372,7 +372,9 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync( + column=0, row=0, direction=0, channel=0, column_num=1, row_num=1 + ) # CHECK-LABEL: edge_detect @@ -839,7 +841,7 @@ def sequence(I, B, O): lengths=[1, 1, 36, 64], strides=[0, 0, 64], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync(column=0, row=0, direction=0, channel=0, column_num=1, row_num=1) # CHECK-LABEL: my_add_one_objFifo @@ -964,4 +966,4 @@ def sequence(inTensor, notUsed, outTensor): ipu_dma_memcpy_nd( metadata="in0", bd_id=1, mem=inTensor, lengths=[1, 1, 1, 64] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + ipu_sync(column=0, row=0, direction=0, channel=0, column_num=1, row_num=1)