diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py index 75d725bcb2..929045ff31 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py @@ -112,12 +112,6 @@ def device_body(): # Compute tile 2 @core(ComputeTile2, "threshold.cc.o") def core_body(): - # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [0])) - # maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [1])) - # thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile2, [2])) - maxValue = arith.constant(255, T.i16()) - thresholdValue = arith.constant(50, T.i16()) - thresholdType = arith.constant(0, T.i8()) # for _ in for_(4096): for _ in for_(sys.maxsize): elemIn = acquire( @@ -133,6 +127,14 @@ def core_body(): T.memref(lineWidth, T.ui8()), ).acquired_elem() + # RTPs written from the instruction stream must be read right before the kernel + # after the ObjectFIFO acquires + thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [0])) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [1])) + thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile2, [2])) + # maxValue = arith.constant(255, T.i16()) + # thresholdValue = arith.constant(50, T.i16()) + # thresholdType = arith.constant(0, T.i8()) Call( thresholdLine, [ @@ -152,12 +154,6 @@ def core_body(): # Compute tile 3 @core(ComputeTile3, "threshold.cc.o") def core_body(): - # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [0])) - # maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [1])) - # thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile3, [2])) - maxValue = arith.constant(255, T.i16()) - thresholdValue = arith.constant(50, T.i16()) - thresholdType = arith.constant(0, T.i8()) # for _ in for_(4096): for _ in for_(sys.maxsize): elemIn = acquire( @@ -172,7 +168,15 @@ def core_body(): 1, T.memref(lineWidth, T.ui8()), ).acquired_elem() - + + # RTPs written from the instruction stream must be read right before the kernel + # after the ObjectFIFO acquires + thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [0])) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [1])) + thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile3, [2])) + # maxValue = arith.constant(255, T.i16()) + # thresholdValue = arith.constant(50, T.i16()) + # thresholdType = arith.constant(0, T.i8()) Call( thresholdLine, [ @@ -192,12 +196,6 @@ def core_body(): # Compute tile 4 @core(ComputeTile4, "threshold.cc.o") def core_body(): - # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [0])) - # maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [1])) - # thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile4, [2])) - maxValue = arith.constant(255, T.i16()) - thresholdValue = arith.constant(50, T.i16()) - thresholdType = arith.constant(0, T.i8()) # for _ in for_(4096): for _ in for_(sys.maxsize): elemIn = acquire( @@ -213,6 +211,14 @@ def core_body(): T.memref(lineWidth, T.ui8()), ).acquired_elem() + # RTPs written from the instruction stream must be read right before the kernel + # after the ObjectFIFO acquires + thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [0])) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [1])) + thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile4, [2])) + # maxValue = arith.constant(255, T.i16()) + # thresholdValue = arith.constant(50, T.i16()) + # thresholdType = arith.constant(0, T.i8()) Call( thresholdLine, [ @@ -231,13 +237,7 @@ def core_body(): # Compute tile 5 @core(ComputeTile5, "threshold.cc.o") - def core_body(): - # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [0])) - # maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [1])) - # thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile5, [2])) - maxValue = arith.constant(255, T.i16()) - thresholdValue = arith.constant(50, T.i16()) - thresholdType = arith.constant(0, T.i8()) + def core_body():) # for _ in for_(4096): for _ in for_(sys.maxsize): elemIn = acquire( @@ -253,6 +253,14 @@ def core_body(): T.memref(lineWidth, T.ui8()), ).acquired_elem() + # RTPs written from the instruction stream must be read right before the kernel + # after the ObjectFIFO acquires + thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [0])) + maxValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [1])) + thresholdType = arith.trunci(T.i8(), memref.load(rtpComputeTile5, [2])) + # maxValue = arith.constant(255, T.i16()) + # thresholdValue = arith.constant(50, T.i16()) + # thresholdType = arith.constant(0, T.i8() Call( thresholdLine, [ @@ -281,21 +289,21 @@ def core_body(): ) def sequence(inTensor, notUsed, outTensor): # thresholdValue, maxValue, thresholdType - # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 0, value = 50) - # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 1, value = 255) - # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 2, value = 0) + IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 0, value = 50) + IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 1, value = 255) + IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 2, value = 0) - # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 0, value = 50) - # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 1, value = 255) - # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 2, value = 0) + IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 0, value = 50) + IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 1, value = 255) + IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 2, value = 0) - # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 0, value = 50) - # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 1, value = 255) - # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 2, value = 0) + IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 0, value = 50) + IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 1, value = 255) + IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 2, value = 0) - # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 0, value = 50) - # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 1, value = 255) - # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 2, value = 0) + IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 0, value = 50) + IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 1, value = 255) + IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 2, value = 0) ipu_dma_memcpy_nd( metadata="inOOB_L3L2",