diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py
index 75d725bcb2..929045ff31 100644
--- a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py
+++ b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py
@@ -112,12 +112,6 @@ def device_body():
             # Compute tile 2
             @core(ComputeTile2, "threshold.cc.o")
             def core_body():
-                # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [0]))
-                # maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [1]))
-                # thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile2, [2]))
-                maxValue = arith.constant(255, T.i16())
-                thresholdValue = arith.constant(50, T.i16())
-                thresholdType = arith.constant(0, T.i8())
                 # for _ in for_(4096):
                 for _ in for_(sys.maxsize):
                     elemIn = acquire(
@@ -133,6 +127,14 @@ def core_body():
                         T.memref(lineWidth, T.ui8()),
                     ).acquired_elem()
 
+                    # RTPs written from the instruction stream must be read right before the kernel
+                    # after the ObjectFIFO acquires
+                    thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [0]))
+                    maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile2, [1]))
+                    thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile2, [2]))
+                    # maxValue = arith.constant(255, T.i16())
+                    # thresholdValue = arith.constant(50, T.i16())
+                    # thresholdType = arith.constant(0, T.i8())
                     Call(
                         thresholdLine,
                         [
@@ -152,12 +154,6 @@ def core_body():
             # Compute tile 3
             @core(ComputeTile3, "threshold.cc.o")
             def core_body():
-                # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [0]))
-                # maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [1]))
-                # thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile3, [2]))
-                maxValue = arith.constant(255, T.i16())
-                thresholdValue = arith.constant(50, T.i16())
-                thresholdType = arith.constant(0, T.i8())
                 # for _ in for_(4096):
                 for _ in for_(sys.maxsize):
                     elemIn = acquire(
@@ -172,7 +168,15 @@ def core_body():
                         1,
                         T.memref(lineWidth, T.ui8()),
                     ).acquired_elem()
-
+
+                    # RTPs written from the instruction stream must be read right before the kernel
+                    # after the ObjectFIFO acquires
+                    thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [0]))
+                    maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile3, [1]))
+                    thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile3, [2]))
+                    # maxValue = arith.constant(255, T.i16())
+                    # thresholdValue = arith.constant(50, T.i16())
+                    # thresholdType = arith.constant(0, T.i8())
                     Call(
                         thresholdLine,
                         [
@@ -192,12 +196,6 @@ def core_body():
             # Compute tile 4
             @core(ComputeTile4, "threshold.cc.o")
             def core_body():
-                # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [0]))
-                # maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [1]))
-                # thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile4, [2]))
-                maxValue = arith.constant(255, T.i16())
-                thresholdValue = arith.constant(50, T.i16())
-                thresholdType = arith.constant(0, T.i8())
                 # for _ in for_(4096):
                 for _ in for_(sys.maxsize):
                     elemIn = acquire(
@@ -213,6 +211,14 @@ def core_body():
                         T.memref(lineWidth, T.ui8()),
                     ).acquired_elem()
 
+                    # RTPs written from the instruction stream must be read right before the kernel
+                    # after the ObjectFIFO acquires
+                    thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [0]))
+                    maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile4, [1]))
+                    thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile4, [2]))
+                    # maxValue = arith.constant(255, T.i16())
+                    # thresholdValue = arith.constant(50, T.i16())
+                    # thresholdType = arith.constant(0, T.i8())
                     Call(
                         thresholdLine,
                         [
@@ -231,13 +237,7 @@ def core_body():
 
             # Compute tile 5
             @core(ComputeTile5, "threshold.cc.o")
-            def core_body():
-                # thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [0]))
-                # maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [1]))
-                # thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile5, [2]))
-                maxValue = arith.constant(255, T.i16())
-                thresholdValue = arith.constant(50, T.i16())
-                thresholdType = arith.constant(0, T.i8())
+            def core_body():
                 # for _ in for_(4096):
                 for _ in for_(sys.maxsize):
                     elemIn = acquire(
@@ -253,6 +253,14 @@ def core_body():
                         T.memref(lineWidth, T.ui8()),
                     ).acquired_elem()
 
+                    # RTPs written from the instruction stream must be read right before the kernel
+                    # after the ObjectFIFO acquires
+                    thresholdValue = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [0]))
+                    maxValue       = arith.trunci(T.i16(), memref.load(rtpComputeTile5, [1]))
+                    thresholdType  = arith.trunci(T.i8(),  memref.load(rtpComputeTile5, [2]))
+                    # maxValue = arith.constant(255, T.i16())
+                    # thresholdValue = arith.constant(50, T.i16())
+                    # thresholdType = arith.constant(0, T.i8())
                     Call(
                         thresholdLine,
                         [
@@ -281,21 +289,21 @@ def core_body():
             )
             def sequence(inTensor, notUsed, outTensor):
                 # thresholdValue, maxValue, thresholdType
-                # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 0, value = 50)
-                # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 1, value = 255)
-                # IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 2, value = 0)
+                IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 0, value = 50)
+                IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 1, value = 255)
+                IpuWriteRTPOp("rtpComputeTile2", col = 0, row = 2, index = 2, value = 0)
 
-                # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 0, value = 50)
-                # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 1, value = 255)
-                # IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 2, value = 0)
+                IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 0, value = 50)
+                IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 1, value = 255)
+                IpuWriteRTPOp("rtpComputeTile3", col = 0, row = 3, index = 2, value = 0)
 
-                # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 0, value = 50)
-                # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 1, value = 255)
-                # IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 2, value = 0)
+                IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 0, value = 50)
+                IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 1, value = 255)
+                IpuWriteRTPOp("rtpComputeTile4", col = 0, row = 4, index = 2, value = 0)
 
-                # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 0, value = 50)
-                # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 1, value = 255)
-                # IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 2, value = 0)
+                IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 0, value = 50)
+                IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 1, value = 255)
+                IpuWriteRTPOp("rtpComputeTile5", col = 0, row = 5, index = 2, value = 0)
 
                 ipu_dma_memcpy_nd(
                     metadata="inOOB_L3L2",