From e9a38e2131c0302d485270110c9cf071428b67e3 Mon Sep 17 00:00:00 2001
From: snewman-aa <snewman.aa@gmail.com>
Date: Wed, 23 Aug 2023 12:40:19 -0400
Subject: [PATCH] Individual bounding box annotations

---
 app.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/app.py b/app.py
index 7c44053..a6e2489 100644
--- a/app.py
+++ b/app.py
@@ -46,7 +46,7 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
                 image: np.ndarray = vdh.extract_mid_frame(mmif, timeframe, as_PIL=False)
                 self.logger.debug("Extracted image")
                 self.logger.debug("Running OCR")
-                ocrs = [self.reader.readtext(image)]
+                ocrs = [self.reader.readtext(image, width_ths=0.25)]
                 self.logger.debug(ocrs)
             else:
                 self.logger.debug(f"Sampling {config['sampleFrames']} frames")
@@ -61,25 +61,22 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
                 images = vdh.extract_frames_as_images(video_doc, tf_sample)
                 ocrs = []
                 for image in images:
-                    ocrs.append(self.reader.readtext(image))
+                    ocrs.append(self.reader.readtext(image, width_ths=0.25))
 
-            full_text = ""
-            scores = []
             for ocr in ocrs:
                 for coord, text, score in ocr:
                     if score > 0.4:
-                        full_text += text + " "
-                        scores.append(score)
-
-            self.logger.debug("Confident OCR: " + full_text)
-
-            # add OCR output to text document
-            text_document = new_view.new_textdocument(full_text)
-            # text_document.add_property("confidence", score)
-            align_annotation = new_view.new_annotation(AnnotationTypes.Alignment)
-            align_annotation.add_property("source", timeframe.id)
-            align_annotation.add_property("target", text_document.id)
-            pass
+                        self.logger.debug("Confident OCR: " + text)
+                        text_document = new_view.new_textdocument(text)
+                        bbox_annotation = new_view.new_annotation(AnnotationTypes.BoundingBox)
+                        bbox_annotation.add_property("coordinates", coord)
+                        bbox_annotation.add_property("boxType", "text")
+                        # For now, use the timeframe's start as the timePoint, because
+                        # vdh.extract_mid_frame doesn't return the number of the frame it extracted.
+                        bbox_annotation.add_property("timePoint", timeframe.properties["start"])
+                        align_annotation = new_view.new_annotation(AnnotationTypes.Alignment)
+                        align_annotation.add_property("source", bbox_annotation.id)
+                        align_annotation.add_property("target", text_document.id)
 
         return mmif