Skip to content

Commit

Permalink
Record image width and height to database while indexing
Browse files (browse the repository at this point in the history)
  • Loading branch information
hv0905 committed Dec 18, 2023
1 parent 0f4a594 commit 028d072
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
11 changes: 10 additions & 1 deletion app/Models/img_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ class ImageData(BaseModel):
image_vector: Optional[ndarray] = Field(None, exclude=True)
text_contain_vector: Optional[ndarray] = Field(None, exclude=True)
index_date: datetime
width: Optional[int] = None
height: Optional[int] = None
aspect_ratio: Optional[float] = None

@property
def payload(self):
return {
"url": self.url,
"thumbnail_url": self.thumbnail_url,
"ocr_text": self.ocr_text,
"index_date": self.index_date.isoformat()
"index_date": self.index_date.isoformat(),
"width": self.width,
"height": self.height,
"aspect_ratio": self.aspect_ratio
}

@classmethod
Expand All @@ -31,6 +37,9 @@ def from_payload(cls, id: str, payload: dict, vector: Optional[ndarray] = None):
thumbnail_url=payload['thumbnail_url'],
index_date=datetime.fromisoformat(payload['index_date']),
ocr_text=payload['ocr_text'] if 'ocr_text' in payload else None,
width=payload['width'] if 'width' in payload else None,
height=payload['height'] if 'height' in payload else None,
aspect_ratio=payload['aspect_ratio'] if 'aspect_ratio' in payload else None,
image_vector=vector)

class Config:
Expand Down
7 changes: 5 additions & 2 deletions scripts/local_indexing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@


if __name__ == '__main__':
import sys

Expand Down Expand Up @@ -37,10 +36,11 @@ def copy_and_index(filePath: Path) -> ImageData | None:
img_ext = filePath.suffix
image_ocr_result = None
text_contain_vector = None
[width, height] = img.size
try:
image_vector = transformers_service.get_image_vector(img)
if config.ocr_search.enable:
image_ocr_result = ocr_service.ocr_interface(img)
image_ocr_result = ocr_service.ocr_interface(img) # This will modify img if you use preprocess!
if image_ocr_result != "":
text_contain_vector = transformers_service.get_bert_vector(image_ocr_result)
else:
Expand All @@ -53,6 +53,9 @@ def copy_and_index(filePath: Path) -> ImageData | None:
image_vector=image_vector,
text_contain_vector=text_contain_vector,
index_date=datetime.now(),
width=width,
height=height,
aspect_ratio=float(width) / height,
ocr_text=image_ocr_result)

# copy to static
Expand Down

0 comments on commit 028d072

Please sign in to comment.