Merge pull request #21 from hv0905/docker-include-model

Bundle model in dockerfile
hv0905 · May 6, 2024 · ee42fff · ee42fff
2 parents 23fe7d7 + 0479894
commit ee42fff
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 30 deletions.
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/Dockerfile b/Dockerfile
@@ -8,6 +8,18 @@ COPY requirements.txt .
 
 RUN pip install --no-cache-dir -r requirements.txt
 
+RUN mkdir -p /opt/models && \
+    huggingface-cli download openai/clip-vit-large-patch14 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/clip && \
+    huggingface-cli download google-bert/bert-base-chinese 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/bert && \
+    huggingface-cli download pk5ls20/PaddleModel 'PaddleOCR2Pytorch/ch_ptocr_v4_det_infer.pth' 'PaddleOCR2Pytorch/ch_ptocr_v4_rec_infer.pth' \
+     'PaddleOCR2Pytorch/ch_ptocr_mobile_v2.0_cls_infer.pth' 'PaddleOCR2Pytorch/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml' \
+     'PaddleOCR2Pytorch/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml' 'ppocr_keys_v1.txt' --local-dir /opt/models/paddleocr && \
+    rm -rf /root/.cache/huggingface
+
+ENV APP_MODEL__CLIP=/opt/models/clip
+ENV APP_MODEL__BERT=/opt/models/bert
+ENV APP_MODEL__EASYPADDLEOCR=/opt/models/paddleocr
+
 COPY . .
 
 EXPOSE 8000

diff --git a/app/Services/ocr_services.py b/app/Services/ocr_services.py
@@ -35,7 +35,8 @@ def __init__(self):
         self._paddle_ocr_module = EasyPaddleOCR(use_angle_cls=True,
                                                 needWarmUp=True,
                                                 devices=self._device,
-                                                warmup_size=(960, 960))
+                                                warmup_size=(960, 960),
+                                                model_local_dir=config.model.easypaddleocr if config.model.easypaddleocr else None)
         logger.success("EasyPaddleOCR loaded successfully")
 
     @staticmethod

diff --git a/app/Services/transformers_service.py b/app/Services/transformers_service.py
@@ -17,13 +17,13 @@ def __init__(self):
         if self.device == "auto":
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info("Using device: {}; CLIP Model: {}, BERT Model: {}",
-                    self.device, config.clip.model, config.ocr_search.bert_model)
-        self._clip_model = CLIPModel.from_pretrained(config.clip.model).to(self.device)
-        self._clip_processor = CLIPProcessor.from_pretrained(config.clip.model)
+                    self.device, config.model.clip, config.model.bert)
+        self._clip_model = CLIPModel.from_pretrained(config.model.clip).to(self.device)
+        self._clip_processor = CLIPProcessor.from_pretrained(config.model.clip)
         logger.success("CLIP Model loaded successfully")
         if config.ocr_search.enable:
-            self._bert_model = BertModel.from_pretrained(config.ocr_search.bert_model).to(self.device)
-            self._bert_tokenizer = BertTokenizer.from_pretrained(config.ocr_search.bert_model)
+            self._bert_model = BertModel.from_pretrained(config.model.bert).to(self.device)
+            self._bert_tokenizer = BertTokenizer.from_pretrained(config.model.bert)
             logger.success("BERT Model loaded successfully")
         else:
             logger.info("OCR search is disabled. Skipping OCR and BERT model loading.")

diff --git a/app/config.py b/app/config.py
@@ -17,14 +17,14 @@ class QdrantSettings(BaseModel):
     api_key: str | None = None
 
 
-class ClipSettings(BaseModel):
-    model: str = 'openai/clip-vit-large-patch14'
+class ModelsSettings(BaseModel):
+    clip: str = 'openai/clip-vit-large-patch14'
     bert: str = 'bert-base-chinese'
+    easypaddleocr: str | None = None
 
 
 class OCRSearchSettings(BaseModel):
     enable: bool = True
-    bert_model: str = 'bert-base-chinese'
     ocr_module: str = 'easypaddleocr'
     ocr_language: list[str] = ['ch_sim', 'en']
     ocr_min_confidence: float = 1e-2
@@ -68,7 +68,7 @@ class StaticFileSettings(BaseModel):
 
 class Config(BaseSettings):
     qdrant: QdrantSettings = QdrantSettings()
-    clip: ClipSettings = ClipSettings()
+    model: ModelsSettings = ModelsSettings()
     ocr_search: OCRSearchSettings = OCRSearchSettings()
     static_file: StaticFileSettings = StaticFileSettings()  # [Deprecated]
     storage: StorageSettings = StorageSettings()

diff --git a/app/webapp.py b/app/webapp.py
@@ -42,7 +42,9 @@ async def lifespan(_: FastAPI):
     app.include_router(admin_controller.admin_router, prefix="/admin")
 
 if config.storage.method == "local":
-    app.mount("/static", StaticFiles(directory=pathlib.Path(config.storage.local.path)), name="static")
+    # The static directory is checked and created shortly afterwards, when the StorageService is initialized,
+    # so we skip the existence check here.
+    app.mount("/static", StaticFiles(directory=pathlib.Path(config.storage.local.path), check_dir=False), name="static")
 
 
 @app.get("/", description="Default portal. Test for server availability.")

diff --git a/config/default.env b/config/default.env
@@ -21,9 +21,14 @@
 # Setting this to "auto" allows the system to automatically detect and use available devices, otherwise specify the device name
 # APP_DEVICE="auto"
 
-# CLIP Configuration
-# Model used for CLIP embeddings
-# APP_CLIP__MODEL="openai/clip-vit-large-patch14"
+# Models Configuration
+# Model used for CLIP embeddings (Vision Search), accepts both huggingface hub (transformers) model name and path to the model.
+# APP_MODEL__CLIP="openai/clip-vit-large-patch14"
+# Model used for BERT embeddings (OCR Search), accepts both huggingface hub (transformers) model name and path to the model.
+# APP_MODEL__BERT="bert-base-chinese"
+# Model used for easypaddleocr inference (OCR indexing), accepts a path to the model. If left blank, the model is downloaded automatically from huggingface hub.
+# APP_MODEL__EASYPADDLEOCR=""
+
 
 # BERT Configuration
 # Enable OCR search functionality

diff --git a/cpu-only.Dockerfile b/cpu-only.Dockerfile
@@ -8,6 +8,18 @@ COPY requirements.txt .
 
 RUN pip install --no-cache-dir -r requirements.txt
 
+RUN mkdir -p /opt/models && \
+    huggingface-cli download openai/clip-vit-large-patch14 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/clip && \
+    huggingface-cli download google-bert/bert-base-chinese 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/bert && \
+    huggingface-cli download pk5ls20/PaddleModel 'PaddleOCR2Pytorch/ch_ptocr_v4_det_infer.pth' 'PaddleOCR2Pytorch/ch_ptocr_v4_rec_infer.pth' \
+     'PaddleOCR2Pytorch/ch_ptocr_mobile_v2.0_cls_infer.pth' 'PaddleOCR2Pytorch/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml' \
+     'PaddleOCR2Pytorch/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml' 'ppocr_keys_v1.txt' --local-dir /opt/models/paddleocr && \
+    rm -rf /root/.cache/huggingface
+
+ENV APP_MODEL__CLIP=/opt/models/clip
+ENV APP_MODEL__BERT=/opt/models/bert
+ENV APP_MODEL__EASYPADDLEOCR=/opt/models/paddleocr
+
 COPY . .
 
 EXPOSE 8000

diff --git a/download-model.Dockerfile b/download-model.Dockerfile
diff --git a/requirements.txt b/requirements.txt
@@ -14,7 +14,7 @@ pillow>9.3.0
 numpy
 
 # OCR - you can choose other option if necessary, or completely disable it if you don't need this feature
-easypaddleocr>=0.2.0
+easypaddleocr>=0.2.1
 # easyocr
 # paddleocr