diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 555ef8b..94c5ab1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,18 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +RUN mkdir -p /opt/models && \ + huggingface-cli download openai/clip-vit-large-patch14 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/clip && \ + huggingface-cli download google-bert/bert-base-chinese 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/bert && \ + huggingface-cli download pk5ls20/PaddleModel 'PaddleOCR2Pytorch/ch_ptocr_v4_det_infer.pth' 'PaddleOCR2Pytorch/ch_ptocr_v4_rec_infer.pth' \ + 'PaddleOCR2Pytorch/ch_ptocr_mobile_v2.0_cls_infer.pth' 'PaddleOCR2Pytorch/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml' \ + 'PaddleOCR2Pytorch/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml' 'ppocr_keys_v1.txt' --local-dir /opt/models/ocr && \ + rm -rf /root/.cache/huggingface + +ENV APP_MODEL__CLIP=/opt/models/clip +ENV APP_MODEL__BERT=/opt/models/bert +ENV APP_MODEL__EASYPADDLEOCR=/opt/models/ocr + COPY . . 
EXPOSE 8000 diff --git a/app/Services/ocr_services.py b/app/Services/ocr_services.py index 415776c..63d4bff 100644 --- a/app/Services/ocr_services.py +++ b/app/Services/ocr_services.py @@ -35,7 +35,8 @@ def __init__(self): self._paddle_ocr_module = EasyPaddleOCR(use_angle_cls=True, needWarmUp=True, devices=self._device, - warmup_size=(960, 960)) + warmup_size=(960, 960), + model_local_dir=config.model.easypaddleocr if config.model.easypaddleocr else None) logger.success("EasyPaddleOCR loaded successfully") @staticmethod diff --git a/app/Services/transformers_service.py b/app/Services/transformers_service.py index acc2163..c97b00a 100644 --- a/app/Services/transformers_service.py +++ b/app/Services/transformers_service.py @@ -17,13 +17,13 @@ def __init__(self): if self.device == "auto": self.device = "cuda" if torch.cuda.is_available() else "cpu" logger.info("Using device: {}; CLIP Model: {}, BERT Model: {}", - self.device, config.clip.model, config.ocr_search.bert_model) - self._clip_model = CLIPModel.from_pretrained(config.clip.model).to(self.device) - self._clip_processor = CLIPProcessor.from_pretrained(config.clip.model) + self.device, config.model.clip, config.model.bert) + self._clip_model = CLIPModel.from_pretrained(config.model.clip).to(self.device) + self._clip_processor = CLIPProcessor.from_pretrained(config.model.clip) logger.success("CLIP Model loaded successfully") if config.ocr_search.enable: - self._bert_model = BertModel.from_pretrained(config.ocr_search.bert_model).to(self.device) - self._bert_tokenizer = BertTokenizer.from_pretrained(config.ocr_search.bert_model) + self._bert_model = BertModel.from_pretrained(config.model.bert).to(self.device) + self._bert_tokenizer = BertTokenizer.from_pretrained(config.model.bert) logger.success("BERT Model loaded successfully") else: logger.info("OCR search is disabled. 
Skipping OCR and BERT model loading.") diff --git a/app/config.py b/app/config.py index ff21955..01a9d31 100644 --- a/app/config.py +++ b/app/config.py @@ -17,14 +17,14 @@ class QdrantSettings(BaseModel): api_key: str | None = None -class ClipSettings(BaseModel): - model: str = 'openai/clip-vit-large-patch14' +class ModelsSettings(BaseModel): + clip: str = 'openai/clip-vit-large-patch14' bert: str = 'bert-base-chinese' + easypaddleocr: str | None = None class OCRSearchSettings(BaseModel): enable: bool = True - bert_model: str = 'bert-base-chinese' ocr_module: str = 'easypaddleocr' ocr_language: list[str] = ['ch_sim', 'en'] ocr_min_confidence: float = 1e-2 @@ -68,7 +68,7 @@ class StaticFileSettings(BaseModel): class Config(BaseSettings): qdrant: QdrantSettings = QdrantSettings() - clip: ClipSettings = ClipSettings() + model: ModelsSettings = ModelsSettings() ocr_search: OCRSearchSettings = OCRSearchSettings() static_file: StaticFileSettings = StaticFileSettings() # [Deprecated] storage: StorageSettings = StorageSettings() diff --git a/app/webapp.py b/app/webapp.py index 0a3df94..7f283ae 100644 --- a/app/webapp.py +++ b/app/webapp.py @@ -42,7 +42,9 @@ async def lifespan(_: FastAPI): app.include_router(admin_controller.admin_router, prefix="/admin") if config.storage.method == "local": - app.mount("/static", StaticFiles(directory=pathlib.Path(config.storage.local.path)), name="static") + # The static directory is checked and created shortly afterwards, when the StorageService is initialized, so we don't need to + # check it here. + app.mount("/static", StaticFiles(directory=pathlib.Path(config.storage.local.path), check_dir=False), name="static") @app.get("/", description="Default portal. 
Test for server availability.") diff --git a/config/default.env b/config/default.env index 8377e1f..6ef07a8 100644 --- a/config/default.env +++ b/config/default.env @@ -21,9 +21,14 @@ # Setting this to "auto" allows the system to automatically detect and use available devices, otherwise specify the device name # APP_DEVICE="auto" -# CLIP Configuration -# Model used for CLIP embeddings -# APP_CLIP__MODEL="openai/clip-vit-large-patch14" +# Models Configuration +# Model used for CLIP embeddings (Vision Search), accepts either a huggingface hub (transformers) model name or a path to the model. +# APP_MODEL__CLIP="openai/clip-vit-large-patch14" +# Model used for BERT embeddings (OCR Search), accepts either a huggingface hub (transformers) model name or a path to the model. +# APP_MODEL__BERT="bert-base-chinese" +# Model used for easypaddleocr inference (OCR indexing), accepts a path to the model. Leaving it blank will download the model automatically from the huggingface hub. +# APP_MODEL__EASYPADDLEOCR="" + # BERT Configuration # Enable OCR search functionality diff --git a/cpu-only.Dockerfile b/cpu-only.Dockerfile index fb7441b..a1ab4a6 100644 --- a/cpu-only.Dockerfile +++ b/cpu-only.Dockerfile @@ -8,6 +8,18 @@ COPY requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt +RUN mkdir -p /opt/models && \ + huggingface-cli download openai/clip-vit-large-patch14 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/clip && \ + huggingface-cli download google-bert/bert-base-chinese 'model.safetensors' '*.txt' '*.json' --local-dir /opt/models/bert && \ + huggingface-cli download pk5ls20/PaddleModel 'PaddleOCR2Pytorch/ch_ptocr_v4_det_infer.pth' 'PaddleOCR2Pytorch/ch_ptocr_v4_rec_infer.pth' \ + 'PaddleOCR2Pytorch/ch_ptocr_mobile_v2.0_cls_infer.pth' 'PaddleOCR2Pytorch/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml' \ + 'PaddleOCR2Pytorch/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml' 'ppocr_keys_v1.txt' --local-dir /opt/models/ocr && \ + rm -rf /root/.cache/huggingface + +ENV APP_MODEL__CLIP=/opt/models/clip +ENV APP_MODEL__BERT=/opt/models/bert +ENV APP_MODEL__EASYPADDLEOCR=/opt/models/ocr + COPY . . EXPOSE 8000 diff --git a/download-model.Dockerfile b/download-model.Dockerfile deleted file mode 100644 index c714376..0000000 --- a/download-model.Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM edgeneko/neko-image-gallery:latest - -RUN mkdir -p /opt/models && \ - huggingface-cli download openai/clip-vit-large-patch14 model.safetensors *.txt *.json --local-dir /opt/models/clip && \ - huggingface-cli download google-bert/bert-base-chinese model.safetensors *.txt *.json --local-dir /opt/models/bert && \ - rm -rf /root/.cache/huggingface - -ENV APP_CLIP__MODEL=/opt/models/clip -ENV APP_OCR_SEARCH__BERT_MODEL=/opt/models/bert \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ed18eba..9a0f88e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ pillow>9.3.0 numpy # OCR - you can choose other option if necessary, or completely disable it if you don't need this feature -easypaddleocr>=0.2.0 +easypaddleocr>=0.2.1 # easyocr # paddleocr