From 6b2b94a9fe7cc8800a9935ea10e2048b9defe030 Mon Sep 17 00:00:00 2001 From: mrodina Date: Wed, 22 Nov 2023 08:14:33 +0400 Subject: [PATCH] 2023-11-22: rasa-transformers with pretrained sbert-large --- Makefile | 8 ++++---- ...ll_ru => Dockerfile.pretrained_embeddings_sbert_large} | 8 ++++++-- ...u.yml => config_pretrained_embeddings_sbert_large.yml} | 2 +- docker/docker-bake.hcl | 8 ++++---- 4 files changed, 15 insertions(+), 11 deletions(-) rename docker/{Dockerfile.pretrained_embeddings_full_ru => Dockerfile.pretrained_embeddings_sbert_large} (72%) rename docker/configs/{config_pretrained_embeddings_full_ru.yml => config_pretrained_embeddings_sbert_large.yml} (85%) diff --git a/Makefile b/Makefile index 49db2e665041..e07b9bcded5c 100644 --- a/Makefile +++ b/Makefile @@ -285,13 +285,13 @@ build-docker-spacy-ru: docker buildx bake -f docker/docker-bake.hcl base-builder && \ docker buildx bake -f docker/docker-bake.hcl spacy-ru -build-docker-full-ru: +build-docker-sbert-l: export IMAGE_NAME=rasa && \ docker buildx use default && \ docker buildx bake -f docker/docker-bake.hcl base && \ docker buildx bake -f docker/docker-bake.hcl base-poetry && \ docker buildx bake -f docker/docker-bake.hcl base-builder && \ - docker buildx bake -f docker/docker-bake.hcl full-ru + docker buildx bake -f docker/docker-bake.hcl sbert-l build-docker-spacy-ru-gpu: export IMAGE_NAME=rasa && \ @@ -317,8 +317,8 @@ stop-integration-containers: ## Stop the integration test containers. build-e8: build-docker docker tag rasa:localdev ghcr.io/epoch8/rasa/rasa:$(shell cat version) -build-e8-full-ru: build-docker-full-ru - docker tag rasa:localdev-full-ru ghcr.io/epoch8/rasa/rasa-full-ru:$(shell cat version) +build-e8-sbert-l: build-docker-sbert-l + docker tag rasa:localdev-sbert-l ghcr.io/epoch8/rasa/rasa-sbert-l:$(shell cat version) build-e8-spacy-ru: build-docker-spacy-ru docker tag rasa:localdev-spacy-ru ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat version) diff --git a/docker/Dockerfile.pretrained_embeddings_full_ru b/docker/Dockerfile.pretrained_embeddings_sbert_large similarity index 72% rename from docker/Dockerfile.pretrained_embeddings_full_ru rename to docker/Dockerfile.pretrained_embeddings_sbert_large index 87dc82c07e68..722e3b904493 100644 --- a/docker/Dockerfile.pretrained_embeddings_full_ru +++ b/docker/Dockerfile.pretrained_embeddings_sbert_large @@ -7,7 +7,7 @@ FROM ${IMAGE_BASE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH} as builder # copy files COPY . /build/ -COPY docker/configs/config_pretrained_embeddings_full_ru.yml /build/config.yml +COPY docker/configs/config_pretrained_embeddings_sbert_large.yml /build/config.yml # change working directory WORKDIR /build @@ -15,7 +15,7 @@ WORKDIR /build # install dependencies RUN python -m venv /opt/venv && \ . /opt/venv/bin/activate && pip install --no-cache-dir -U "pip==22.*" -U "wheel>0.38.0" -RUN . /opt/venv/bin/activate && poetry install --extras full --no-dev --no-root --no-interaction +RUN . /opt/venv/bin/activate && poetry install --extras transformers --no-dev --no-root --no-interaction RUN . /opt/venv/bin/activate && poetry build -f wheel -n && \ pip install --no-deps dist/*.whl && \ rm -rf dist *.egg-info @@ -37,6 +37,10 @@ ENV HOME=/app # update permissions & change user to not run as root WORKDIR /app + +RUN pip install huggingface_hub +RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='ai-forever/sbert_large_mt_nlu_ru', ignore_patterns=['*.msgpack', '*.bin'], local_dir='./weighs', local_dir_use_symlinks=True);" + RUN chgrp -R 0 /app && chmod -R g=u /app && chmod o+wr /app USER 1001 diff --git a/docker/configs/config_pretrained_embeddings_full_ru.yml b/docker/configs/config_pretrained_embeddings_sbert_large.yml similarity index 85% rename from docker/configs/config_pretrained_embeddings_full_ru.yml rename to docker/configs/config_pretrained_embeddings_sbert_large.yml index c94ff0b6c18c..072ac4e471f0 100644 --- a/docker/configs/config_pretrained_embeddings_full_ru.yml +++ b/docker/configs/config_pretrained_embeddings_sbert_large.yml @@ -6,7 +6,7 @@ pipeline: - name: LanguageModelFeaturizer model_name: "bert" - model_weights: "ai-forever/sbert_large_mt_nlu_ru" + model_weights: "weighs" - name: DIETClassifier epochs: 50 diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index 75b22c864d96..734649100e8e 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -194,9 +194,9 @@ target "spacy-en" { ] } -target "full-ru" { - dockerfile = "docker/Dockerfile.pretrained_embeddings_full_ru" - tags = ["${IMAGE_NAME}:${IMAGE_TAG}-full-ru"] +target "sbert-l" { + dockerfile = "docker/Dockerfile.pretrained_embeddings_sbert_large" + tags = ["${IMAGE_NAME}:${IMAGE_TAG}-sbert-l"] args = { IMAGE_BASE_NAME = "${IMAGE_NAME}" @@ -209,7 +209,7 @@ target "full-ru" { cache-from = [ "type=registry,ref=${IMAGE_NAME}:base-${BASE_IMAGE_HASH}", "type=registry,ref=${IMAGE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH}", - "type=registry,ref=${IMAGE_NAME}:${IMAGE_TAG}-full-ru", + "type=registry,ref=${IMAGE_NAME}:${IMAGE_TAG}-sbert-l", ] }