From ad9e8b7539f8f689826dfa40b716971eca56672e Mon Sep 17 00:00:00 2001 From: mrodina Date: Tue, 14 Nov 2023 15:30:36 +0400 Subject: [PATCH 1/3] 2023-11-12: build rasa-full image --- Makefile | 11 ++++ .../Dockerfile.pretrained_embeddings_full_ru | 52 +++++++++++++++++++ .../config_pretrained_embeddings_full_ru.yml | 18 +++++++ docker/docker-bake.hcl | 19 +++++++ 4 files changed, 100 insertions(+) create mode 100644 docker/Dockerfile.pretrained_embeddings_full_ru create mode 100644 docker/configs/config_pretrained_embeddings_full_ru.yml diff --git a/Makefile b/Makefile index 9e7c793efb5f..49db2e665041 100644 --- a/Makefile +++ b/Makefile @@ -285,6 +285,14 @@ build-docker-spacy-ru: docker buildx bake -f docker/docker-bake.hcl base-builder && \ docker buildx bake -f docker/docker-bake.hcl spacy-ru +build-docker-full-ru: + export IMAGE_NAME=rasa && \ + docker buildx use default && \ + docker buildx bake -f docker/docker-bake.hcl base && \ + docker buildx bake -f docker/docker-bake.hcl base-poetry && \ + docker buildx bake -f docker/docker-bake.hcl base-builder && \ + docker buildx bake -f docker/docker-bake.hcl full-ru + build-docker-spacy-ru-gpu: export IMAGE_NAME=rasa && \ export BASE_IMAGE=nvidia/cuda:11.2.2-devel-ubuntu20.04 && \ @@ -309,6 +317,9 @@ stop-integration-containers: ## Stop the integration test containers. 
build-e8: build-docker docker tag rasa:localdev ghcr.io/epoch8/rasa/rasa:$(shell cat version) +build-e8-full-ru: build-docker-full-ru + docker tag rasa:localdev-full-ru ghcr.io/epoch8/rasa/rasa-full-ru:$(shell cat version) + build-e8-spacy-ru: build-docker-spacy-ru docker tag rasa:localdev-spacy-ru ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat version) diff --git a/docker/Dockerfile.pretrained_embeddings_full_ru b/docker/Dockerfile.pretrained_embeddings_full_ru new file mode 100644 index 000000000000..87dc82c07e68 --- /dev/null +++ b/docker/Dockerfile.pretrained_embeddings_full_ru @@ -0,0 +1,52 @@ +# Rasa image for the Russian pretrained language-model embeddings pipeline +ARG IMAGE_BASE_NAME +ARG BASE_IMAGE_HASH +ARG BASE_BUILDER_IMAGE_HASH + +FROM ${IMAGE_BASE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH} as builder + +# copy files +COPY . /build/ +COPY docker/configs/config_pretrained_embeddings_full_ru.yml /build/config.yml + +# change working directory +WORKDIR /build + +# install dependencies +RUN python -m venv /opt/venv && \ + . /opt/venv/bin/activate && pip install --no-cache-dir -U "pip==22.*" -U "wheel>0.38.0" +RUN . /opt/venv/bin/activate && poetry install --extras full --no-dev --no-root --no-interaction +RUN . 
/opt/venv/bin/activate && poetry build -f wheel -n && \ + pip install --no-deps dist/*.whl && \ + rm -rf dist *.egg-info + +# make sure we use the virtualenv +ENV PATH="/opt/venv/bin:$PATH" + +# start a new build stage +FROM ${IMAGE_BASE_NAME}:base-${BASE_IMAGE_HASH} as runner + +# copy everything from /opt +COPY --from=builder /opt/venv /opt/venv + +# make sure we use the virtualenv +ENV PATH="/opt/venv/bin:$PATH" + +# set HOME environment variable +ENV HOME=/app + +# update permissions & change user to not run as root +WORKDIR /app +RUN chgrp -R 0 /app && chmod -R g=u /app && chmod o+wr /app +USER 1001 + +# Create a volume for temporary data +VOLUME /tmp + +# change shell +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# the entry point +EXPOSE 5005 +ENTRYPOINT ["rasa"] +CMD ["--help"] diff --git a/docker/configs/config_pretrained_embeddings_full_ru.yml b/docker/configs/config_pretrained_embeddings_full_ru.yml new file mode 100644 index 000000000000..c94ff0b6c18c --- /dev/null +++ b/docker/configs/config_pretrained_embeddings_full_ru.yml @@ -0,0 +1,18 @@ +language: "ru" + +pipeline: + - name: WhitespaceTokenizer + - name: RegexFeaturizer + + - name: LanguageModelFeaturizer + model_name: "bert" + model_weights: "ai-forever/sbert_large_mt_nlu_ru" + + - name: DIETClassifier + epochs: 50 + batch_size: 4 + constrain_similarities: true + + - name: FallbackClassifier + threshold: 0.2 + ambiguity_threshold: 0.05 diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index 10900486e60c..75b22c864d96 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -194,6 +194,25 @@ target "spacy-en" { ] } +target "full-ru" { + dockerfile = "docker/Dockerfile.pretrained_embeddings_full_ru" + tags = ["${IMAGE_NAME}:${IMAGE_TAG}-full-ru"] + + args = { + IMAGE_BASE_NAME = "${IMAGE_NAME}" + BASE_IMAGE_HASH = "${BASE_IMAGE_HASH}" + BASE_BUILDER_IMAGE_HASH = "${BASE_BUILDER_IMAGE_HASH}" + } + + cache-to = ["type=inline"] + + cache-from = [ + 
"type=registry,ref=${IMAGE_NAME}:base-${BASE_IMAGE_HASH}", + "type=registry,ref=${IMAGE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH}", + "type=registry,ref=${IMAGE_NAME}:${IMAGE_TAG}-full-ru", + ] +} + target "spacy-ru" { dockerfile = "docker/Dockerfile.pretrained_embeddings_spacy_ru" tags = ["${IMAGE_NAME}:${IMAGE_TAG}-spacy-ru"] From 6b2b94a9fe7cc8800a9935ea10e2048b9defe030 Mon Sep 17 00:00:00 2001 From: mrodina Date: Wed, 22 Nov 2023 08:14:33 +0400 Subject: [PATCH 2/3] 2023-11-22: rasa-transformers with pretrained sbert-large --- Makefile | 8 ++++---- ...ll_ru => Dockerfile.pretrained_embeddings_sbert_large} | 8 ++++++-- ...u.yml => config_pretrained_embeddings_sbert_large.yml} | 2 +- docker/docker-bake.hcl | 8 ++++---- 4 files changed, 15 insertions(+), 11 deletions(-) rename docker/{Dockerfile.pretrained_embeddings_full_ru => Dockerfile.pretrained_embeddings_sbert_large} (72%) rename docker/configs/{config_pretrained_embeddings_full_ru.yml => config_pretrained_embeddings_sbert_large.yml} (85%) diff --git a/Makefile b/Makefile index 49db2e665041..e07b9bcded5c 100644 --- a/Makefile +++ b/Makefile @@ -285,13 +285,13 @@ build-docker-spacy-ru: docker buildx bake -f docker/docker-bake.hcl base-builder && \ docker buildx bake -f docker/docker-bake.hcl spacy-ru -build-docker-full-ru: +build-docker-sbert-l: export IMAGE_NAME=rasa && \ docker buildx use default && \ docker buildx bake -f docker/docker-bake.hcl base && \ docker buildx bake -f docker/docker-bake.hcl base-poetry && \ docker buildx bake -f docker/docker-bake.hcl base-builder && \ - docker buildx bake -f docker/docker-bake.hcl full-ru + docker buildx bake -f docker/docker-bake.hcl sbert-l build-docker-spacy-ru-gpu: export IMAGE_NAME=rasa && \ @@ -317,8 +317,8 @@ stop-integration-containers: ## Stop the integration test containers. 
build-e8: build-docker docker tag rasa:localdev ghcr.io/epoch8/rasa/rasa:$(shell cat version) -build-e8-full-ru: build-docker-full-ru - docker tag rasa:localdev-full-ru ghcr.io/epoch8/rasa/rasa-full-ru:$(shell cat version) +build-e8-sbert-l: build-docker-sbert-l + docker tag rasa:localdev-sbert-l ghcr.io/epoch8/rasa/rasa-sbert-l:$(shell cat version) build-e8-spacy-ru: build-docker-spacy-ru docker tag rasa:localdev-spacy-ru ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat version) diff --git a/docker/Dockerfile.pretrained_embeddings_full_ru b/docker/Dockerfile.pretrained_embeddings_sbert_large similarity index 72% rename from docker/Dockerfile.pretrained_embeddings_full_ru rename to docker/Dockerfile.pretrained_embeddings_sbert_large index 87dc82c07e68..722e3b904493 100644 --- a/docker/Dockerfile.pretrained_embeddings_full_ru +++ b/docker/Dockerfile.pretrained_embeddings_sbert_large @@ -7,7 +7,7 @@ FROM ${IMAGE_BASE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH} as builder # copy files COPY . /build/ -COPY docker/configs/config_pretrained_embeddings_full_ru.yml /build/config.yml +COPY docker/configs/config_pretrained_embeddings_sbert_large.yml /build/config.yml # change working directory WORKDIR /build @@ -15,7 +15,7 @@ WORKDIR /build # install dependencies RUN python -m venv /opt/venv && \ . /opt/venv/bin/activate && pip install --no-cache-dir -U "pip==22.*" -U "wheel>0.38.0" -RUN . /opt/venv/bin/activate && poetry install --extras full --no-dev --no-root --no-interaction +RUN . /opt/venv/bin/activate && poetry install --extras transformers --no-dev --no-root --no-interaction RUN . 
/opt/venv/bin/activate && poetry build -f wheel -n && \ pip install --no-deps dist/*.whl && \ rm -rf dist *.egg-info @@ -37,6 +37,10 @@ ENV HOME=/app # update permissions & change user to not run as root WORKDIR /app + +RUN pip install huggingface_hub +RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='ai-forever/sbert_large_mt_nlu_ru', ignore_patterns=['*.msgpack', '*.bin'], local_dir='./weighs', local_dir_use_symlinks=True);" + RUN chgrp -R 0 /app && chmod -R g=u /app && chmod o+wr /app USER 1001 diff --git a/docker/configs/config_pretrained_embeddings_full_ru.yml b/docker/configs/config_pretrained_embeddings_sbert_large.yml similarity index 85% rename from docker/configs/config_pretrained_embeddings_full_ru.yml rename to docker/configs/config_pretrained_embeddings_sbert_large.yml index c94ff0b6c18c..072ac4e471f0 100644 --- a/docker/configs/config_pretrained_embeddings_full_ru.yml +++ b/docker/configs/config_pretrained_embeddings_sbert_large.yml @@ -6,7 +6,7 @@ pipeline: - name: LanguageModelFeaturizer model_name: "bert" - model_weights: "ai-forever/sbert_large_mt_nlu_ru" + model_weights: "weighs" - name: DIETClassifier epochs: 50 diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index 75b22c864d96..734649100e8e 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -194,9 +194,9 @@ target "spacy-en" { ] } -target "full-ru" { - dockerfile = "docker/Dockerfile.pretrained_embeddings_full_ru" - tags = ["${IMAGE_NAME}:${IMAGE_TAG}-full-ru"] +target "sbert-l" { + dockerfile = "docker/Dockerfile.pretrained_embeddings_sbert_large" + tags = ["${IMAGE_NAME}:${IMAGE_TAG}-sbert-l"] args = { IMAGE_BASE_NAME = "${IMAGE_NAME}" @@ -209,7 +209,7 @@ target "full-ru" { cache-from = [ "type=registry,ref=${IMAGE_NAME}:base-${BASE_IMAGE_HASH}", "type=registry,ref=${IMAGE_NAME}:base-builder-${BASE_BUILDER_IMAGE_HASH}", - "type=registry,ref=${IMAGE_NAME}:${IMAGE_TAG}-full-ru", + 
"type=registry,ref=${IMAGE_NAME}:${IMAGE_TAG}-sbert-l", ] } From 87a15f8040ab683556215b9d2332105b4c0db66f Mon Sep 17 00:00:00 2001 From: mrodina Date: Wed, 22 Nov 2023 09:58:01 +0400 Subject: [PATCH 3/3] 2023-11-22: add uploader to the Makefile --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e07b9bcded5c..be92ebd62d6e 100644 --- a/Makefile +++ b/Makefile @@ -333,4 +333,7 @@ upload-spacy-ru: docker push ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat ./version) upload-spacy-ru-gpu: - docker push ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat ./version)-gpu \ No newline at end of file + docker push ghcr.io/epoch8/rasa/rasa-spacy-ru:$(shell cat ./version)-gpu + +upload-sbert-l: + docker push ghcr.io/epoch8/rasa/rasa-sbert-l:$(shell cat ./version) \ No newline at end of file