From 9481927831f1edf03e7b1fcb4dddd5d82d059c9e Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Wed, 4 Oct 2023 15:57:41 -0700
Subject: [PATCH] [0.24.0 branch] Release 0.24.0 changes (#1154)

---
 README.md                                   | 12 ++++++------
 benchmark/README.md                         | 16 ++++++++--------
 engines/python/README.md                    |  4 ++--
 serving/docker/Dockerfile                   |  2 +-
 serving/docker/README.md                    |  6 +++---
 serving/docker/aarch64.Dockerfile           |  2 +-
 serving/docker/deepspeed.Dockerfile         |  6 +++---
 serving/docker/fastertransformer.Dockerfile |  2 +-
 serving/docker/pytorch-cu118.Dockerfile     |  2 +-
 serving/docker/pytorch-inf2.Dockerfile      |  2 +-
 wlm/README.md                               |  2 +-
 11 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index f5b814bd9..b246553db 100644
--- a/README.md
+++ b/README.md
@@ -50,20 +50,20 @@ brew services stop djl-serving
 
 For Ubuntu
 
 ```
-curl -O https://publish.djl.ai/djl-serving/djl-serving_0.23.0-1_all.deb
-sudo dpkg -i djl-serving_0.23.0-1_all.deb
+curl -O https://publish.djl.ai/djl-serving/djl-serving_0.24.0-1_all.deb
+sudo dpkg -i djl-serving_0.24.0-1_all.deb
 ```
 
 For Windows
 
 We are considering creating a `chocolatey` package for Windows. For the time being, you can
-download the djl-serving zip file from [here](https://publish.djl.ai/djl-serving/serving-0.23.0.zip).
+download the djl-serving zip file from [here](https://publish.djl.ai/djl-serving/serving-0.24.0.zip).
 
 ```
-curl -O https://publish.djl.ai/djl-serving/serving-0.23.0.zip
-unzip serving-0.23.0.zip
+curl -O https://publish.djl.ai/djl-serving/serving-0.24.0.zip
+unzip serving-0.24.0.zip
 # start djl-serving
-serving-0.23.0\bin\serving.bat
+serving-0.24.0\bin\serving.bat
 ```
 
 ### Docker

diff --git a/benchmark/README.md b/benchmark/README.md
index abcbb0c0e..d9b9d1795 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -43,25 +43,25 @@ sudo snap alias djlbench djl-bench
 
 - Or download .deb package from S3
 
 ```
-curl -O https://publish.djl.ai/djl-bench/0.23.0/djl-bench_0.23.0-1_all.deb
-sudo dpkg -i djl-bench_0.23.0-1_all.deb
+curl -O https://publish.djl.ai/djl-bench/0.24.0/djl-bench_0.24.0-1_all.deb
+sudo dpkg -i djl-bench_0.24.0-1_all.deb
 ```
 
 For macOS, CentOS or Amazon Linux 2
 
-You can download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip).
+You can download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip).
 
 ```
-curl -O https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip
-unzip benchmark-0.23.0.zip
-rm benchmark-0.23.0.zip
-sudo ln -s $PWD/benchmark-0.23.0/bin/benchmark /usr/bin/djl-bench
+curl -O https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip
+unzip benchmark-0.24.0.zip
+rm benchmark-0.24.0.zip
+sudo ln -s $PWD/benchmark-0.24.0/bin/benchmark /usr/bin/djl-bench
 ```
 
 For Windows
 
 We are considering creating a `chocolatey` package for Windows. For the time being, you can
-download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.23.0/benchmark-0.23.0.zip).
+download the djl-bench zip file from [here](https://publish.djl.ai/djl-bench/0.24.0/benchmark-0.24.0.zip).
 
 Or you can run the benchmark using Gradle:
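The benchmark install steps above stop at linking `djl-bench`; a short run confirms the bumped version works end to end. This is a minimal sketch, not part of the patch, assuming the standard `djl-bench` flags (`-e` engine, `-u` model URL, `-s` input shape, `-c` iteration count) and an illustrative model-zoo URL:

```
# Smoke-test the freshly installed djl-bench (flags and model URL are illustrative)
djl-bench -e PyTorch -u djl://ai.djl.pytorch/resnet -s 1,3,224,224 -c 100
```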
diff --git a/engines/python/README.md b/engines/python/README.md
index 2527d2534..10094d4aa 100644
--- a/engines/python/README.md
+++ b/engines/python/README.md
@@ -29,13 +29,13 @@ The javadocs output is generated in the `build/doc/javadoc` folder.
 
 ## Installation
 You can pull the Python engine from the central Maven repository by including the following dependency:
 
-- ai.djl.python:python:0.23.0
+- ai.djl.python:python:0.24.0
 
 ```xml
 <dependency>
     <groupId>ai.djl.python</groupId>
     <artifactId>python</artifactId>
-    <version>0.23.0</version>
+    <version>0.24.0</version>
     <scope>runtime</scope>
 </dependency>
 ```

diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index 490afeb0f..00e624215 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -10,7 +10,7 @@
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
 FROM ubuntu:20.04 AS base
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 
 COPY scripts scripts/
 RUN mkdir -p /opt/djl/conf && \

diff --git a/serving/docker/README.md b/serving/docker/README.md
index 3eead9eec..1625b0cbb 100644
--- a/serving/docker/README.md
+++ b/serving/docker/README.md
@@ -32,7 +32,7 @@ mkdir models
 cd models
 curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_jit.tar.gz
 
-docker run -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0
+docker run -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0
 ```
 
 ### GPU
@@ -42,7 +42,7 @@ mkdir models
 cd models
 curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_jit.tar.gz
 
-docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0-pytorch-cu118
+docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0-pytorch-cu118
 ```
 
 ### AWS Inferentia
@@ -52,5 +52,5 @@ mkdir models
 cd models
 curl -O https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz
 
-docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.23.0-pytorch-inf2
+docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.24.0-pytorch-inf2
 ```

diff --git a/serving/docker/aarch64.Dockerfile b/serving/docker/aarch64.Dockerfile
index a16c4dda3..9ab740f00 100644
--- a/serving/docker/aarch64.Dockerfile
+++ b/serving/docker/aarch64.Dockerfile
@@ -10,7 +10,7 @@
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
 FROM arm64v8/ubuntu:20.04
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 ARG torch_version=2.0.1
 
 EXPOSE 8080
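Each Dockerfile hunk above replaces the `djl_version=0.24.0~SNAPSHOT` default with the released `0.24.0`. To rebuild one of these images locally against the release, the same argument can also be pinned explicitly at build time. A hedged sketch, assuming the repository root as the working directory and an illustrative tag name:

```
# Build the CPU serving image with the released djl_version pinned explicitly
# (the patched Dockerfile default now matches, so the --build-arg is redundant but explicit)
docker build --build-arg djl_version=0.24.0 \
    -t deepjavalibrary/djl-serving:0.24.0 serving/docker
```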
diff --git a/serving/docker/deepspeed.Dockerfile b/serving/docker/deepspeed.Dockerfile
index d05ebeeb1..ab2a04e8d 100644
--- a/serving/docker/deepspeed.Dockerfile
+++ b/serving/docker/deepspeed.Dockerfile
@@ -11,18 +11,18 @@
 # the specific language governing permissions and limitations under the License.
 ARG version=11.8.0-cudnn8-devel-ubuntu20.04
 FROM nvidia/cuda:$version
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 ARG python_version=3.9
 ARG torch_version=2.0.1
 ARG torch_vision_version=0.15.2
 ARG vllm_version=0.2.0
-ARG deepspeed_wheel="https://publish.djl.ai/deepspeed/deepspeed-nightly-py2.py3-none-any.whl"
+ARG deepspeed_wheel="https://publish.djl.ai/deepspeed/deepspeed-0.10.0-py2.py3-none-any.whl"
 ARG flash_attn_wheel="https://publish.djl.ai/flash_attn/flash_attn_1-1.0.9-cp39-cp39-linux_x86_64.whl"
 ARG dropout_layer_norm_wheel="https://publish.djl.ai/flash_attn/dropout_layer_norm-0.1-cp39-cp39-linux_x86_64.whl"
 ARG rotary_emb_wheel="https://publish.djl.ai/flash_attn/rotary_emb-0.1-cp39-cp39-linux_x86_64.whl"
 ARG flash_attn_2_wheel="https://publish.djl.ai/flash_attn/flash_attn-2.0.1-cp39-cp39-linux_x86_64.whl"
 ARG lmi_vllm_wheel="https://publish.djl.ai/lmi_vllm/lmi_vllm-0.1.1-cp39-cp39-linux_x86_64.whl"
-ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-nightly-py3-none-any.whl"
+ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-0.9.4-py3-none-any.whl"
 ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl"
 ARG peft_wheel="https://publish.djl.ai/peft/peft-0.5.0alpha-py3-none-any.whl"
 ARG mmaploader_wheel="https://publish.djl.ai/mmaploader/mmaploader-nightly-py3-none-any.whl"

diff --git a/serving/docker/fastertransformer.Dockerfile b/serving/docker/fastertransformer.Dockerfile
index 0ae63e282..f896901b6 100644
--- a/serving/docker/fastertransformer.Dockerfile
+++ b/serving/docker/fastertransformer.Dockerfile
@@ -11,7 +11,7 @@
 # the specific language governing permissions and limitations under the License.
 ARG version=11.8.0-cudnn8-devel-ubuntu20.04
 FROM nvidia/cuda:$version
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 ARG python_version=3.9
 ARG ft_version="llama"
 ARG triton_version="r23.04"

diff --git a/serving/docker/pytorch-cu118.Dockerfile b/serving/docker/pytorch-cu118.Dockerfile
index 5f2859739..6b12382de 100644
--- a/serving/docker/pytorch-cu118.Dockerfile
+++ b/serving/docker/pytorch-cu118.Dockerfile
@@ -13,7 +13,7 @@
 ARG version=11.8.0-cudnn8-devel-ubuntu20.04
 
 FROM nvidia/cuda:$version as base
 
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 ARG torch_version=2.0.1
 ARG torch_vision_version=0.15.2
 ARG python_version=3.9

diff --git a/serving/docker/pytorch-inf2.Dockerfile b/serving/docker/pytorch-inf2.Dockerfile
index f0b87818a..7431ca9d3 100644
--- a/serving/docker/pytorch-inf2.Dockerfile
+++ b/serving/docker/pytorch-inf2.Dockerfile
@@ -10,7 +10,7 @@
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
 FROM ubuntu:20.04
-ARG djl_version=0.24.0~SNAPSHOT
+ARG djl_version=0.24.0
 ARG torch_version=1.13.1
 ARG python_version=3.8
 ARG torch_neuronx_version=1.13.1.1.11.0

diff --git a/wlm/README.md b/wlm/README.md
index 95a59269a..b0df8e789 100644
--- a/wlm/README.md
+++ b/wlm/README.md
@@ -56,7 +56,7 @@ You can pull the server from the central Maven repository by including the following dependency:
 
 ```xml
 <dependency>
     <groupId>ai.djl.serving</groupId>
     <artifactId>wlm</artifactId>
-    <version>0.23.0</version>
+    <version>0.24.0</version>
 </dependency>
 ```
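Once the release artifacts land on Maven Central, the bumped coordinates in the READMEs above can be verified by resolving them directly. A minimal sketch using the maven-dependency-plugin's `get` goal (assumes a local Maven install; any of the bumped artifacts can be substituted):

```
# Confirm the 0.24.0 artifacts referenced above resolve from Maven Central
mvn dependency:get -Dartifact=ai.djl.serving:wlm:0.24.0
mvn dependency:get -Dartifact=ai.djl.python:python:0.24.0
```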