Integrate with seq_scheduler wheel (#1122)

Co-authored-by: KexinFeng <[email protected]>
deepjavalibrary · Sep 30, 2023 · 1b8f783
1 parent 2311de6
commit 1b8f783
Show file tree

Hide file tree

Showing 20 changed files with 43 additions and 47 deletions.
diff --git a/engines/python/setup/djl_python/huggingface.py b/engines/python/setup/djl_python/huggingface.py
@@ -28,7 +28,7 @@
 from djl_python.inputs import Input
 from djl_python.outputs import Output
 from djl_python.streaming_utils import StreamingUtils
-from djl_python.rolling_batch import SchedulerRollingBatch
+from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
 
 ARCHITECTURES_2_TASK = {
     "TapasForQuestionAnswering": "table-question-answering",

diff --git a/engines/python/setup/djl_python/rolling_batch/__init__.py b/engines/python/setup/djl_python/rolling_batch/__init__.py
@@ -10,5 +10,3 @@
 # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
-
-from .scheduler_rolling_batch import SchedulerRollingBatch
diff --git a/engines/python/setup/djl_python/rolling_batch/scheduler_rolling_batch.py b/engines/python/setup/djl_python/rolling_batch/scheduler_rolling_batch.py
@@ -11,14 +11,14 @@
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
 
-from djl_python.scheduler import HuggingfaceBlock, BloomBlock, FalconBlock, SearchConfig, SeqBatchScheduler
-# from seq_scheduler import HuggingfaceBlock, BloomBlock, FalconBlock, SearchConfig, SeqBatchScheduler
+from seq_scheduler.lm_block import HuggingfaceBlock, BloomBlock, FalconBlock
+from seq_scheduler.search_config import SearchConfig
+from seq_scheduler.seq_batch_scheduler import SeqBatchScheduler
 from collections import namedtuple, defaultdict
 from djl_python.rolling_batch.rolling_batch import RollingBatch, stop_on_any_exception
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
 import torch
-import re
 
 MODEL_TYPE_2_BLOCK = {'bloom': BloomBlock, 'falcon': FalconBlock}
 DEFAULT_SEARCH_ALGORITHM = 'greedy'

diff --git a/...on/setup/djl_python/scheduler/__init__.py → ...etup/djl_python/seq_scheduler/__init__.py b/...on/setup/djl_python/scheduler/__init__.py → ...etup/djl_python/seq_scheduler/__init__.py
@@ -10,7 +10,3 @@
 # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
-
-from .lm_block import HuggingfaceBlock, BloomBlock, FalconBlock
-from .search_config import SearchConfig
-from .seq_batch_scheduler import SeqBatchScheduler
diff --git a/...ython/setup/djl_python/scheduler/batch.py → ...n/setup/djl_python/seq_scheduler/batch.py b/...ython/setup/djl_python/scheduler/batch.py → ...n/setup/djl_python/seq_scheduler/batch.py
@@ -13,7 +13,7 @@
 from __future__ import annotations
 
 import torch
-from djl_python.scheduler.utils import merge_tensors, trim_tensor, nudge_tensor
+from djl_python.seq_scheduler.utils import merge_tensors, trim_tensor, nudge_tensor
 from abc import ABC, abstractmethod
 
 

diff --git a/...on/setup/djl_python/scheduler/lm_block.py → ...etup/djl_python/seq_scheduler/lm_block.py b/...on/setup/djl_python/scheduler/lm_block.py → ...etup/djl_python/seq_scheduler/lm_block.py
diff --git a/...tup/djl_python/scheduler/search_config.py → ...djl_python/seq_scheduler/search_config.py b/...tup/djl_python/scheduler/search_config.py → ...djl_python/seq_scheduler/search_config.py
diff --git a/...l_python/scheduler/seq_batch_scheduler.py → ...thon/seq_scheduler/seq_batch_scheduler.py b/...l_python/scheduler/seq_batch_scheduler.py → ...thon/seq_scheduler/seq_batch_scheduler.py
@@ -16,11 +16,11 @@
 import torch
 import copy
 
-from djl_python.scheduler.search_config import SearchConfig
-from djl_python.scheduler.lm_block import LMBlock
-from djl_python.scheduler.seq_batcher import SeqBatcher
-from djl_python.scheduler.seq_batcher_impl import GreedySeqBatcher, ContrastiveSeqBatcher
-from djl_python.scheduler.utils import compute_kv_cache
+from djl_python.seq_scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.lm_block import LMBlock
+from djl_python.seq_scheduler.seq_batcher import SeqBatcher
+from djl_python.seq_scheduler.seq_batcher_impl import GreedySeqBatcher, ContrastiveSeqBatcher
+from djl_python.seq_scheduler.utils import compute_kv_cache
 
 SEARCH_ALGORITHM_TO_CLASS = {
     "greedy": GreedySeqBatcher,

diff --git a/...setup/djl_python/scheduler/seq_batcher.py → ...p/djl_python/seq_scheduler/seq_batcher.py b/...setup/djl_python/scheduler/seq_batcher.py → ...p/djl_python/seq_scheduler/seq_batcher.py
@@ -16,10 +16,10 @@
 from typing import Dict, Union, Tuple, List, Any
 from abc import ABC, abstractmethod
 
-from djl_python.scheduler.batch import Batch, ContrastiveBatch
-from djl_python.scheduler.lm_block import LMBlock
+from djl_python.seq_scheduler.batch import Batch, ContrastiveBatch
+from djl_python.seq_scheduler.lm_block import LMBlock
 import torch
-from djl_python.scheduler import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 
 
 class SeqBatcher(ABC):

diff --git a/.../djl_python/scheduler/seq_batcher_impl.py → ..._python/seq_scheduler/seq_batcher_impl.py b/.../djl_python/scheduler/seq_batcher_impl.py → ..._python/seq_scheduler/seq_batcher_impl.py
@@ -16,19 +16,19 @@
 from collections import defaultdict
 from typing import Dict, Union, Tuple, List, Any
 
-from djl_python.scheduler.batch import Batch, ContrastiveBatch
-from djl_python.scheduler.lm_block import LMBlock
+from djl_python.seq_scheduler.batch import Batch, ContrastiveBatch
+from djl_python.seq_scheduler.lm_block import LMBlock
 import torch
-from torch.nn.functional import normalize, softmax
+from torch.nn.functional import softmax
 
-from djl_python.scheduler.step_generation import greedy_step_generate, contrastive_step_generate, sampling_step_generate, \
+from djl_python.seq_scheduler.step_generation import contrastive_step_generate, sampling_step_generate, \
     sampler_bucket_sort
-from djl_python.scheduler.utils import compute_offsets, compute_attention_mask, compute_position_ids, \
+from djl_python.seq_scheduler.utils import compute_offsets, compute_attention_mask, compute_position_ids, \
     assemble_prefix_kv_cache
-from djl_python.scheduler import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 import warnings
 
-from djl_python.scheduler.seq_batcher import SeqBatcher
+from djl_python.seq_scheduler.seq_batcher import SeqBatcher
 
 
 class GreedySeqBatcher(SeqBatcher):

diff --git a/...p/djl_python/scheduler/step_generation.py → ...l_python/seq_scheduler/step_generation.py b/...p/djl_python/scheduler/step_generation.py → ...l_python/seq_scheduler/step_generation.py
@@ -16,7 +16,7 @@
 import torch
 from torch.nn.functional import normalize, softmax
 from typing import Tuple, List, Dict
-from djl_python.scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 import numpy, heapq
 
 

diff --git a/...ython/setup/djl_python/scheduler/utils.py → ...n/setup/djl_python/seq_scheduler/utils.py b/...ython/setup/djl_python/scheduler/utils.py → ...n/setup/djl_python/seq_scheduler/utils.py
@@ -14,8 +14,8 @@
 
 import torch
 
-from djl_python.scheduler.lm_block import LMBlock
-from djl_python.scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.lm_block import LMBlock
+from djl_python.seq_scheduler.search_config import SearchConfig
 
 
 def merge_tensors(tensor1: torch.Tensor,

diff --git a/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/efficiency_benchmark.py b/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/efficiency_benchmark.py
@@ -1,12 +1,12 @@
-from djl_python.scheduler import HuggingfaceBlock, BloomBlock
+from djl_python.seq_scheduler.lm_block import HuggingfaceBlock, BloomBlock
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 import torch
 from collections import defaultdict
 
 from transformers import AutoTokenizer, BloomForCausalLM
 
-from djl_python.scheduler import SearchConfig
-from djl_python.scheduler.seq_batch_scheduler import SeqBatchScheduler
+from djl_python.seq_scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.seq_batch_scheduler import SeqBatchScheduler
 from typing import List
 
 from functools import wraps

diff --git a/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/rolling_batch_scheduler.py b/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/rolling_batch_scheduler.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 import torch
-from djl_python.rolling_batch import SchedulerRollingBatch
+from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
 import torch.distributed as dist
 
 

diff --git a/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/run_rolling_batch_alone.py b/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/run_rolling_batch_alone.py
@@ -17,7 +17,7 @@
 
 def get_rolling_batch_class_from_str(rolling_batch_type: str):
     if rolling_batch_type == "scheduler":
-        from djl_python.rolling_batch import SchedulerRollingBatch
+        from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
         return SchedulerRollingBatch
     elif rolling_batch_type == "lmi-dist":
         from djl_python.rolling_batch.lmi_dist_rolling_batch import LmiDistRollingBatch

diff --git a/...python/setup/djl_python/tests/rolling_batch_test_scripts/test_rolling_batch_scheduler2.py b/...python/setup/djl_python/tests/rolling_batch_test_scripts/test_rolling_batch_scheduler2.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 import torch
-from djl_python.rolling_batch import SchedulerRollingBatch
+from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
 import torch.distributed as dist
 
 

diff --git a/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/test_scheduler_sharded.py b/engines/python/setup/djl_python/tests/rolling_batch_test_scripts/test_scheduler_sharded.py
@@ -1,7 +1,7 @@
-from djl_python.scheduler.lm_block import HuggingfaceBlock
-from djl_python.scheduler.seq_batch_scheduler import SeqBatchScheduler
+from djl_python.seq_scheduler.lm_block import HuggingfaceBlock
+from djl_python.seq_scheduler.seq_batch_scheduler import SeqBatchScheduler
 from transformers import AutoConfig
-from djl_python.scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 import torch
 from transformers import AutoTokenizer
 

diff --git a/engines/python/setup/djl_python/tests/test_scheduler.py b/engines/python/setup/djl_python/tests/test_scheduler.py
@@ -2,13 +2,13 @@
 from collections import defaultdict
 import copy
 
-from djl_python.scheduler.lm_block import FalconBlock, HuggingfaceBlock
-from djl_python.scheduler.utils import compute_offsets, compute_position_ids, compute_attention_mask, merge_tensors, \
+from djl_python.seq_scheduler.lm_block import FalconBlock, HuggingfaceBlock
+from djl_python.seq_scheduler.utils import compute_offsets, compute_position_ids, compute_attention_mask, merge_tensors, \
     trim_tensor, compute_kv_cache
-from djl_python.scheduler.seq_batch_scheduler import SeqBatchScheduler
-from djl_python.scheduler.seq_batcher_impl import ContrastiveSeqBatcher
+from djl_python.seq_scheduler.seq_batch_scheduler import SeqBatchScheduler
+from djl_python.seq_scheduler.seq_batcher_impl import ContrastiveSeqBatcher
 from transformers import AutoModelForCausalLM
-from djl_python.scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 import torch
 
 from transformers import GPT2LMHeadModel, GPT2Tokenizer

diff --git a/engines/python/setup/djl_python/tests/test_scheduler_bloom.py b/engines/python/setup/djl_python/tests/test_scheduler_bloom.py
@@ -1,9 +1,9 @@
 import unittest
 
-from djl_python.scheduler.lm_block import BloomBlock, FalconBlock, HuggingfaceBlock
-from djl_python.scheduler.seq_batch_scheduler import SeqBatchScheduler
+from djl_python.seq_scheduler.lm_block import BloomBlock, FalconBlock, HuggingfaceBlock
+from djl_python.seq_scheduler.seq_batch_scheduler import SeqBatchScheduler
 from transformers import AutoConfig, BloomForCausalLM, AutoTokenizer
-from djl_python.scheduler.search_config import SearchConfig
+from djl_python.seq_scheduler.search_config import SearchConfig
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 

diff --git a/serving/docker/deepspeed.Dockerfile b/serving/docker/deepspeed.Dockerfile
@@ -22,6 +22,7 @@ ARG rotary_emb_wheel="https://publish.djl.ai/flash_attn/rotary_emb-0.1-cp39-cp39
 ARG flash_attn_2_wheel="https://publish.djl.ai/flash_attn/flash_attn_2-2.3.0-cp39-cp39-linux_x86_64.whl"
 ARG vllm_wheel="https://publish.djl.ai/vllm/vllm-0.1.1-cp39-cp39-linux_x86_64.whl"
 ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-nightly-py3-none-any.whl"
+ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-nightly-py3-none-any.whl"
 ARG peft_wheel="https://publish.djl.ai/peft/peft-0.5.0alpha-py3-none-any.whl"
 ARG protobuf_version=3.20.3
 ARG transformers_version=4.33.2
@@ -66,7 +67,8 @@ RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev && \
     pip3 install torch==${torch_version} torchvision==${torch_vision_version} --extra-index-url https://download.pytorch.org/whl/cu118 \
     ${deepspeed_wheel} ${flash_attn_wheel} ${dropout_layer_norm_wheel} ${rotary_emb_wheel} ${flash_attn_2_wheel} \
-    ${vllm_wheel} ${lmi_dist_wheel} ${peft_wheel} protobuf==${protobuf_version} transformers==${transformers_version} \
+    ${vllm_wheel} ${lmi_dist_wheel} ${seq_scheduler_wheel} ${peft_wheel} protobuf==${protobuf_version} \
+    transformers==${transformers_version} \
     mpi4py sentencepiece einops accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version}\
     diffusers[torch]==${diffusers_version} opencv-contrib-python-headless safetensors scipy && \
     scripts/install_aitemplate.sh && \