[Bug Fix] import llama context ops fix (hpcaitech#4524)
* added _vllm_rms_norm

* change place

* added tests

* added tests

* modify

* adding kernels

* added tests

* adding kernels

* modify

* added

* updating kernels

* adding tests

* added tests

* kernel change

* submit

* modify

* added

* edit comments

* change name

* change comments and fix import

* add

* added

* fix

* add ops into init.py

* add
tiandiao123 committed Sep 7, 2023
1 parent 57b5f25 commit 02c2409
Showing 4 changed files with 14 additions and 4 deletions.
7 changes: 7 additions & 0 deletions colossalai/kernel/__init__.py
@@ -1,7 +1,14 @@
from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
from .triton import llama_context_attn_fwd, bloom_context_attn_fwd
from .triton import softmax
from .triton import copy_kv_cache_to_dest

__all__ = [
"LayerNorm",
"FusedScaleMaskSoftmax",
"MultiHeadAttention",
"llama_context_attn_fwd",
"bloom_context_attn_fwd",
"softmax",
"copy_kv_cache_to_dest",
]
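
With these re-exports in place, the four Triton ops resolve directly from the package root (colossalai.kernel) as well as from colossalai.kernel.triton, which is what the updated tests below rely on. A minimal sketch of guarding that import on machines without Triton, mirroring the tests' pattern; the HAS_TRITON_OPS name is illustrative and not part of this commit:

# Minimal sketch: the ops re-exported above can now be imported from the package root.
# The try/except mirrors the guard used in the test files changed by this commit.
try:
    from colossalai.kernel import (
        bloom_context_attn_fwd,
        copy_kv_cache_to_dest,
        llama_context_attn_fwd,
        softmax,
    )
    HAS_TRITON_OPS = True
except ImportError:
    # Triton (or the CUDA toolchain it needs) is unavailable in this environment.
    HAS_TRITON_OPS = False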
3 changes: 3 additions & 0 deletions colossalai/kernel/triton/__init__.py
@@ -0,0 +1,3 @@
from .context_attention import llama_context_attn_fwd, bloom_context_attn_fwd
from .softmax import softmax
from .copy_kv_cache_dest import copy_kv_cache_to_dest
4 changes: 2 additions & 2 deletions tests/test_infer_ops/triton/test_bloom_context_attention.py
@@ -9,8 +9,8 @@
try:
    import triton
    import triton.language as tl
-   from tests.test_kernels.triton.utils import benchmark, torch_context_attention
-   from colossalai.kernel.triton.context_attention import bloom_context_attn_fwd
+   from tests.test_infer_ops.triton.utils import benchmark, torch_context_attention
+   from colossalai.kernel.triton import bloom_context_attn_fwd
    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False
4 changes: 2 additions & 2 deletions tests/test_infer_ops/triton/test_llama_context_attention.py
@@ -9,8 +9,8 @@
try:
    import triton
    import triton.language as tl
-   from tests.test_kernels.triton.utils import benchmark, torch_context_attention
-   from colossalai.kernel.triton.context_attention import llama_context_attn_fwd
+   from tests.test_infer_ops.triton.utils import benchmark, torch_context_attention
+   from colossalai.kernel.triton import llama_context_attn_fwd
    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False
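
Both test files use the same guard: the imports are wrapped in try/except and the outcome recorded in HAS_TRITON, which presumably feeds a pytest skip condition so the tests are skipped on machines without Triton. A generic sketch of that skip pattern, assuming Python and pytest; the test body is illustrative and not taken from these files:

import pytest

try:
    import triton
    from colossalai.kernel.triton import llama_context_attn_fwd
    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False

# Illustrative test body only; the real tests also import the benchmark and
# torch_context_attention helpers shown in the diffs above.
@pytest.mark.skipif(not HAS_TRITON, reason="triton is not available")
def test_llama_context_attn_fwd_is_importable():
    # Smoke check: the kernel symbol resolves from the new import path.
    assert callable(llama_context_attn_fwd)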
