Skip to content

Commit

Permalink
Runtime stitching APIs and sanity tests, ttnn runtime submit refactor (
Browse files Browse the repository at this point in the history
  • Loading branch information
jnie-TT authored Dec 4, 2024
1 parent ca09c69 commit 29281af
Show file tree
Hide file tree
Showing 63 changed files with 2,201 additions and 620 deletions.
96 changes: 96 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,102 @@ jobs:
report_paths: ${{ steps.strings.outputs.test_report_path }}
check_name: Run ttrt tests

run-runtime-api-tests:

timeout-minutes: 30
needs:
- build-image
- build-ttmlir
strategy:
fail-fast: false
matrix:
build: [
{runs-on: n150, enable_perf: OFF, name: "run"},
]

runs-on:
- in-service
- ${{ matrix.build.runs-on }}

container:
image: ${{ needs.build-image.outputs.docker-image }}
options: --device /dev/tenstorrent/0
volumes:
- /dev/hugepages:/dev/hugepages
- /dev/hugepages-1G:/dev/hugepages-1G
- /etc/udev/rules.d:/etc/udev/rules.d
- /lib/modules:/lib/modules
- /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set reusable strings
id: strings
shell: bash
run: |
echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT"
echo "install-output-dir=$(pwd)/install" >> "$GITHUB_OUTPUT"
- name: Git safe dir
run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }}

- name: Use build artifacts
uses: actions/download-artifact@v4
with:
name: install-artifacts-${{ matrix.build.name }}
path: ${{ steps.strings.outputs.install-output-dir }}

# This is needed to preserve file permissions
# https://github.com/actions/upload-artifact?tab=readme-ov-file#permission-loss
- name: 'Untar install directory'
shell: bash
working-directory: ${{ steps.strings.outputs.install-output-dir }}
run: tar xvf artifact.tar

- name: Remove existing whls files
shell: bash
run: |
rm -f *.whl
- name: Download ttrt run whls
uses: actions/download-artifact@v4
with:
name: ttrt-whl-${{ matrix.build.name }}

# Runtime tests currently require ttrt whls to be installed
- name: Install ttrt run whls
shell: bash
run: |
source env/activate
pip show ttrt && pip uninstall -y ttrt
pip install ttrt-${{ env.version }}*.whl --force-reinstall
pip install pytest
- name: Generate system descriptor
shell: bash
run: |
source env/activate
ttrt query --save-artifacts
- name: Generate tests
shell: bash
run: |
source env/activate
export LD_LIBRARY_PATH="${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}"
export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys"
ln -sf ${{ steps.strings.outputs.install-output-dir }} ${{ steps.strings.outputs.build-output-dir }}
llvm-lit -sv ${{ steps.strings.outputs.build-output-dir }}/test
- name: ttnn api tests
shell: bash
run: |
source env/activate
pytest -ssv runtime/test/python/ttnn/test_runtime_api.py
build-and-test-explorer:
needs: build-image
timeout-minutes: 60
Expand Down
5 changes: 2 additions & 3 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set(TT_RUNTIME_OPTIONS
TT_RUNTIME_DEBUG
TT_RUNTIME_ENABLE_PERF_TRACE
TT_RUNTIME_WORKAROUNDS
TTMLIR_ENABLE_RUNTIME_TESTS
)

foreach(OPTION ${TT_RUNTIME_OPTIONS})
Expand All @@ -24,6 +25,4 @@ endforeach()

add_subdirectory(lib)
add_subdirectory(tools)
# Add test/ exactly once and unconditionally. Adding it both inside an
# `if (TTMLIR_ENABLE_RUNTIME_TESTS)` guard and again afterwards would register
# the same source directory twice whenever the option is on.
# NOTE(review): TTMLIR_ENABLE_RUNTIME_TESTS is listed in TT_RUNTIME_OPTIONS
# above; presumably test/ gates its own targets on it -- confirm in
# test/CMakeLists.txt.
add_subdirectory(test)
12 changes: 8 additions & 4 deletions runtime/include/tt/runtime/detail/ttmetal.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,16 @@ void closeDevice(Device device);

void deallocateBuffers(Device device);

Event submit(Device device, Binary executable, std::uint32_t programIndex,
std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

void wait(Event event);

void wait(Tensor tensor);

void wait(std::vector<Tensor> const &tensors);

Event submit(Device deviceHandle, Binary executableHandle,
std::uint32_t programIndex, std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

std::string getOpDebugString(OpContext opContextHandle);

std::string getOpLocInfo(OpContext opContextHandle);
Expand Down
55 changes: 47 additions & 8 deletions runtime/include/tt/runtime/detail/ttnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,27 @@ createTensor(std::vector<std::shared_ptr<void>> &data,
::tt::target::DataType dataType,
std::unordered_map<std::string, std::string> const &strategy);

Tensor createTensor(Device device, Layout layout,
std::vector<std::uint32_t> const &shape,
std::vector<std::uint32_t> const &stride,
std::uint32_t itemsize);

// TensorDesc convenience overload: forwards `data` together with the
// descriptor's shape, stride, itemsize and dataType to the explicit-argument
// createTensor overload.
//
// The callee is fully qualified so that exactly one return statement remains
// and the call reads the same as the (Device, Layout, TensorDesc) overload in
// this header, whose arguments would otherwise make other namespaces'
// createTensor overloads visible through argument-dependent lookup.
inline Tensor createTensor(std::shared_ptr<void> data, TensorDesc const &desc) {
  return ::tt::runtime::ttnn::createTensor(data, desc.shape, desc.stride,
                                           desc.itemsize, desc.dataType);
}

// TensorDesc convenience overload for multiple data buffers: forwards `data`,
// the descriptor's shape, stride, itemsize and dataType, and the `strategy`
// map to the explicit-argument createTensor overload.
//
// The callee is fully qualified and is the only return statement, so no
// unreachable second call is left behind.
inline Tensor
createTensor(std::vector<std::shared_ptr<void>> &data, TensorDesc const &desc,
             std::unordered_map<std::string, std::string> const &strategy) {
  return ::tt::runtime::ttnn::createTensor(
      data, desc.shape, desc.stride, desc.itemsize, desc.dataType, strategy);
}

// TensorDesc convenience overload taking a Device and a Layout: unpacks the
// descriptor's shape, stride and itemsize and delegates to the
// explicit-argument createTensor overload.
inline Tensor createTensor(Device device, Layout layout,
                           TensorDesc const &desc) {
  auto const &tensorShape = desc.shape;
  auto const &tensorStride = desc.stride;
  auto const &elementSize = desc.itemsize;
  return ::tt::runtime::ttnn::createTensor(device, layout, tensorShape,
                                           tensorStride, elementSize);
}

tt::target::DataType getTensorDataType(Tensor tensor);
Expand All @@ -75,12 +86,23 @@ void closeDevice(Device device);

void deallocateBuffers(Device device);

Event submit(Device device, Binary executable, std::uint32_t programIndex,
std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

void wait(Event event);

void wait(Tensor tensor);

void wait(std::vector<Tensor> const &tensors);

Tensor toHost(Tensor tensor, bool untilize = false);

Tensor toLayout(Tensor tensor, Device device, Layout layout);

Layout getLayout(Binary executableHandle, std::uint32_t programIndex,
std::uint32_t inputIndex);

void memcpy(Tensor dst, Tensor src);

void deallocateTensor(Tensor &tensor, bool force = false);

std::string getOpDebugString(OpContext opContextHandle);

std::string getOpLocInfo(OpContext opContextHandle);
Expand All @@ -90,10 +112,27 @@ Tensor getOpOutputTensor(OpContext opContextHandle,

std::vector<float> getTensorData(Tensor tensor);

// Older entry points that take caller-provided output tensors. The
// non-legacy submit/runProgram overloads declared after this namespace return
// their outputs as std::vector<Tensor> instead.
namespace legacy {
/* Will be deprecated once FEs (presumably frontends) migrate to the new API */

// Submits program `programIndex` of `executableHandle` on `deviceHandle` with
// the given `inputs` and caller-provided `outputs`; returns an Event.
Event submit(Device deviceHandle, Binary executableHandle,
std::uint32_t programIndex, std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

// Same shape of call as submit above, but expressed directly on a
// ::ttnn::MeshDevice and raw ::ttnn::Tensor pointers; returns nothing.
void runProgram(::ttnn::MeshDevice &meshDevice, Binary &executableHandle,
std::uint32_t programIndex,
std::vector<::ttnn::Tensor *> const &inputs,
std::vector<::ttnn::Tensor *> const &outputs);
} // namespace legacy

std::vector<Tensor> submit(Device deviceHandle, Binary executableHandle,
std::uint32_t programIndex,
std::vector<Tensor> const &inputs);

std::vector<Tensor> runProgram(::ttnn::MeshDevice &meshDevice,
Binary executableHandle,
std::uint32_t programIndex,
std::vector<::ttnn::Tensor *> const &inputs);

} // namespace tt::runtime::ttnn

Expand Down
46 changes: 38 additions & 8 deletions runtime/include/tt/runtime/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,27 @@ createTensor(std::vector<std::shared_ptr<void>> &data,
::tt::target::DataType dataType,
std::unordered_map<std::string, std::string> const &strategy);

Tensor createTensor(Device device, Layout layout,
std::vector<std::uint32_t> const &shape,
std::vector<std::uint32_t> const &stride,
std::uint32_t itemsize);

// TensorDesc convenience overload: forwards `data` together with the
// descriptor's shape, stride, itemsize and dataType to the explicit-argument
// createTensor overload.
//
// The callee is fully qualified and is the only return statement, matching
// the (Device, Layout, TensorDesc) overload in this header.
inline Tensor createTensor(std::shared_ptr<void> data, TensorDesc const &desc) {
  return ::tt::runtime::createTensor(data, desc.shape, desc.stride,
                                     desc.itemsize, desc.dataType);
}

// TensorDesc convenience overload for multiple data buffers: forwards `data`,
// the descriptor's shape, stride, itemsize and dataType, and the `strategy`
// map to the explicit-argument createTensor overload.
//
// The callee is fully qualified and is the only return statement, so no
// unreachable second call is left behind.
inline Tensor
createTensor(std::vector<std::shared_ptr<void>> &data, TensorDesc const &desc,
             std::unordered_map<std::string, std::string> const &strategy) {
  return ::tt::runtime::createTensor(data, desc.shape, desc.stride,
                                     desc.itemsize, desc.dataType, strategy);
}

// TensorDesc convenience overload taking a Device and a Layout: unpacks the
// descriptor's shape, stride and itemsize and delegates to the
// explicit-argument createTensor overload.
inline Tensor createTensor(Device device, Layout layout,
                           TensorDesc const &desc) {
  auto const &tensorShape = desc.shape;
  auto const &tensorStride = desc.stride;
  auto const &elementSize = desc.itemsize;
  return ::tt::runtime::createTensor(device, layout, tensorShape, tensorStride,
                                     elementSize);
}

tt::target::DataType getTensorDataType(Tensor tensor);
Expand All @@ -63,12 +74,23 @@ Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs = 1);

void closeDevice(Device device);

Event submit(Device device, Binary executable, std::uint32_t programIndex,
std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

void wait(Event event);

void wait(Tensor tensor);

void wait(std::vector<Tensor> const &tensors);

Tensor toHost(Tensor tensor, bool untilize = false);

Tensor toLayout(Tensor tensor, Device device, Layout layout);

Layout getLayout(Binary executableHandle, std::uint32_t programIndex,
std::uint32_t inputIndex);

void memcpy(Tensor dst, Tensor src);

void deallocateTensor(Tensor &tensor, bool force = false);

std::string getOpDebugString(OpContext opContextHandle);

std::string getOpLocInfo(OpContext opContextHandle);
Expand All @@ -78,6 +100,14 @@ Tensor getOpOutputTensor(OpContext opContextHandle,

std::vector<float> getTensorData(Tensor tensor);

std::vector<Tensor> submit(Device deviceHandle, Binary executableHandle,
std::uint32_t programIndex,
std::vector<Tensor> const &inputs);

Event submit(Device deviceHandle, Binary executableHandle,
std::uint32_t programIndex, std::vector<Tensor> const &inputs,
std::vector<Tensor> const &outputs);

} // namespace tt::runtime

#endif
17 changes: 17 additions & 0 deletions runtime/include/tt/runtime/test/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TT_RUNTIME_TEST_UTILS_H
#define TT_RUNTIME_TEST_UTILS_H

#include "tt/runtime/types.h"

// Utility functions for testing TTNN runtime
//
// Each helper takes a ::tt::target::DataType and returns a runtime Layout
// handle. Only the declarations are visible here; going by the names, the
// three variants are presumably DRAM-interleaved tiled, DRAM-interleaved
// row-major, and host row-major layouts -- confirm against the definitions.
namespace tt::runtime::ttnn::test {
Layout getDramInterleavedTileLayout(::tt::target::DataType dataType);
Layout getDramInterleavedRowMajorLayout(::tt::target::DataType dataType);
Layout getHostRowMajorLayout(::tt::target::DataType dataType);
} // namespace tt::runtime::ttnn::test

#endif // TT_RUNTIME_TEST_UTILS_H
14 changes: 12 additions & 2 deletions runtime/include/tt/runtime/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,20 @@ struct Event : public detail::RuntimeCheckedObjectImpl {

// Runtime-checked tensor handle that additionally stores a data pointer and
// an Event.
struct Tensor : public detail::RuntimeCheckedObjectImpl {
  // Type-erased pointer copied from the `data` constructor argument.
  std::shared_ptr<void> data;
  // Event stored alongside the tensor. The three-argument constructor
  // initializes it with a null handle.
  Event event;

  // Builds a tensor from `handle` and `data`; the event gets a null handle
  // and the same `runtime` as the tensor.
  Tensor(std::shared_ptr<void> handle, std::shared_ptr<void> data,
         DeviceRuntime runtime)
      : detail::RuntimeCheckedObjectImpl(handle, runtime), data(data),
        event(nullptr, runtime) {}

  // Builds a tensor from `handle` and `data` whose event is constructed from
  // `eventHandle` and the same `runtime`.
  Tensor(std::shared_ptr<void> handle, std::shared_ptr<void> data,
         std::shared_ptr<void> eventHandle, DeviceRuntime runtime)
      : detail::RuntimeCheckedObjectImpl(handle, runtime), data(data),
        event(eventHandle, runtime) {}
};

// Runtime-checked handle type for layouts. It declares no members of its own
// and inherits the base class constructors through the using-declaration.
struct Layout : public detail::RuntimeCheckedObjectImpl {
using detail::RuntimeCheckedObjectImpl::RuntimeCheckedObjectImpl;
};

struct CallbackContext : public detail::RuntimeCheckedObjectImpl {
Expand Down
Loading

0 comments on commit 29281af

Please sign in to comment.