Skip to content

Commit

Permalink
Merge branch 'blackhole'
Browse files Browse the repository at this point in the history
  • Loading branch information
alewycky-tenstorrent committed Dec 19, 2024
2 parents 79d811a + ddcf4f2 commit 145626a
Show file tree
Hide file tree
Showing 17 changed files with 110 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: GPL-2.0-only

obj-m += tenstorrent.o
tenstorrent-y := module.o chardev.o enumerate.o interrupt.o grayskull.o wormhole.o pcie.o hwmon.o sg_helpers.o memory.o
tenstorrent-y := module.o chardev.o enumerate.o interrupt.o grayskull.o wormhole.o blackhole.o pcie.o hwmon.o sg_helpers.o memory.o

KDIR := /lib/modules/$(shell uname -r)/build
KMAKE := $(MAKE) -C $(KDIR) M=$(CURDIR)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
## Supported hardware:
* Grayskull
* Wormhole
* Blackhole

The driver registers device files named `/dev/tenstorrent/%d`, one for each enumerated device.

Expand All @@ -18,14 +19,13 @@ The driver registers device files named `/dev/tenstorrent/%d`, one for each enum
* `dnf install epel-release && dnf install dkms` (Enterprise Linux based)
```
sudo dkms add .
sudo dkms install tenstorrent/1.30
sudo dkms install tenstorrent/1.30-bh
sudo modprobe tenstorrent
```
(or reboot, driver will auto-load next boot)

### To uninstall:
```
sudo modprobe -r tenstorrent
sudo dkms remove tenstorrent/1.30 --all
sudo dkms remove tenstorrent/1.30-bh --all
```

42 changes: 42 additions & 0 deletions blackhole.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>

#include "blackhole.h"
#include "pcie.h"
#include "module.h"

// Value handed to pcie_set_readrq() by blackhole_init_hardware(): the PCIe
// maximum read request size requested for the device, in bytes.
#define MAX_MRRS 4096

// init_device hook of blackhole_class. No work is performed here; the hook
// unconditionally reports success.
static bool blackhole_init(struct tenstorrent_device *tt_dev)
{
	(void)tt_dev;	// intentionally unused

	return true;
}

// init_hardware hook of blackhole_class. Requests a PCIe maximum read request
// size of MAX_MRRS for the device's pci_dev and then reports success.
static bool blackhole_init_hardware(struct tenstorrent_device *tt_dev)
{
	// NOTE(review): the result of pcie_set_readrq() is not checked, so this
	// hook succeeds even if the request is rejected - presumably a
	// deliberate best-effort; confirm.
	pcie_set_readrq(tt_dev->pdev, MAX_MRRS);

	return true;
}

// post_hardware_init hook of blackhole_class. Nothing is done after hardware
// initialisation; the hook unconditionally reports success.
static bool blackhole_post_hardware_init(struct tenstorrent_device *tt_dev)
{
	(void)tt_dev;	// intentionally unused

	return true;
}

// cleanup_hardware hook of blackhole_class. Deliberately empty: this file
// performs no hardware teardown.
static void blackhole_cleanup_hardware(struct tenstorrent_device *tt_dev)
{
	(void)tt_dev;	// intentionally unused
}

// cleanup_device hook of blackhole_class. Deliberately empty: blackhole_init()
// sets nothing up, so there is nothing to release.
static void blackhole_cleanup(struct tenstorrent_device *tt_dev)
{
	(void)tt_dev;	// intentionally unused
}

// Device-class descriptor for Blackhole: the display name, the per-device
// allocation size, the DMA address width, and the lifecycle hooks defined
// above in this file.
struct tenstorrent_device_class blackhole_class = {
	.name			= "Blackhole",
	.instance_size		= sizeof(struct blackhole_device),
	// Width, in bits, of the DMA addresses this class declares.
	.dma_address_bits	= 58,
	.init_device		= blackhole_init,
	.init_hardware		= blackhole_init_hardware,
	.post_hardware_init	= blackhole_post_hardware_init,
	.cleanup_hardware	= blackhole_cleanup_hardware,
	.cleanup_device		= blackhole_cleanup,
};
17 changes: 17 additions & 0 deletions blackhole.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-License-Identifier: GPL-2.0-only

#ifndef TTDRIVER_BLACKHOLE_H_INCLUDED
#define TTDRIVER_BLACKHOLE_H_INCLUDED

#include <linux/types.h>
#include "device.h"

// Per-device state for a Blackhole card. It currently consists solely of the
// common struct tenstorrent_device, embedded as member 'tt'.
struct blackhole_device {
	struct tenstorrent_device tt;	// common device state; anchor for container_of()
};

// Convert a pointer to the embedded 'tt' member (struct tenstorrent_device *)
// back into a pointer to the struct blackhole_device that contains it.
//
// The expansion uses the macro argument itself, so it works for any pointer
// expression rather than only for a caller-scope variable named 'tt_dev'.
//
// NOTE(review): the "wh" in the name looks carried over from another chip's
// header; the name is kept unchanged so existing users still compile - confirm
// whether a "bh" name is wanted.
#define tt_dev_to_wh_dev(ttdev) \
	container_of((ttdev), struct blackhole_device, tt)

#endif
3 changes: 2 additions & 1 deletion chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,10 @@ static long ioctl_reset_device(struct chardev_private *priv,
ok = priv->device->dev_class->init_hardware(priv->device);
else
ok = false;

} else if (in.flags == TENSTORRENT_RESET_DEVICE_RESET_PCIE_LINK) {
ok = pcie_hot_reset_and_restore_state(pdev);
} else if (in.flags == TENSTORRENT_RESET_DEVICE_CONFIG_WRITE) {
ok = pcie_timer_interrupt(pdev);
} else {
return -EINVAL;
}
Expand Down
1 change: 1 addition & 0 deletions device.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct tenstorrent_device {
struct tenstorrent_device_class {
const char *name;
u32 instance_size;
u32 dma_address_bits;
bool (*init_device)(struct tenstorrent_device *ttdev);
bool (*init_hardware)(struct tenstorrent_device *ttdev);
bool (*post_hardware_init)(struct tenstorrent_device *ttdev);
Expand Down
2 changes: 1 addition & 1 deletion dkms.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
PACKAGE_NAME="tenstorrent"
PACKAGE_VERSION="1.30"
PACKAGE_VERSION="1.30-bh"
BUILT_MODULE_NAME="tenstorrent"

DEST_MODULE_LOCATION="/kernel/extra"
Expand Down
3 changes: 2 additions & 1 deletion enumerate.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ static int tenstorrent_pci_probe(struct pci_dev *dev, const struct pci_device_id

mutex_init(&tt_dev->chardev_mutex);

tt_dev->dma_capable = (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(dma_address_bits ?: 32)) == 0);
tt_dev->dma_capable = (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(dma_address_bits ?: device_class->dma_address_bits)) == 0);

// Max these to ensure the IOVA allocator will not split large pinned regions.
dma_set_max_seg_size(&dev->dev, UINT_MAX);
dma_set_seg_boundary(&dev->dev, ULONG_MAX);
Expand Down
1 change: 1 addition & 0 deletions enumerate.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define PCI_VENDOR_ID_TENSTORRENT 0x1E52
#define PCI_DEVICE_ID_GRAYSKULL 0xFACA
#define PCI_DEVICE_ID_WORMHOLE 0x401E
#define PCI_DEVICE_ID_BLACKHOLE 0xB140

struct pci_dev;
struct cdev;
Expand Down
1 change: 1 addition & 0 deletions grayskull.c
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,7 @@ static void grayskull_last_release_handler(struct tenstorrent_device *tt_dev) {
struct tenstorrent_device_class grayskull_class = {
.name = "Grayskull",
.instance_size = sizeof(struct grayskull_device),
.dma_address_bits = 32,
.init_device = grayskull_init,
.init_hardware = grayskull_init_hardware,
.post_hardware_init = grayskull_post_hardware_init,
Expand Down
1 change: 1 addition & 0 deletions ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ struct tenstorrent_get_driver_info {
// tenstorrent_reset_device_in.flags
#define TENSTORRENT_RESET_DEVICE_RESTORE_STATE 0
#define TENSTORRENT_RESET_DEVICE_RESET_PCIE_LINK 1
#define TENSTORRENT_RESET_DEVICE_CONFIG_WRITE 2

struct tenstorrent_reset_device_in {
__u32 output_size_bytes;
Expand Down
14 changes: 7 additions & 7 deletions memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,17 @@ static void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npage
// These are the mmap offsets for various resources. In the user-kernel
// interface they are dynamic (TENSTORRENT_IOCTL_QUERY_MAPPINGS and
// TENSTORRENT_IOCTL_ALLOCATE_DMA_BUF), but they are actually hard-coded.
#define MMAP_OFFSET_RESOURCE0_UC (U64_C(0) << 32)
#define MMAP_OFFSET_RESOURCE0_WC (U64_C(1) << 32)
#define MMAP_OFFSET_RESOURCE1_UC (U64_C(2) << 32)
#define MMAP_OFFSET_RESOURCE1_WC (U64_C(3) << 32)
#define MMAP_OFFSET_RESOURCE2_UC (U64_C(4) << 32)
#define MMAP_OFFSET_RESOURCE2_WC (U64_C(5) << 32)
#define MMAP_OFFSET_RESOURCE0_UC (U64_C(0) << 35)
#define MMAP_OFFSET_RESOURCE0_WC (U64_C(1) << 35)
#define MMAP_OFFSET_RESOURCE1_UC (U64_C(2) << 35)
#define MMAP_OFFSET_RESOURCE1_WC (U64_C(3) << 35)
#define MMAP_OFFSET_RESOURCE2_UC (U64_C(4) << 35)
#define MMAP_OFFSET_RESOURCE2_WC (U64_C(5) << 35)

// tenstorrent_allocate_dma_buf_in.buf_index is u8 so that sets a limit of
// U8_MAX DMA buffers per fd. 32-bit mmap offsets are divided by PAGE_SIZE,
// so PAGE_SIZE << 32 is the largest possible offset.
#define MMAP_OFFSET_DMA_BUF ((u64)(PAGE_SIZE-U8_MAX) << 32)
#define MMAP_OFFSET_DMA_BUF ((u64)(PAGE_SIZE-U8_MAX-1) << 32)

#define MMAP_SIZE_DMA_BUF (U64_C(1) << 32)

Expand Down
5 changes: 4 additions & 1 deletion module.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "chardev.h"
#include "enumerate.h"

#define TTDRIVER_VER "1.30"
#define TTDRIVER_VER "1.30-bh"

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Tenstorrent AI kernel driver");
Expand Down Expand Up @@ -94,12 +94,15 @@ MODULE_PARM_DESC(auto_reset_timeout, "Timeout duration in seconds for M3 auto re
struct tenstorrent_device_class;
extern struct tenstorrent_device_class grayskull_class;
extern struct tenstorrent_device_class wormhole_class;
extern struct tenstorrent_device_class blackhole_class;

const struct pci_device_id tenstorrent_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_GRAYSKULL),
.driver_data=(kernel_ulong_t)&grayskull_class },
{ PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_WORMHOLE),
.driver_data=(kernel_ulong_t)&wormhole_class },
{ PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_BLACKHOLE),
.driver_data=(kernel_ulong_t)&blackhole_class },
{ 0 },
};

Expand Down
13 changes: 13 additions & 0 deletions pcie.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
#include "grayskull.h"

#define FW_MSG_PCIE_RETRAIN 0xB6
#define INTERFACE_TIMER_CONTROL_OFF 0x930
#define INTERFACE_TIMER_TARGET_OFF 0x934

#define INTERFACE_TIMER_TARGET 0x1
#define INTERFACE_TIMER_EN 0x1
#define INTERFACE_FORCE_PENDING 0x10

static bool poll_pcie_link_up(struct pci_dev *pdev, u32 timeout_ms) {
u16 tt_vendor_id;
Expand Down Expand Up @@ -116,3 +122,10 @@ bool complete_pcie_init(struct tenstorrent_device *tt_dev, u8 __iomem* reset_uni

return false;
}

// Perform two PCI configuration-space writes on pdev, in this order:
//   1. INTERFACE_TIMER_TARGET at offset INTERFACE_TIMER_TARGET_OFF
//   2. INTERFACE_TIMER_EN | INTERFACE_FORCE_PENDING at offset
//      INTERFACE_TIMER_CONTROL_OFF
// NOTE(review): judging by the names, this arms the interface timer and forces
// its interrupt pending - confirm against the hardware documentation.
//
// Returns true only if both writes succeeded. pci_write_config_dword() returns
// 0 on success; any non-zero result is reported as failure instead of being
// silently dropped, and the control write is skipped if the target write
// failed.
bool pcie_timer_interrupt(struct pci_dev *pdev)
{
	int err;

	err = pci_write_config_dword(pdev, INTERFACE_TIMER_TARGET_OFF,
				     INTERFACE_TIMER_TARGET);
	if (err)
		return false;

	err = pci_write_config_dword(pdev, INTERFACE_TIMER_CONTROL_OFF,
				     INTERFACE_TIMER_EN | INTERFACE_FORCE_PENDING);

	return err == 0;
}
1 change: 1 addition & 0 deletions pcie.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
bool safe_pci_restore_state(struct pci_dev *pdev);
bool complete_pcie_init(struct tenstorrent_device *tt_dev, u8 __iomem* reset_unit_regs);
bool pcie_hot_reset_and_restore_state(struct pci_dev *pdev);
bool pcie_timer_interrupt(struct pci_dev *pdev);

#endif
13 changes: 12 additions & 1 deletion test/query_mappings.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
// SPDX-License-Identifier: GPL-2.0-only

// Verify that all resource IDs are known to us.
Expand Down Expand Up @@ -128,6 +128,7 @@ void VerifyNoOverlap(const std::vector<tenstorrent_mapping> &mappings)
}

// Verify that size > 0. Verify that base & size are multiples of the page size.
// Verify that the size is not too large and that mapping_base is not too high.
void VerifySizes(const std::vector<tenstorrent_mapping> &mappings)
{
if (std::any_of(mappings.begin(), mappings.end(),
Expand All @@ -143,6 +144,16 @@ void VerifySizes(const std::vector<tenstorrent_mapping> &mappings)
if (std::any_of(mappings.begin(), mappings.end(),
[=](const auto &m) { return m.mapping_id != TENSTORRENT_MAPPING_UNUSED && m.mapping_base % pagesize != 0; }))
THROW_TEST_FAILURE("Mapping base is not a multiple of page size in QUERY_MAPPINGS results.");

if (std::any_of(mappings.begin(), mappings.end(),
[](const auto &m) { return m.mapping_size > std::numeric_limits<std::uint64_t>::max() - m.mapping_base; }))
THROW_TEST_FAILURE("Mapping region wraps around.");

std::uint64_t mmap_offset_limit_for_32b = (std::uint64_t)1 << 44; // 32 + log(PAGE_SIZE)

if (std::any_of(mappings.begin(), mappings.end(),
[=](const auto &m) { return m.mapping_size + m.mapping_base >= mmap_offset_limit_for_32b; }))
THROW_TEST_FAILURE("Mapping base/size do not fit into 32-bit mmap offset.");
}

void PrintMappings(const std::vector<tenstorrent_mapping>& mappings)
Expand Down
1 change: 1 addition & 0 deletions wormhole.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ static void wormhole_cleanup(struct tenstorrent_device *tt_dev) {
struct tenstorrent_device_class wormhole_class = {
.name = "Wormhole",
.instance_size = sizeof(struct wormhole_device),
.dma_address_bits = 32,
.init_device = wormhole_init,
.init_hardware = wormhole_init_hardware,
.post_hardware_init = wormhole_post_hardware_init,
Expand Down

0 comments on commit 145626a

Please sign in to comment.