diff --git a/Makefile b/Makefile index 29993f9..bf48fa0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-m += tenstorrent.o -tenstorrent-y := module.o chardev.o enumerate.o interrupt.o grayskull.o wormhole.o pcie.o hwmon.o sg_helpers.o memory.o +tenstorrent-y := module.o chardev.o enumerate.o interrupt.o grayskull.o wormhole.o blackhole.o pcie.o hwmon.o sg_helpers.o memory.o KDIR := /lib/modules/$(shell uname -r)/build KMAKE := $(MAKE) -C $(KDIR) M=$(CURDIR) diff --git a/README.md b/README.md index 83afee6..999d68f 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ ## Supported hardware: * Grayskull * Wormhole +* Blackhole The driver registers device files named `/dev/tenstorrent/%d`, one for each enumerated device. @@ -18,7 +19,7 @@ The driver registers device files named `/dev/tenstorrent/%d`, one for each enum * `dnf install epel-release && dnf install dkms` (Enterprise Linux based) ``` sudo dkms add . -sudo dkms install tenstorrent/1.30 +sudo dkms install tenstorrent/1.30-bh sudo modprobe tenstorrent ``` (or reboot, driver will auto-load next boot) @@ -26,6 +27,5 @@ sudo modprobe tenstorrent ### To uninstall: ``` sudo modprobe -r tenstorrent -sudo dkms remove tenstorrent/1.30 --all +sudo dkms remove tenstorrent/1.30-bh --all ``` - diff --git a/blackhole.c b/blackhole.c new file mode 100644 index 0000000..cc4f651 --- /dev/null +++ b/blackhole.c @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include + +#include "blackhole.h" +#include "pcie.h" +#include "module.h" + +#define MAX_MRRS 4096 // Read request size handed to pcie_set_readrq() in blackhole_init_hardware(). + +static bool blackhole_init(struct tenstorrent_device *tt_dev) { // init_device hook: does nothing and reports success. + return true; +} + +static bool blackhole_init_hardware(struct tenstorrent_device *tt_dev) { // init_hardware hook: only sets the PCIe read request size. + struct pci_dev *pdev = tt_dev->pdev; + pcie_set_readrq(pdev, MAX_MRRS); // NOTE(review): the return value is ignored, so a failure here still reports success. + return true; +} + +static bool blackhole_post_hardware_init(struct tenstorrent_device *tt_dev) { // post_hardware_init hook: does nothing and reports success. + return true; +} + +static void blackhole_cleanup_hardware(struct tenstorrent_device *tt_dev) { // cleanup_hardware hook: intentionally empty. +} + +static void blackhole_cleanup(struct tenstorrent_device *tt_dev) { // cleanup_device hook: intentionally empty. +} + +struct tenstorrent_device_class blackhole_class = { // Class descriptor; module.c's tenstorrent_ids[] points PCI_DEVICE_ID_BLACKHOLE at it. + .name = "Blackhole", + .instance_size = sizeof(struct blackhole_device), + .dma_address_bits = 58, // Width passed to DMA_BIT_MASK() in tenstorrent_pci_probe() when dma_address_bits is 0. + .init_device = blackhole_init, + .init_hardware = blackhole_init_hardware, + .post_hardware_init = blackhole_post_hardware_init, + .cleanup_hardware = blackhole_cleanup_hardware, + .cleanup_device = blackhole_cleanup, +}; diff --git a/blackhole.h b/blackhole.h new file mode 100644 index 0000000..2c9d561 --- /dev/null +++ b/blackhole.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// SPDX-License-Identifier: GPL-2.0-only + +#ifndef TTDRIVER_BLACKHOLE_H_INCLUDED +#define TTDRIVER_BLACKHOLE_H_INCLUDED + +#include +#include "device.h" + +struct blackhole_device { // Blackhole instance type; blackhole_class.instance_size is sizeof() this struct. + struct tenstorrent_device tt; // Embedded generic device; tt_dev_to_bh_dev() relies on this member name. +}; + +#define tt_dev_to_bh_dev(tt_dev) \ + container_of((tt_dev), struct blackhole_device, tt) // Maps a tenstorrent_device pointer to its enclosing blackhole_device. + +#endif diff --git a/chardev.c b/chardev.c index 1329fae..22e2ece 100644 --- a/chardev.c +++ b/chardev.c @@ -185,9 +185,10 @@ static long ioctl_reset_device(struct chardev_private *priv, ok = priv->device->dev_class->init_hardware(priv->device); else ok = false; - } else if (in.flags == TENSTORRENT_RESET_DEVICE_RESET_PCIE_LINK) { ok = pcie_hot_reset_and_restore_state(pdev); + } else if (in.flags == TENSTORRENT_RESET_DEVICE_CONFIG_WRITE) { + ok = pcie_timer_interrupt(pdev); } else { return -EINVAL; } diff --git a/device.h b/device.h index f09bcce..437a014 100644 --- a/device.h +++ b/device.h @@ -42,6 +42,7 @@ struct tenstorrent_device { struct tenstorrent_device_class { const char *name; u32 instance_size; + u32 dma_address_bits; // Per-class DMA address width in bits; fed to DMA_BIT_MASK() in tenstorrent_pci_probe(). bool (*init_device)(struct tenstorrent_device *ttdev); bool (*init_hardware)(struct tenstorrent_device *ttdev); bool (*post_hardware_init)(struct tenstorrent_device *ttdev); diff --git a/dkms.conf b/dkms.conf index 73616bc..635f506 100644 --- a/dkms.conf +++ b/dkms.conf @@ -1,5 +1,5 @@ PACKAGE_NAME="tenstorrent" -PACKAGE_VERSION="1.30" +PACKAGE_VERSION="1.30-bh" BUILT_MODULE_NAME="tenstorrent" DEST_MODULE_LOCATION="/kernel/extra" diff --git a/enumerate.c b/enumerate.c index 131bb9d..487c553 100644 --- a/enumerate.c +++ b/enumerate.c @@ -77,7 +77,8 @@ static int tenstorrent_pci_probe(struct pci_dev *dev, const struct pci_device_id mutex_init(&tt_dev->chardev_mutex); - tt_dev->dma_capable = (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(dma_address_bits ?: 32)) == 0); + tt_dev->dma_capable = (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(dma_address_bits ?: device_class->dma_address_bits)) == 0); + // Max these to ensure the 
IOVA allocator will not split large pinned regions. dma_set_max_seg_size(&dev->dev, UINT_MAX); dma_set_seg_boundary(&dev->dev, ULONG_MAX); diff --git a/enumerate.h b/enumerate.h index 0d58dc9..d485531 100644 --- a/enumerate.h +++ b/enumerate.h @@ -14,6 +14,7 @@ #define PCI_VENDOR_ID_TENSTORRENT 0x1E52 #define PCI_DEVICE_ID_GRAYSKULL 0xFACA #define PCI_DEVICE_ID_WORMHOLE 0x401E +#define PCI_DEVICE_ID_BLACKHOLE 0xB140 struct pci_dev; struct cdev; diff --git a/grayskull.c b/grayskull.c index 78244ed..08e4b9c 100644 --- a/grayskull.c +++ b/grayskull.c @@ -875,6 +875,7 @@ static void grayskull_last_release_handler(struct tenstorrent_device *tt_dev) { struct tenstorrent_device_class grayskull_class = { .name = "Grayskull", .instance_size = sizeof(struct grayskull_device), + .dma_address_bits = 32, .init_device = grayskull_init, .init_hardware = grayskull_init_hardware, .post_hardware_init = grayskull_post_hardware_init, diff --git a/ioctl.h b/ioctl.h index e33890f..d56e1f6 100644 --- a/ioctl.h +++ b/ioctl.h @@ -125,6 +125,7 @@ struct tenstorrent_get_driver_info { // tenstorrent_reset_device_in.flags #define TENSTORRENT_RESET_DEVICE_RESTORE_STATE 0 #define TENSTORRENT_RESET_DEVICE_RESET_PCIE_LINK 1 +#define TENSTORRENT_RESET_DEVICE_CONFIG_WRITE 2 struct tenstorrent_reset_device_in { __u32 output_size_bytes; diff --git a/memory.c b/memory.c index c3c7719..a9d7c22 100644 --- a/memory.c +++ b/memory.c @@ -122,17 +122,17 @@ static void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npage // These are the mmap offsets for various resources. In the user-kernel // interface they are dynamic (TENSTORRENT_IOCTL_QUERY_MAPPINGS and // TENSTORRENT_IOCTL_ALLOCATE_DMA_BUF), but they are actually hard-coded. 
-#define MMAP_OFFSET_RESOURCE0_UC (U64_C(0) << 32) -#define MMAP_OFFSET_RESOURCE0_WC (U64_C(1) << 32) -#define MMAP_OFFSET_RESOURCE1_UC (U64_C(2) << 32) -#define MMAP_OFFSET_RESOURCE1_WC (U64_C(3) << 32) -#define MMAP_OFFSET_RESOURCE2_UC (U64_C(4) << 32) -#define MMAP_OFFSET_RESOURCE2_WC (U64_C(5) << 32) +#define MMAP_OFFSET_RESOURCE0_UC (U64_C(0) << 35) +#define MMAP_OFFSET_RESOURCE0_WC (U64_C(1) << 35) +#define MMAP_OFFSET_RESOURCE1_UC (U64_C(2) << 35) +#define MMAP_OFFSET_RESOURCE1_WC (U64_C(3) << 35) +#define MMAP_OFFSET_RESOURCE2_UC (U64_C(4) << 35) +#define MMAP_OFFSET_RESOURCE2_WC (U64_C(5) << 35) // tenstorrent_allocate_dma_buf_in.buf_index is u8 so that sets a limit of // U8_MAX DMA buffers per fd. 32-bit mmap offsets are divided by PAGE_SIZE, // so PAGE_SIZE << 32 is the largest possible offset. -#define MMAP_OFFSET_DMA_BUF ((u64)(PAGE_SIZE-U8_MAX) << 32) +#define MMAP_OFFSET_DMA_BUF ((u64)(PAGE_SIZE-U8_MAX-1) << 32) // Base is (PAGE_SIZE - (U8_MAX + 1)) << 32; assuming one MMAP_SIZE_DMA_BUF slot per u8 buf_index (confirm at the use site), the last slot ends at PAGE_SIZE << 32. #define MMAP_SIZE_DMA_BUF (U64_C(1) << 32) diff --git a/module.c b/module.c index 4ffba29..eed4f4e 100644 --- a/module.c +++ b/module.c @@ -9,7 +9,7 @@ #include "chardev.h" #include "enumerate.h" -#define TTDRIVER_VER "1.30" +#define TTDRIVER_VER "1.30-bh" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Tenstorrent AI kernel driver"); @@ -94,12 +94,15 @@ MODULE_PARM_DESC(auto_reset_timeout, "Timeout duration in seconds for M3 auto re struct tenstorrent_device_class; extern struct tenstorrent_device_class grayskull_class; extern struct tenstorrent_device_class wormhole_class; +extern struct tenstorrent_device_class blackhole_class; const struct pci_device_id tenstorrent_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_GRAYSKULL), .driver_data=(kernel_ulong_t)&grayskull_class }, { PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_WORMHOLE), .driver_data=(kernel_ulong_t)&wormhole_class }, + { PCI_DEVICE(PCI_VENDOR_ID_TENSTORRENT, PCI_DEVICE_ID_BLACKHOLE), + .driver_data=(kernel_ulong_t)&blackhole_class }, { 0 }, }; diff --git 
a/pcie.c b/pcie.c index 84e1db0..95a5dce 100644 --- a/pcie.c +++ b/pcie.c @@ -13,6 +13,12 @@ #include "grayskull.h" #define FW_MSG_PCIE_RETRAIN 0xB6 +#define INTERFACE_TIMER_CONTROL_OFF 0x930 +#define INTERFACE_TIMER_TARGET_OFF 0x934 + +#define INTERFACE_TIMER_TARGET 0x1 +#define INTERFACE_TIMER_EN 0x1 +#define INTERFACE_FORCE_PENDING 0x10 static bool poll_pcie_link_up(struct pci_dev *pdev, u32 timeout_ms) { u16 tt_vendor_id; @@ -116,3 +122,14 @@ bool complete_pcie_init(struct tenstorrent_device *tt_dev, u8 __iomem* reset_uni return false; } + +// Write INTERFACE_TIMER_TARGET to config offset INTERFACE_TIMER_TARGET_OFF, then +// INTERFACE_TIMER_EN | INTERFACE_FORCE_PENDING to INTERFACE_TIMER_CONTROL_OFF. +// Returns true only if both PCI config-space writes succeed. +bool pcie_timer_interrupt(struct pci_dev *pdev) +{ + if (pci_write_config_dword(pdev, INTERFACE_TIMER_TARGET_OFF, INTERFACE_TIMER_TARGET) != 0) + return false; + + return pci_write_config_dword(pdev, INTERFACE_TIMER_CONTROL_OFF, INTERFACE_TIMER_EN | INTERFACE_FORCE_PENDING) == 0; +} diff --git a/pcie.h b/pcie.h index bede693..7f6e707 100644 --- a/pcie.h +++ b/pcie.h @@ -9,5 +9,6 @@ bool safe_pci_restore_state(struct pci_dev *pdev); bool complete_pcie_init(struct tenstorrent_device *tt_dev, u8 __iomem* reset_unit_regs); bool pcie_hot_reset_and_restore_state(struct pci_dev *pdev); +bool pcie_timer_interrupt(struct pci_dev *pdev); #endif diff --git a/test/query_mappings.cpp b/test/query_mappings.cpp index df95ca4..0721485 100644 --- a/test/query_mappings.cpp +++ b/test/query_mappings.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. // SPDX-License-Identifier: GPL-2.0-only // Verify that all resource IDs are known to us. @@ -128,6 +128,7 @@ void VerifyNoOverlap(const std::vector &mappings) } // Verify that size > 0. Verify that base & size are multiples of the page size. +// Verify that the size is not too large and that mapping_base is not too high. 
void VerifySizes(const std::vector &mappings) { if (std::any_of(mappings.begin(), mappings.end(), @@ -143,6 +144,16 @@ void VerifySizes(const std::vector &mappings) if (std::any_of(mappings.begin(), mappings.end(), [=](const auto &m) { return m.mapping_id != TENSTORRENT_MAPPING_UNUSED && m.mapping_base % pagesize != 0; })) THROW_TEST_FAILURE("Mapping base is not a multiple of page size in QUERY_MAPPINGS results."); + + if (std::any_of(mappings.begin(), mappings.end(), + [](const auto &m) { return m.mapping_size > std::numeric_limits<std::uint64_t>::max() - m.mapping_base; })) + THROW_TEST_FAILURE("Mapping region wraps around."); + + const std::uint64_t mmap_offset_limit_for_32b = std::uint64_t{1} << 44; // 32-bit page offset in 4096-byte units: 2^(32 + 12) bytes + + if (std::any_of(mappings.begin(), mappings.end(), + [=](const auto &m) { return m.mapping_size + m.mapping_base >= mmap_offset_limit_for_32b; })) + THROW_TEST_FAILURE("Mapping base/size do not fit into 32-bit mmap offset."); } void PrintMappings(const std::vector& mappings) diff --git a/wormhole.c b/wormhole.c index 0c1d1cf..496c132 100644 --- a/wormhole.c +++ b/wormhole.c @@ -211,6 +211,7 @@ static void wormhole_cleanup(struct tenstorrent_device *tt_dev) { struct tenstorrent_device_class wormhole_class = { .name = "Wormhole", .instance_size = sizeof(struct wormhole_device), + .dma_address_bits = 32, .init_device = wormhole_init, .init_hardware = wormhole_init_hardware, .post_hardware_init = wormhole_post_hardware_init,