Skip to content

Commit

Permalink
Fix validation error in WithCommandQueue.SubgroupSize (#748)
Browse files Browse the repository at this point in the history
Respect maxComputeWorkgroupSubgroups limit

Change-Id: I1242c9bf13dbd10dfc87351767a6698e1fa8879c

Signed-off-by: Kévin Petit <[email protected]>
  • Loading branch information
kpet authored Dec 10, 2024
1 parent daad9ae commit 5c59aa5
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 14 deletions.
5 changes: 3 additions & 2 deletions src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -942,9 +942,10 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev,
}
copy_ptr = val_subgroup_sizes.data();
size_ret = sizeof(size_t) * val_subgroup_sizes.size();
break;
} else {
ret = CL_INVALID_VALUE;
}
[[fallthrough]];
break;
default:
ret = CL_INVALID_VALUE;
break;
Expand Down
6 changes: 6 additions & 0 deletions src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,12 @@ void cvk_device::log_limits_and_memory_information() {
limits.maxComputeWorkGroupSize[0],
limits.maxComputeWorkGroupSize[1],
limits.maxComputeWorkGroupSize[2]);
cvk_info(" Min subgroup size: %u",
m_subgroup_size_control_properties.minSubgroupSize);
cvk_info(" Max subgroup size: %u",
m_subgroup_size_control_properties.maxSubgroupSize);
cvk_info(" Max subgroups per work-group: %u",
m_subgroup_size_control_properties.maxComputeWorkgroupSubgroups);

// Print memoy information
cvk_info("Device has %u memory types:", m_mem_properties.memoryTypeCount);
Expand Down
4 changes: 4 additions & 0 deletions src/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,10 @@ struct cvk_device : public _cl_device_id,
if (!supports_subgroups()) {
return 0;
}
if (supports_subgroup_size_selection()) {
return m_subgroup_size_control_properties
.maxComputeWorkgroupSubgroups;
}
return ceil_div(max_work_group_size(),
static_cast<size_t>(sub_group_size()));
}
Expand Down
3 changes: 1 addition & 2 deletions src/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> {
}

size_t max_num_sub_groups(const cvk_device* device) const {
return ceil_div(max_work_group_size(device),
static_cast<size_t>(device->sub_group_size()));
return device->max_num_sub_groups();
}

const std::array<uint32_t, 3>& required_work_group_size() const {
Expand Down
22 changes: 12 additions & 10 deletions tests/api/subgroup_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,23 @@ static const char* program_source = R"(
)";

TEST_F(WithCommandQueue, SubgroupSizes) {
REQUIRE_EXTENSION("cl_intel_required_subgroup_size");
std::vector<size_t> subgroup_sizes;
size_t raw_size;
GetDeviceInfo(CL_DEVICE_SUB_GROUP_SIZES_INTEL, 0, nullptr, &raw_size);
subgroup_sizes.resize(raw_size / sizeof(size_t));
GetDeviceInfo(CL_DEVICE_SUB_GROUP_SIZES_INTEL, raw_size,
subgroup_sizes.data(), nullptr);

size_t max_work_group_size;
GetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&max_work_group_size, nullptr);
size_t max_num_sub_groups;
GetDeviceInfo(CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(size_t),
&max_num_sub_groups, nullptr);

auto buffer = CreateBuffer(0, sizeof(cl_int));

auto run = [this](const char* kernel_prefix, size_t subgroup_size,
size_t max_work_group_size, cl_mem buffer) {
cl_int expected_num_sub_groups = max_work_group_size / subgroup_size;
auto run = [this, max_num_sub_groups](const char* kernel_prefix,
size_t subgroup_size, cl_mem buffer) {
cl_int expected_num_sub_groups = max_num_sub_groups;
size_t work_size = expected_num_sub_groups * subgroup_size;
char source[512];
snprintf(source, sizeof(source), program_source, kernel_prefix);
Expand All @@ -51,9 +52,10 @@ TEST_F(WithCommandQueue, SubgroupSizes) {
auto result =
EnqueueMapBuffer<cl_int>(buffer, true, 0, 0, sizeof(cl_int));
#if 0
printf("\tkernel_prefix '%s' subgroup_size %lu max_work_group_size %lu "
printf("\tkernel_prefix '%s' subgroup_size %lu "
"max_num_sub_groups %lu "
"result %u expected %u\n",
kernel_prefix, subgroup_size, max_work_group_size, *result,
kernel_prefix, subgroup_size, max_num_sub_groups, *result,
expected_num_sub_groups);
#endif
ASSERT_EQ(expected_num_sub_groups, *result);
Expand All @@ -65,12 +67,12 @@ TEST_F(WithCommandQueue, SubgroupSizes) {
snprintf(attribute, sizeof(attribute),
"__attribute__((intel_reqd_sub_group_size(%lu))) ",
subgroup_size);
run(attribute, subgroup_size, max_work_group_size, buffer);
run(attribute, subgroup_size, buffer);
}
for (auto subgroup_size : subgroup_sizes) {
auto cfg = CLVK_CONFIG_SCOPED_OVERRIDE(force_subgroup_size, uint32_t,
subgroup_size, true);
run("", subgroup_size, max_work_group_size, buffer);
run("", subgroup_size, buffer);
}
}
#endif
29 changes: 29 additions & 0 deletions tests/api/testcl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,8 +775,37 @@ class WithCommandQueue : public WithContext {
param_value, param_value_size_ret);
ASSERT_CL_SUCCESS(err);
}

bool HasExtension(const std::string& name) {
std::vector<cl_name_version> exts;
size_t num_extensions;
GetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr,
&num_extensions);

exts.resize(num_extensions);

GetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION,
num_extensions * sizeof(cl_name_version), exts.data(),
nullptr);

for (auto const& ext : exts) {
if (ext.name == name) {
return true;
}
}

return false;
}
};

#define REQUIRE_EXTENSION(name) \
do { \
if (!HasExtension(name)) { \
GTEST_SKIP() << "Device does not support " << name \
<< " extension"; \
} \
} while (0);

class WithProfiledCommandQueue : public WithCommandQueue {
protected:
void SetUp() override {
Expand Down

0 comments on commit 5c59aa5

Please sign in to comment.