Skip to content

Commit

Permalink
[SYCLNATIVECPU] cc1 option, builtin names
Browse files Browse the repository at this point in the history
  • Loading branch information
PietroGhg committed Jun 1, 2023
1 parent 93fecfa commit c419d01
Show file tree
Hide file tree
Showing 17 changed files with 108 additions and 80 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the "
"SYCL integration header")
LANGOPT(SYCLAllowVirtualFunctions, 1, 0,
"Allow virtual functions calls in code for SYCL device")
LANGOPT(SYCLIsNativeCPU , 1, 0, "Generate code for SYCL NativeCPU")

LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")

Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6862,6 +6862,10 @@ def fsycl_use_main_file_name : Flag<["-"], "fsycl-use-main-file-name">,
def fsycl_allow_virtual_functions : Flag<["-"], "fsycl-allow-virtual-functions">,
HelpText<"Allow virtual functions calls in code for SYCL device">,
MarshallingInfoFlag<LangOpts<"SYCLAllowVirtualFunctions">>;
def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">,
HelpText<"Perform device compilation for Native CPU.">,
Flags<[CC1Option, NoDriverOption]>,
MarshallingInfoFlag<LangOpts<"SYCLIsNativeCPU">>;

} // let Flags = [CC1Option, NoDriverOption]

Expand Down
3 changes: 1 addition & 2 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ using namespace llvm;

namespace llvm {
extern cl::opt<bool> DebugInfoCorrelate;
extern cl::opt<bool> SYCLNativeCPU;

// Experiment to move sanitizers earlier.
static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP(
Expand Down Expand Up @@ -1072,7 +1071,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
MPM.addPass(CompileTimePropertiesPass());
}

if (LangOpts.SYCLIsDevice && llvm::SYCLNativeCPU) {
if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsNativeCPU) {
MPM.addPass(
EmitSYCLNativeCPUHeaderPass(getNativeCPUHeaderName(LangOpts)));
MPM.addPass(PrepareSYCLNativeCPUPass());
Expand Down
10 changes: 5 additions & 5 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,6 @@ static llvm::cl::opt<bool> LimitedCoverage(
"limited-coverage-experimental", llvm::cl::Hidden,
llvm::cl::desc("Emit limited coverage mapping information (experimental)"));

namespace llvm {
extern cl::opt<bool> SYCLNativeCPU;
}

static const char AnnotationSection[] = "llvm.metadata";

static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
Expand Down Expand Up @@ -898,6 +894,10 @@ void CodeGenModule::Release() {
getTarget().getTargetOpts().NVVMCudaPrecSqrt);
}

if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsNativeCPU) {
getModule().addModuleFlag(llvm::Module::Error, "is-native-cpu", 1);
}

if (LangOpts.EHAsynch)
getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1);

Expand Down Expand Up @@ -2115,7 +2115,7 @@ void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn,
Fn->setMetadata("kernel_arg_exclusive_ptr",
llvm::MDNode::get(VMContext, argSYCLAccessorPtrs));
}
if (llvm::SYCLNativeCPU) {
if (LangOpts.SYCLIsNativeCPU) {
Fn->setMetadata("kernel_arg_type",
llvm::MDNode::get(VMContext, argTypeNames));
}
Expand Down
3 changes: 1 addition & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5073,8 +5073,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-sycl-opt");
}
if (IsSYCLNativeCPU) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-sycl-native-cpu");
CmdArgs.push_back("-fsycl-is-native-cpu");
CmdArgs.push_back("-D");
CmdArgs.push_back("__SYCL_NATIVE_CPU__");
CmdArgs.push_back("-fno-autolink");
Expand Down
9 changes: 2 additions & 7 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@
#include <initializer_list>
#include <regex>

namespace llvm {
cl::opt<bool> SYCLNativeCPU("sycl-native-cpu", cl::init(false),
cl::desc("Enable SYCL Native CPU"));
}

using namespace clang;
using namespace std::placeholders;

Expand Down Expand Up @@ -1068,7 +1063,7 @@ constructKernelName(Sema &S, const FunctionDecl *KernelCallerFunc,
// When compiling for the SYCLNativeCPU device we need a C++ identifier
// as the kernel name and cannot use the name produced by some manglers
// including the MS mangler.
if (llvm::SYCLNativeCPU) {
if (S.getLangOpts().SYCLIsNativeCPU) {
MangledName = StableName;
changeManglingForNativeCPU(MangledName);
}
Expand Down Expand Up @@ -5723,7 +5718,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) {
}
}

if (llvm::SYCLNativeCPU) {
if (S.getLangOpts().SYCLIsNativeCPU) {
// This is a temporary workaround for the integration header file
// being emitted too early.
std::string HCName = getNativeCPUHeaderName(S.getLangOpts());
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-native-cpu-fsycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[SRCWFOOTER]].cpp", "[[KERNELIR]].bc"], output: "[[HOSTOBJ:.*]].o"
//CHECK_BINDINGS:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[HOSTOBJ]].o", "[[KERNELOBJ]].o"], output: "a.{{.*}}"

//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-mllvm" "-sycl-native-cpu" "-D" "__SYCL_NATIVE_CPU__"
//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__"
//CHECK_INVO:{{.*}}clang{{.*}}"-x" "ir"
//CHECK_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}}

Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/sycl-native-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


// checks that the host and device triple are the same, and that the sycl-native-cpu LLVM option is set
// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-mllvm" "-sycl-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__"
// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__"

// checks that the target triples are set correctly when the target is set explicitly
// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-mllvm" "-sycl-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__"
// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__"
16 changes: 8 additions & 8 deletions clang/test/SemaSYCL/native_cpu/kernelhandler-scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,32 +55,32 @@ int main() {



// CHECK:extern "C" void _Z6init_aIiE(void *, void *, int, nativecpu_state *);
// CHECK:inline static void _Z6init_aIiEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
// CHECK:extern "C" void _Z6init_aIiE(void *, void *, int, __nativecpu_state *);
// CHECK:inline static void _Z6init_aIiEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
// CHECK-NEXT: void* arg0 = MArgs[0].getPtr();
// CHECK-NEXT: void* arg3 = MArgs[3].getPtr();
// CHECK-NEXT: int arg4 = *(int*)MArgs[4].getPtr();
// CHECK-NEXT: _Z6init_aIiE(arg0, arg3, arg4, state);
// CHECK-NEXT:};

// CHECK:extern "C" void _Z6init_aIjE(void *, void *, unsigned int, nativecpu_state *);
// CHECK:inline static void _Z6init_aIjEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
// CHECK:extern "C" void _Z6init_aIjE(void *, void *, unsigned int, __nativecpu_state *);
// CHECK:inline static void _Z6init_aIjEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
// CHECK-NEXT: void* arg0 = MArgs[0].getPtr();
// CHECK-NEXT: void* arg3 = MArgs[3].getPtr();
// CHECK-NEXT: unsigned int arg4 = *(unsigned int*)MArgs[4].getPtr();
// CHECK-NEXT: _Z6init_aIjE(arg0, arg3, arg4, state);
// CHECK-NEXT:};

// CHECK:extern "C" void _Z6init_aIfE(void *, void *, float, nativecpu_state *);
// CHECK:inline static void _Z6init_aIfEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
// CHECK:extern "C" void _Z6init_aIfE(void *, void *, float, __nativecpu_state *);
// CHECK:inline static void _Z6init_aIfEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
// CHECK-NEXT: void* arg0 = MArgs[0].getPtr();
// CHECK-NEXT: void* arg3 = MArgs[3].getPtr();
// CHECK-NEXT: float arg4 = *(float*)MArgs[4].getPtr();
// CHECK-NEXT: _Z6init_aIfE(arg0, arg3, arg4, state);
// CHECK-NEXT:};

// CHECK:extern "C" void _Z6init_aIdE(void *, void *, double, nativecpu_state *);
// CHECK:inline static void _Z6init_aIdEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
// CHECK:extern "C" void _Z6init_aIdE(void *, void *, double, __nativecpu_state *);
// CHECK:inline static void _Z6init_aIdEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
// CHECK-NEXT: void* arg0 = MArgs[0].getPtr();
// CHECK-NEXT: void* arg3 = MArgs[3].getPtr();
// CHECK-NEXT: double arg4 = *(double*)MArgs[4].getPtr();
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/native_cpu/kernelhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ int main() {
//CHECK-HC-NEXT: #include <sycl/detail/native_cpu.hpp>
//CHECK-HC-NEXT: #include <sycl/detail/pi.h>
//CHECK-HC-NEXT: extern "C" void __sycl_register_lib(pi_device_binaries desc);
//CHECK-HC:extern "C" void _Z5Test1(void *, void *, nativecpu_state *);
//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
//CHECK-HC:extern "C" void _Z5Test1(void *, void *, __nativecpu_state *);
//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
//CHECK-HC-NEXT: void* arg0 = MArgs[0].getPtr();
//CHECK-HC-NEXT: void* arg3 = MArgs[3].getPtr();
//CHECK-HC-NEXT: _Z5Test1(arg0, arg3, state);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/native_cpu/kernelhandler_noargs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ int main() {
//CHECK-HC: #pragma once
//CHECK-HC-NEXT: #include <sycl/detail/native_cpu.hpp>
//CHECK-HC:extern "C" void _Z5Test1();
//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
//CHECK-HC-NEXT: _Z5Test1();
//CHECK-HC-NEXT:};
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/native_cpu/kernelhandler_noargs2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ int main() {
//CHECK-HC: #pragma once
//CHECK-HC-NEXT: #include <sycl/detail/native_cpu.hpp>
//CHECK-HC:extern "C" void _ZZ4mainE10TestKernel();
//CHECK-HC:inline static void _ZZ4mainE10TestKernelsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) {
//CHECK-HC:inline static void _ZZ4mainE10TestKernelsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) {
//CHECK-HC-NEXT: _ZZ4mainE10TestKernel();
//CHECK-HC-NEXT:};
4 changes: 2 additions & 2 deletions llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void emitKernelDecl(const Function *F, const SmallVector<bool> &argMask,
// find the index of the last used arg
while (!argMask[I] && I + 1 < argMask.size())
I++;
O << EmitArgDecl(F->getArg(UsedI), I) << ", nativecpu_state *);\n";
O << EmitArgDecl(F->getArg(UsedI), I) << ", __nativecpu_state *);\n";
}

void emitSubKernelHandler(const Function *F, const SmallVector<bool> &argMask,
Expand All @@ -138,7 +138,7 @@ void emitSubKernelHandler(const Function *F, const SmallVector<bool> &argMask,

O << "\ninline static void " << F->getName() << "subhandler(";
O << "const sycl::detail::NativeCPUArgDesc *MArgs, "
"nativecpu_state *state) {\n";
"__nativecpu_state *state) {\n";
// Retrieve only the args that are used
for (unsigned I = 0, UsedI = 0;
I < argMask.size() && UsedI < F->getFunctionType()->getNumParams();
Expand Down
18 changes: 8 additions & 10 deletions llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,13 @@ Function *cloneFunctionAndAddParam(Function *oldF, Type *T) {

// Todo: add support for more SPIRV builtins here
static std::map<std::string, std::string> BuiltinNamesMap{
{"__spirv_BuiltInGlobalInvocationId",
"_Z13get_global_idmP15nativecpu_state"},
{"__spirv_BuiltInWorkgroupSize", "_Z13get_wg_sizemP15nativecpu_state"},
{"__spirv_BuiltInWorkgroupId", "_Z13get_wgid_mP15nativecpu_state"},
{"__spirv_BuiltInLocalInvocationId",
"_Z13get_local_id_mP15nativecpu_state"},
{"__spirv_BuiltInNumWorkgroups", "_Z13get_num_groupsmP15nativecpu_state"},
{"__spirv_BuiltInGlobalOffset", "_Z13get_global_offsetmP15nativecpu_state"},
{"__spirv_BuiltInGlobalSize", "_Z13get_global_rangemP15nativecpu_state"}};
{"__spirv_BuiltInGlobalInvocationId", "__dpcpp_nativecpu_global_id"},
{"__spirv_BuiltInGlobalSize", "__dpcpp_nativecpu_global_range"},
{"__spirv_BuiltInWorkgroupSize", "__dpcpp_nativecpu_get_wg_size"},
{"__spirv_BuiltInWorkgroupId", "__dpcpp_nativecpu_get_wg_id"},
{"__spirv_BuiltInLocalInvocationId", "__dpcpp_nativecpu_get_local_id"},
{"__spirv_BuiltInNumWorkgroups", "__dpcpp_nativecpu_get_num_groups"},
{"__spirv_BuiltInGlobalOffset", "__dpcpp_nativecpu_get_global_offset"}};

Function *getReplaceFunc(Module &M, Type *T, StringRef Name) {
Function *F = M.getFunction(Name);
Expand Down Expand Up @@ -153,7 +151,7 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
// First we add a pointer to the Native CPU state as arg to all the
// kernels.
Type *StateType =
StructType::getTypeByName(M.getContext(), "struct.nativecpu_state");
StructType::getTypeByName(M.getContext(), "struct.__nativecpu_state");
if (!StateType)
report_fatal_error("Couldn't find the Native CPU state in the "
"module, make sure that -D __SYCL_NATIVE_CPU__ is set",
Expand Down
41 changes: 21 additions & 20 deletions sycl/include/sycl/detail/native_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,20 @@ struct NativeCPUArgDesc {
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl

struct nativecpu_state {
struct __nativecpu_state {
size_t MGlobal_id[3];
size_t MGlobal_range[3];
size_t MWorkGroup_size[3];
size_t MWorkGroup_id[3];
size_t MLocal_id[3];
size_t MNumGroups[3];
size_t MGlobalOffset[3];
nativecpu_state(size_t globalR0, size_t globalR1, size_t globalR2,
size_t localR0, size_t localR1, size_t localR2,
size_t globalO0, size_t globalO1, size_t globalO2)
: MGlobal_range{globalR0, globalR1, globalR2},
MWorkGroup_size{localR0, localR1, localR2},
__nativecpu_state(size_t globalR0, size_t globalR1, size_t globalR2,
size_t localR0, size_t localR1, size_t localR2,
size_t globalO0, size_t globalO1, size_t globalO2)
: MGlobal_range{globalR0, globalR1, globalR2}, MWorkGroup_size{localR0,
localR1,
localR2},
MNumGroups{globalR0 / localR0, globalR1 / localR1, globalR2 / localR2},
MGlobalOffset{globalO0, globalO1, globalO2} {
MGlobal_id[0] = 0;
Expand Down Expand Up @@ -64,44 +65,44 @@ struct nativecpu_state {
[[intel::device_indirectly_callable]]

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_global_idmP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_global_id(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MGlobal_id[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_global_rangemP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_global_range(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MGlobal_range[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_wg_sizemP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_get_wg_size(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MWorkGroup_size[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_wgid_mP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_get_wg_id(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MWorkGroup_id[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_local_id_mP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_get_local_id(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MLocal_id[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_num_groupsmP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_get_num_groups(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MNumGroups[0]);
}

extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t *
_Z13get_global_offsetmP15nativecpu_state(__attribute((address_space(0)))
nativecpu_state *s) {
__dpcpp_nativecpu_get_global_offset(__attribute((address_space(0)))
__nativecpu_state *s) {
return &(s->MGlobalOffset[0]);
}
#undef __SYCL_HC_ATTRS
Expand Down
10 changes: 5 additions & 5 deletions sycl/plugins/native_cpu/pi_native_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct _pi_program : _pi_object {
};

using nativecpu_kernel_t = void(const sycl::detail::NativeCPUArgDesc *,
nativecpu_state *);
__nativecpu_state *);
using nativecpu_ptr_t = nativecpu_kernel_t *;
using nativecpu_task_t = std::function<nativecpu_kernel_t>;
struct _pi_kernel : _pi_object {
Expand Down Expand Up @@ -1052,10 +1052,10 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
// TODO: add proper event dep management
sycl::detail::NDRDescT ndr =
getNDRDesc(WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize);
nativecpu_state state(ndr.GlobalSize[0], ndr.GlobalSize[1], ndr.GlobalSize[2],
ndr.LocalSize[0], ndr.LocalSize[1], ndr.LocalSize[2],
ndr.GlobalOffset[0], ndr.GlobalOffset[1],
ndr.GlobalOffset[2]);
__nativecpu_state state(ndr.GlobalSize[0], ndr.GlobalSize[1],
ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1],
ndr.LocalSize[2], ndr.GlobalOffset[0],
ndr.GlobalOffset[1], ndr.GlobalOffset[2]);
auto numWG0 = ndr.GlobalSize[0] / ndr.LocalSize[0];
auto numWG1 = ndr.GlobalSize[1] / ndr.LocalSize[1];
auto numWG2 = ndr.GlobalSize[2] / ndr.LocalSize[2];
Expand Down
Loading

0 comments on commit c419d01

Please sign in to comment.