From c419d01ea9a0ddb10c012ce3f674bbfacaa7f264 Mon Sep 17 00:00:00 2001 From: Pietro Ghiglio Date: Thu, 1 Jun 2023 14:30:40 +0000 Subject: [PATCH] [SYCLNATIVECPU] cc1 option, builtin names --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 ++ clang/lib/CodeGen/BackendUtil.cpp | 3 +- clang/lib/CodeGen/CodeGenModule.cpp | 10 ++-- clang/lib/Driver/ToolChains/Clang.cpp | 3 +- clang/lib/Sema/SemaSYCL.cpp | 9 +-- clang/test/Driver/sycl-native-cpu-fsycl.cpp | 2 +- clang/test/Driver/sycl-native-cpu.cpp | 4 +- .../native_cpu/kernelhandler-scalar.cpp | 16 +++--- .../SemaSYCL/native_cpu/kernelhandler.cpp | 4 +- .../native_cpu/kernelhandler_noargs.cpp | 2 +- .../native_cpu/kernelhandler_noargs2.cpp | 2 +- .../SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp | 4 +- llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp | 18 +++--- sycl/include/sycl/detail/native_cpu.hpp | 41 +++++++------- sycl/plugins/native_cpu/pi_native_cpu.cpp | 10 ++-- .../check_device_code/native_cpu_builtins.cpp | 55 +++++++++++++++---- 17 files changed, 108 insertions(+), 80 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index eca98352b7c8..9369c8f6b79c 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -295,6 +295,7 @@ LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the " "SYCL integration header") LANGOPT(SYCLAllowVirtualFunctions, 1, 0, "Allow virtual functions calls in code for SYCL device") +LANGOPT(SYCLIsNativeCPU , 1, 0, "Generate code for SYCL NativeCPU") LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7850888416fc..e710f9302b20 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6862,6 +6862,10 @@ def fsycl_use_main_file_name : Flag<["-"], "fsycl-use-main-file-name">, def fsycl_allow_virtual_functions : Flag<["-"], "fsycl-allow-virtual-functions">, HelpText<"Allow virtual functions calls in code for SYCL device">, MarshallingInfoFlag>; +def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">, + HelpText<"Perform device compilation for Native CPU.">, + Flags<[CC1Option, NoDriverOption]>, + MarshallingInfoFlag>; } // let Flags = [CC1Option, NoDriverOption] diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 15c98d8c20d5..f8ed4e76d185 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -103,7 +103,6 @@ using namespace llvm; namespace llvm { extern cl::opt DebugInfoCorrelate; -extern cl::opt SYCLNativeCPU; // Experiment to move sanitizers earlier. static cl::opt ClSanitizeOnOptimizerEarlyEP( @@ -1072,7 +1071,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(CompileTimePropertiesPass()); } - if (LangOpts.SYCLIsDevice && llvm::SYCLNativeCPU) { + if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsNativeCPU) { MPM.addPass( EmitSYCLNativeCPUHeaderPass(getNativeCPUHeaderName(LangOpts))); MPM.addPass(PrepareSYCLNativeCPUPass()); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 1f30e9bef4ec..0f4b7281f978 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -79,10 +79,6 @@ static llvm::cl::opt LimitedCoverage( "limited-coverage-experimental", llvm::cl::Hidden, llvm::cl::desc("Emit limited coverage mapping information (experimental)")); -namespace llvm { -extern cl::opt SYCLNativeCPU; -} - static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { @@ -898,6 +894,10 @@ void CodeGenModule::Release() { getTarget().getTargetOpts().NVVMCudaPrecSqrt); } + if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsNativeCPU) { + getModule().addModuleFlag(llvm::Module::Error, "is-native-cpu", 1); + } + if (LangOpts.EHAsynch) getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1); @@ -2115,7 +2115,7 @@ void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn, Fn->setMetadata("kernel_arg_exclusive_ptr", llvm::MDNode::get(VMContext, argSYCLAccessorPtrs)); } - if (llvm::SYCLNativeCPU) { + if (LangOpts.SYCLIsNativeCPU) { Fn->setMetadata("kernel_arg_type", llvm::MDNode::get(VMContext, argTypeNames)); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 40aa403c538d..5d6402a54880 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5073,8 +5073,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-sycl-opt"); } if (IsSYCLNativeCPU) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-sycl-native-cpu"); + CmdArgs.push_back("-fsycl-is-native-cpu"); CmdArgs.push_back("-D"); CmdArgs.push_back("__SYCL_NATIVE_CPU__"); CmdArgs.push_back("-fno-autolink"); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ccb141bc1ee3..81beccd048f5 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -37,11 +37,6 @@ #include #include -namespace llvm { -cl::opt SYCLNativeCPU("sycl-native-cpu", cl::init(false), - cl::desc("Enable SYCL Native CPU")); -} - using namespace clang; using namespace std::placeholders; @@ -1068,7 +1063,7 @@ constructKernelName(Sema &S, const FunctionDecl *KernelCallerFunc, // When compiling for the SYCLNativeCPU device we need a C++ identifier // as the kernel name and cannot use the name produced by some manglers // including the MS mangler. - if (llvm::SYCLNativeCPU) { + if (S.getLangOpts().SYCLIsNativeCPU) { MangledName = StableName; changeManglingForNativeCPU(MangledName); } @@ -5723,7 +5718,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { } } - if (llvm::SYCLNativeCPU) { + if (S.getLangOpts().SYCLIsNativeCPU) { // This is a temporary workaround for the integration header file // being emitted too early. std::string HCName = getNativeCPUHeaderName(S.getLangOpts()); diff --git a/clang/test/Driver/sycl-native-cpu-fsycl.cpp b/clang/test/Driver/sycl-native-cpu-fsycl.cpp index f43f43f1e896..6646b29dc1d0 100644 --- a/clang/test/Driver/sycl-native-cpu-fsycl.cpp +++ b/clang/test/Driver/sycl-native-cpu-fsycl.cpp @@ -29,7 +29,7 @@ //CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[SRCWFOOTER]].cpp", "[[KERNELIR]].bc"], output: "[[HOSTOBJ:.*]].o" //CHECK_BINDINGS:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[HOSTOBJ]].o", "[[KERNELOBJ]].o"], output: "a.{{.*}}" -//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-mllvm" "-sycl-native-cpu" "-D" "__SYCL_NATIVE_CPU__" +//CHECK_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__" //CHECK_INVO:{{.*}}clang{{.*}}"-x" "ir" //CHECK_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}} diff --git a/clang/test/Driver/sycl-native-cpu.cpp b/clang/test/Driver/sycl-native-cpu.cpp index b5831e8fbaca..62209233a161 100644 --- a/clang/test/Driver/sycl-native-cpu.cpp +++ b/clang/test/Driver/sycl-native-cpu.cpp @@ -3,7 +3,7 @@ // checks that the host and device triple are the same, and that the sycl-native-cpu LLVM option is set -// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-mllvm" "-sycl-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// CHECK: clang{{.*}}"-triple" "[[TRIPLE:.*]]"{{.*}}"-aux-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" // checks that the target triples are set correctly when the target is set explicitly -// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-mllvm" "-sycl-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" +// CHECK-AARCH64: clang{{.*}}"-triple" "aarch64-unknown-linux-gnu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu"{{.*}}"-fsycl-is-native-cpu"{{.*}}"-D" "__SYCL_NATIVE_CPU__" diff --git a/clang/test/SemaSYCL/native_cpu/kernelhandler-scalar.cpp b/clang/test/SemaSYCL/native_cpu/kernelhandler-scalar.cpp index cbe06c6c116c..950e2f6662d2 100644 --- a/clang/test/SemaSYCL/native_cpu/kernelhandler-scalar.cpp +++ b/clang/test/SemaSYCL/native_cpu/kernelhandler-scalar.cpp @@ -55,32 +55,32 @@ int main() { -// CHECK:extern "C" void _Z6init_aIiE(void *, void *, int, nativecpu_state *); -// CHECK:inline static void _Z6init_aIiEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +// CHECK:extern "C" void _Z6init_aIiE(void *, void *, int, __nativecpu_state *); +// CHECK:inline static void _Z6init_aIiEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { // CHECK-NEXT: void* arg0 = MArgs[0].getPtr(); // CHECK-NEXT: void* arg3 = MArgs[3].getPtr(); // CHECK-NEXT: int arg4 = *(int*)MArgs[4].getPtr(); // CHECK-NEXT: _Z6init_aIiE(arg0, arg3, arg4, state); // CHECK-NEXT:}; -// CHECK:extern "C" void _Z6init_aIjE(void *, void *, unsigned int, nativecpu_state *); -// CHECK:inline static void _Z6init_aIjEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +// CHECK:extern "C" void _Z6init_aIjE(void *, void *, unsigned int, __nativecpu_state *); +// CHECK:inline static void _Z6init_aIjEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { // CHECK-NEXT: void* arg0 = MArgs[0].getPtr(); // CHECK-NEXT: void* arg3 = MArgs[3].getPtr(); // CHECK-NEXT: unsigned int arg4 = *(unsigned int*)MArgs[4].getPtr(); // CHECK-NEXT: _Z6init_aIjE(arg0, arg3, arg4, state); // CHECK-NEXT:}; -// CHECK:extern "C" void _Z6init_aIfE(void *, void *, float, nativecpu_state *); -// CHECK:inline static void _Z6init_aIfEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +// CHECK:extern "C" void _Z6init_aIfE(void *, void *, float, __nativecpu_state *); +// CHECK:inline static void _Z6init_aIfEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { // CHECK-NEXT: void* arg0 = MArgs[0].getPtr(); // CHECK-NEXT: void* arg3 = MArgs[3].getPtr(); // CHECK-NEXT: float arg4 = *(float*)MArgs[4].getPtr(); // CHECK-NEXT: _Z6init_aIfE(arg0, arg3, arg4, state); // CHECK-NEXT:}; -// CHECK:extern "C" void _Z6init_aIdE(void *, void *, double, nativecpu_state *); -// CHECK:inline static void _Z6init_aIdEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +// CHECK:extern "C" void _Z6init_aIdE(void *, void *, double, __nativecpu_state *); +// CHECK:inline static void _Z6init_aIdEsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { // CHECK-NEXT: void* arg0 = MArgs[0].getPtr(); // CHECK-NEXT: void* arg3 = MArgs[3].getPtr(); // CHECK-NEXT: double arg4 = *(double*)MArgs[4].getPtr(); diff --git a/clang/test/SemaSYCL/native_cpu/kernelhandler.cpp b/clang/test/SemaSYCL/native_cpu/kernelhandler.cpp index e38d21032aec..95d3a2523501 100644 --- a/clang/test/SemaSYCL/native_cpu/kernelhandler.cpp +++ b/clang/test/SemaSYCL/native_cpu/kernelhandler.cpp @@ -31,8 +31,8 @@ int main() { //CHECK-HC-NEXT: #include //CHECK-HC-NEXT: #include //CHECK-HC-NEXT: extern "C" void __sycl_register_lib(pi_device_binaries desc); -//CHECK-HC:extern "C" void _Z5Test1(void *, void *, nativecpu_state *); -//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +//CHECK-HC:extern "C" void _Z5Test1(void *, void *, __nativecpu_state *); +//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { //CHECK-HC-NEXT: void* arg0 = MArgs[0].getPtr(); //CHECK-HC-NEXT: void* arg3 = MArgs[3].getPtr(); //CHECK-HC-NEXT: _Z5Test1(arg0, arg3, state); diff --git a/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs.cpp b/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs.cpp index 635d5a3593af..759c01b2c62e 100644 --- a/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs.cpp +++ b/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs.cpp @@ -23,6 +23,6 @@ int main() { //CHECK-HC: #pragma once //CHECK-HC-NEXT: #include //CHECK-HC:extern "C" void _Z5Test1(); -//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +//CHECK-HC:inline static void _Z5Test1subhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { //CHECK-HC-NEXT: _Z5Test1(); //CHECK-HC-NEXT:}; diff --git a/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs2.cpp b/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs2.cpp index ebfdd185fc95..0340d44331bd 100644 --- a/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs2.cpp +++ b/clang/test/SemaSYCL/native_cpu/kernelhandler_noargs2.cpp @@ -16,6 +16,6 @@ int main() { //CHECK-HC: #pragma once //CHECK-HC-NEXT: #include //CHECK-HC:extern "C" void _ZZ4mainE10TestKernel(); -//CHECK-HC:inline static void _ZZ4mainE10TestKernelsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, nativecpu_state *state) { +//CHECK-HC:inline static void _ZZ4mainE10TestKernelsubhandler(const sycl::detail::NativeCPUArgDesc *MArgs, __nativecpu_state *state) { //CHECK-HC-NEXT: _ZZ4mainE10TestKernel(); //CHECK-HC-NEXT:}; diff --git a/llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp b/llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp index a2169709154d..6c2c3a26a0ee 100644 --- a/llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp +++ b/llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp @@ -113,7 +113,7 @@ void emitKernelDecl(const Function *F, const SmallVector &argMask, // find the index of the last used arg while (!argMask[I] && I + 1 < argMask.size()) I++; - O << EmitArgDecl(F->getArg(UsedI), I) << ", nativecpu_state *);\n"; + O << EmitArgDecl(F->getArg(UsedI), I) << ", __nativecpu_state *);\n"; } void emitSubKernelHandler(const Function *F, const SmallVector &argMask, @@ -138,7 +138,7 @@ void emitSubKernelHandler(const Function *F, const SmallVector &argMask, O << "\ninline static void " << F->getName() << "subhandler("; O << "const sycl::detail::NativeCPUArgDesc *MArgs, " - "nativecpu_state *state) {\n"; + "__nativecpu_state *state) {\n"; // Retrieve only the args that are used for (unsigned I = 0, UsedI = 0; I < argMask.size() && UsedI < F->getFunctionType()->getNumParams(); diff --git a/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp index b49367c391ee..4944dbee9914 100644 --- a/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp @@ -96,15 +96,13 @@ Function *cloneFunctionAndAddParam(Function *oldF, Type *T) { // Todo: add support for more SPIRV builtins here static std::map BuiltinNamesMap{ - {"__spirv_BuiltInGlobalInvocationId", - "_Z13get_global_idmP15nativecpu_state"}, - {"__spirv_BuiltInWorkgroupSize", "_Z13get_wg_sizemP15nativecpu_state"}, - {"__spirv_BuiltInWorkgroupId", "_Z13get_wgid_mP15nativecpu_state"}, - {"__spirv_BuiltInLocalInvocationId", - "_Z13get_local_id_mP15nativecpu_state"}, - {"__spirv_BuiltInNumWorkgroups", "_Z13get_num_groupsmP15nativecpu_state"}, - {"__spirv_BuiltInGlobalOffset", "_Z13get_global_offsetmP15nativecpu_state"}, - {"__spirv_BuiltInGlobalSize", "_Z13get_global_rangemP15nativecpu_state"}}; + {"__spirv_BuiltInGlobalInvocationId", "__dpcpp_nativecpu_global_id"}, + {"__spirv_BuiltInGlobalSize", "__dpcpp_nativecpu_global_range"}, + {"__spirv_BuiltInWorkgroupSize", "__dpcpp_nativecpu_get_wg_size"}, + {"__spirv_BuiltInWorkgroupId", "__dpcpp_nativecpu_get_wg_id"}, + {"__spirv_BuiltInLocalInvocationId", "__dpcpp_nativecpu_get_local_id"}, + {"__spirv_BuiltInNumWorkgroups", "__dpcpp_nativecpu_get_num_groups"}, + {"__spirv_BuiltInGlobalOffset", "__dpcpp_nativecpu_get_global_offset"}}; Function *getReplaceFunc(Module &M, Type *T, StringRef Name) { Function *F = M.getFunction(Name); @@ -153,7 +151,7 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M, // First we add a pointer to the Native CPU state as arg to all the // kernels. Type *StateType = - StructType::getTypeByName(M.getContext(), "struct.nativecpu_state"); + StructType::getTypeByName(M.getContext(), "struct.__nativecpu_state"); if (!StateType) report_fatal_error("Couldn't find the Native CPU state in the " "module, make sure that -D __SYCL_NATIVE_CPU__ is set", diff --git a/sycl/include/sycl/detail/native_cpu.hpp b/sycl/include/sycl/detail/native_cpu.hpp index 92410b60e75c..4c57c274ac93 100644 --- a/sycl/include/sycl/detail/native_cpu.hpp +++ b/sycl/include/sycl/detail/native_cpu.hpp @@ -19,7 +19,7 @@ struct NativeCPUArgDesc { } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl -struct nativecpu_state { +struct __nativecpu_state { size_t MGlobal_id[3]; size_t MGlobal_range[3]; size_t MWorkGroup_size[3]; @@ -27,11 +27,12 @@ struct nativecpu_state { size_t MLocal_id[3]; size_t MNumGroups[3]; size_t MGlobalOffset[3]; - nativecpu_state(size_t globalR0, size_t globalR1, size_t globalR2, - size_t localR0, size_t localR1, size_t localR2, - size_t globalO0, size_t globalO1, size_t globalO2) - : MGlobal_range{globalR0, globalR1, globalR2}, - MWorkGroup_size{localR0, localR1, localR2}, + __nativecpu_state(size_t globalR0, size_t globalR1, size_t globalR2, + size_t localR0, size_t localR1, size_t localR2, + size_t globalO0, size_t globalO1, size_t globalO2) + : MGlobal_range{globalR0, globalR1, globalR2}, MWorkGroup_size{localR0, + localR1, + localR2}, MNumGroups{globalR0 / localR0, globalR1 / localR1, globalR2 / localR2}, MGlobalOffset{globalO0, globalO1, globalO2} { MGlobal_id[0] = 0; @@ -64,44 +65,44 @@ struct nativecpu_state { [[intel::device_indirectly_callable]] extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_global_idmP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_global_id(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MGlobal_id[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_global_rangemP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_global_range(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MGlobal_range[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_wg_sizemP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_get_wg_size(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MWorkGroup_size[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_wgid_mP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_get_wg_id(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MWorkGroup_id[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_local_id_mP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_get_local_id(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MLocal_id[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_num_groupsmP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_get_num_groups(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MNumGroups[0]); } extern "C" __SYCL_HC_ATTRS __attribute((address_space(0))) size_t * -_Z13get_global_offsetmP15nativecpu_state(__attribute((address_space(0))) - nativecpu_state *s) { +__dpcpp_nativecpu_get_global_offset(__attribute((address_space(0))) + __nativecpu_state *s) { return &(s->MGlobalOffset[0]); } #undef __SYCL_HC_ATTRS diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index 32be8d243b87..5def62e83732 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -56,7 +56,7 @@ struct _pi_program : _pi_object { }; using nativecpu_kernel_t = void(const sycl::detail::NativeCPUArgDesc *, - nativecpu_state *); + __nativecpu_state *); using nativecpu_ptr_t = nativecpu_kernel_t *; using nativecpu_task_t = std::function; struct _pi_kernel : _pi_object { @@ -1052,10 +1052,10 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, // TODO: add proper event dep management sycl::detail::NDRDescT ndr = getNDRDesc(WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize); - nativecpu_state state(ndr.GlobalSize[0], ndr.GlobalSize[1], ndr.GlobalSize[2], - ndr.LocalSize[0], ndr.LocalSize[1], ndr.LocalSize[2], - ndr.GlobalOffset[0], ndr.GlobalOffset[1], - ndr.GlobalOffset[2]); + __nativecpu_state state(ndr.GlobalSize[0], ndr.GlobalSize[1], + ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1], + ndr.LocalSize[2], ndr.GlobalOffset[0], + ndr.GlobalOffset[1], ndr.GlobalOffset[2]); auto numWG0 = ndr.GlobalSize[0] / ndr.LocalSize[0]; auto numWG1 = ndr.GlobalSize[1] / ndr.LocalSize[1]; auto numWG2 = ndr.GlobalSize[2] / ndr.LocalSize[2]; diff --git a/sycl/test/check_device_code/native_cpu_builtins.cpp b/sycl/test/check_device_code/native_cpu_builtins.cpp index 63e8013871c7..b4c422346749 100644 --- a/sycl/test/check_device_code/native_cpu_builtins.cpp +++ b/sycl/test/check_device_code/native_cpu_builtins.cpp @@ -1,5 +1,10 @@ // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -S -emit-llvm -o - %s | FileCheck %s +// check that we added the state struct as a function argument, and that we +// inject the calls to our builtins. We disable index flipping for SYCL Native +// CPU, so id.get_global_id(1) maps to dimension 1 for a 2-D kernel (as opposed +// to dim 0), etc + #include "sycl.hpp" class Test1; class Test2; @@ -10,6 +15,8 @@ int main() { sycl::range<1> r(1); deviceQueue.submit([&](sycl::handler &h) { h.parallel_for(r, [=](sycl::id<1> id) { acc[id[0]] = 42; }); + // CHECK: @_Z5Test1(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) + // CHECK: call{{.*}}__dpcpp_nativecpu_global_id(ptr %2) }); sycl::nd_range<2> r2({1, 1}, { 1, @@ -17,25 +24,49 @@ int main() { }); deviceQueue.submit([&](sycl::handler &h) { h.parallel_for(r2, [=](sycl::id<2> id) { acc[id[1]] = 42; }); + // CHECK: @_Z5Test2(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) + // CHECK: call{{.*}}__dpcpp_nativecpu_global_id(ptr %2) }); sycl::nd_range<3> r3({1, 1, 1}, {1, 1, 1}); deviceQueue.submit([&](sycl::handler &h) { h.parallel_for( r3, [=](sycl::item<3> item) { acc[item[2]] = item.get_range(0); }); + // CHECK: @_Z5Test3(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) + // CHECK-DAG: call{{.*}}__dpcpp_nativecpu_global_range(ptr %2) + // CHECK-DAG: call{{.*}}__dpcpp_nativecpu_global_id(ptr %2) }); -} -// check that we added the state struct as a function argument, and that we -// inject the calls to our builtins. We disable index flipping for SYCL Native -// CPU, so id.get_global_id(1) maps to dimension 1 for a 2-D kernel (as opposed -// to dim 0), etc + const size_t dim = 2; + using dataT = std::tuple; + sycl::range<3> NumOfWorkItems{2 * dim, 2 * (dim + 1), 2 * (dim + 2)}; + sycl::range<3> LocalSizes{dim, dim + 1, dim + 2}; + sycl::buffer Buffer(NumOfWorkItems); + + sycl::queue Queue; -// CHECK: @_Z5Test1(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) -// CHECK: call{{.*}}_Z13get_global_idmP15nativecpu_state(ptr %2) + Queue.submit([&](sycl::handler &cgh) { + sycl::accessor Accessor{Buffer, cgh, sycl::write_only}; + sycl::nd_range<3> TheRange{NumOfWorkItems, LocalSizes}; + cgh.parallel_for(TheRange, [=](sycl::nd_item<3> id) { + auto localX = id.get_local_id(0); + auto localY = id.get_local_id(1); + auto localZ = id.get_local_id(2); -// CHECK: @_Z5Test2(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) -// CHECK: call{{.*}}_Z13get_global_idmP15nativecpu_state(ptr %2) + auto groupX = id.get_group(0); + auto groupY = id.get_group(1); + auto groupZ = id.get_group(2); + + auto rangeX = id.get_local_range(0); + auto rangeY = id.get_local_range(1); + auto rangeZ = id.get_local_range(2); + Accessor[groupX * rangeX + localX][groupY * rangeY + localY] + [groupZ * rangeZ + localZ] = {rangeX, rangeY, rangeZ}; + // CHECK-DAG: call{{.*}}__dpcpp_nativecpu_get_local_id(ptr %{{[0-9]*}}) + // CHECK-DAG: call{{.*}}__dpcpp_nativecpu_get_wg_size(ptr %{{[0-9]*}}) + // CHECK-DAG: call{{.*}}__dpcpp_nativecpu_get_wg_id(ptr %{{[0-9]*}}) + }); + }); +} -// CHECK: @_Z5Test3(ptr {{.*}}%0, ptr {{.*}}%1, ptr %2) -// CHECK: call{{.*}}_Z13get_global_rangemP15nativecpu_state(ptr %2) -// CHECK: call{{.*}}_Z13get_global_idmP15nativecpu_state(ptr %2) +// check that the generated module has the is-native-cpu module flag set +// CHECK: !{{[0-9]*}} = !{i32 1, !"is-native-cpu", i32 1}